diff --git a/.gitignore b/.gitignore
index 4c86068b7ee3024416094613d2f8f8d74ce89921..964bfa4e48ee8e7c9387339d5775a3df90c63eb4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,10 +70,17 @@ build
 cmake-build-debug
 cmake-build-release
 
-
 #ios demo
 demo/ios/PaddleMobileDemo/PaddleMobileDemo/googlenet_combine/
 demo/ios/PaddleMobileDemo/PaddleMobileDemo/*.jpg
 demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/*.a
 *.xcuserstate
 /tools/quantification/quantify
+
+# metal
+Podfile.lock
+metal/Pods/
+SwiftProtobuf.framework
+paddle-mobile.xcworkspace
+metal/models/
+metal/images/
diff --git a/README.md b/README.md
index 8db78ff27542c04d3e89fcd27ba26385c8f44da1..fb4daf3bde4658223cff6e2ebdad55d78412f339 100644
--- a/README.md
+++ b/README.md
@@ -26,16 +26,6 @@ Paddle-Moible是PaddlePaddle组织下的项目，是一个致力于嵌入式平
 
 - **ARM CPU**
 
-|mobilenet arm v7|1线程|2线程|4线程|
-|------------|----|-----|-----|
-|麒麟960(ms)|110.586|70.897|47.474|
-|||||
-|mobilenetssd arm v7|1线程|2线程|4线程|
-|麒麟960(ms)|222.124|138.952|90.856|
-|||||
-|googlenet(v1) arm v7|1线程|2线程|4线程|
-|麒麟960(ms)|348.018|240.304|169.998|
-
     arm cpu是paddle-mobile的主要支持方向，cpu的通用性一直是其优势。嵌入式深度学习，需要大量的cpu汇编实现。我们正在紧锣密鼓的编码，为的是能充分硬件的每一点加速能力。
     arm cpu的优化工作还在进行中，现在使用了常规的cpu优化。在arm a73上paddle-mobile arm-v7现在单核运行一次mobilenet1.0是110+ms，显然这不是我们的最终目标，我们正在用大量的汇编改写，后续性能仍会有巨大提升空间, 目前只支持armv7, 未来我们也会支持armv8。
     
diff --git a/metal/Podfile b/metal/Podfile
new file mode 100644
index 0000000000000000000000000000000000000000..6e9a6c6e3713ceaafc8d1769d7ec731ecc78b615
--- /dev/null
+++ b/metal/Podfile
@@ -0,0 +1,19 @@
+platform :ios, ‘9.0’
+use_frameworks!
+
+workspace 'paddle-mobile.xcworkspace'
+
+target 'paddle-mobile-demo' do
+	project 'paddle-mobile-demo/paddle-mobile-demo.xcodeproj'
+    pod 'SwiftProtobuf', '~> 1.0'
+end
+
+target 'paddle-mobile' do
+	project 'paddle-mobile/paddle-mobile.xcodeproj'
+	pod 'SwiftProtobuf', '~> 1.0'
+end
+
+target 'paddle-mobile-unit-test' do
+    project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj'
+    pod 'SwiftProtobuf', '~> 1.0'
+end
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
new file mode 100644
index 0000000000000000000000000000000000000000..f3ab9fc66a072cd5b0bbba56ae99258f04be3612
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
@@ -0,0 +1,496 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; };
+		FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; };
+		FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; };
+		FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; };
+		FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; };
+		FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; };
+		FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; };
+		FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602C72108580600FACB58 /* MetalHelper.swift */; };
+		FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC918190211DBC3500B6F354 /* paddle-mobile.png */; };
+		FC918193211DC70500B6F354 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC918192211DC70500B6F354 /* iphone.JPG */; };
+		FCD04E6320F3146B0007374F /* params in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6120F3146A0007374F /* params */; };
+		FCD04E6420F3146B0007374F /* model in Resources */ = {isa = PBXBuildFile; fileRef = FCD04E6220F3146A0007374F /* model */; };
+		FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */; };
+		FCDFD41B211D91C7005AB38B /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD41A211D91C7005AB38B /* synset.txt */; };
+		FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
+		FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
+		FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCEEE7D3210627A000444BEC /* banana.jpeg */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		FCEBEC2E20E1392000C0B14D /* Embed Frameworks */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = "";
+			dstSubfolderSpec = 10;
+			files = (
+				FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */,
+			);
+			name = "Embed Frameworks";
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.release.xcconfig"; sourceTree = "<group>"; };
+		18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_demo.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = "<group>"; };
+		FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; };
+		FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+		FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
+		FC039B8320E11C550081E9F8 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
+		FC039B8620E11C550081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		FC039B8820E11C560081E9F8 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+		FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		FC3602C72108580600FACB58 /* MetalHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = MetalHelper.swift; path = "../../paddle-mobile-unit-test/paddle-mobile-unit-test/MetalHelper.swift"; sourceTree = "<group>"; };
+		FC918190211DBC3500B6F354 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; };
+		FC918192211DC70500B6F354 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; };
+		FCD04E6120F3146A0007374F /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
+		FCD04E6220F3146A0007374F /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
+		FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelHelper.swift; sourceTree = "<group>"; };
+		FCDFD41A211D91C7005AB38B /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
+		FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FCEEE7D3210627A000444BEC /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		FC039B7B20E11C550081E9F8 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */,
+				30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		5722B50FEC38F55CA9B6A57B /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+				878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */,
+				081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */,
+			);
+			name = Pods;
+			sourceTree = "<group>";
+		};
+		7B7DED984E9EE7BFB45E24E8 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		FC039B7520E11C550081E9F8 = {
+			isa = PBXGroup;
+			children = (
+				FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */,
+				FC039B8020E11C550081E9F8 /* paddle-mobile-demo */,
+				FC039B7F20E11C550081E9F8 /* Products */,
+				5722B50FEC38F55CA9B6A57B /* Pods */,
+				7B7DED984E9EE7BFB45E24E8 /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		FC039B7F20E11C550081E9F8 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = {
+			isa = PBXGroup;
+			children = (
+				FC0E2C2020EDC03B009C1FAC /* models */,
+				FC0E2C1D20EDC030009C1FAC /* images */,
+				FC039B8120E11C550081E9F8 /* AppDelegate.swift */,
+				FC013927210204A3008100E3 /* PreProcessKernel.metal */,
+				FC039B8320E11C550081E9F8 /* ViewController.swift */,
+				FC039B8520E11C550081E9F8 /* Main.storyboard */,
+				FC039B8820E11C560081E9F8 /* Assets.xcassets */,
+				FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */,
+				FC039B8D20E11C560081E9F8 /* Info.plist */,
+				FC3602C72108580600FACB58 /* MetalHelper.swift */,
+				FCDFD3FA211D72C3005AB38B /* ModelHelper.swift */,
+			);
+			path = "paddle-mobile-demo";
+			sourceTree = "<group>";
+		};
+		FC0E2C1D20EDC030009C1FAC /* images */ = {
+			isa = PBXGroup;
+			children = (
+				FC918192211DC70500B6F354 /* iphone.JPG */,
+				FC918190211DBC3500B6F354 /* paddle-mobile.png */,
+				FCDFD41A211D91C7005AB38B /* synset.txt */,
+				FCEEE7D3210627A000444BEC /* banana.jpeg */,
+			);
+			name = images;
+			path = ../../images;
+			sourceTree = "<group>";
+		};
+		FC0E2C2020EDC03B009C1FAC /* models */ = {
+			isa = PBXGroup;
+			children = (
+				FCD04E6020F3146A0007374F /* mobilenet */,
+			);
+			name = models;
+			path = ../../models;
+			sourceTree = "<group>";
+		};
+		FCD04E6020F3146A0007374F /* mobilenet */ = {
+			isa = PBXGroup;
+			children = (
+				FCD04E6120F3146A0007374F /* params */,
+				FCD04E6220F3146A0007374F /* model */,
+			);
+			path = mobilenet;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		FC039B7D20E11C550081E9F8 /* paddle-mobile-demo */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = FC039B9020E11C560081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile-demo" */;
+			buildPhases = (
+				9E041A9C487A2E44C709327E /* [CP] Check Pods Manifest.lock */,
+				FC039B7A20E11C550081E9F8 /* Sources */,
+				FC039B7B20E11C550081E9F8 /* Frameworks */,
+				FC039B7C20E11C550081E9F8 /* Resources */,
+				84ED590C0E51ABA9C34F51B5 /* [CP] Embed Pods Frameworks */,
+				FCEBEC2E20E1392000C0B14D /* Embed Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "paddle-mobile-demo";
+			productName = "paddle-mobile-demo";
+			productReference = FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		FC039B7620E11C550081E9F8 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastSwiftUpdateCheck = 0930;
+				LastUpgradeCheck = 0930;
+				ORGANIZATIONNAME = orange;
+				TargetAttributes = {
+					FC039B7D20E11C550081E9F8 = {
+						CreatedOnToolsVersion = 9.3.1;
+					};
+				};
+			};
+			buildConfigurationList = FC039B7920E11C550081E9F8 /* Build configuration list for PBXProject "paddle-mobile-demo" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = FC039B7520E11C550081E9F8;
+			productRefGroup = FC039B7F20E11C550081E9F8 /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				FC039B7D20E11C550081E9F8 /* paddle-mobile-demo */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		FC039B7C20E11C550081E9F8 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FCD04E6320F3146B0007374F /* params in Resources */,
+				FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
+				FC918191211DBC3500B6F354 /* paddle-mobile.png in Resources */,
+				FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
+				FCEEE7D4210627A000444BEC /* banana.jpeg in Resources */,
+				FC918193211DC70500B6F354 /* iphone.JPG in Resources */,
+				FCDFD41B211D91C7005AB38B /* synset.txt in Resources */,
+				FCD04E6420F3146B0007374F /* model in Resources */,
+				FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		84ED590C0E51ABA9C34F51B5 /* [CP] Embed Pods Frameworks */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh",
+				"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
+			);
+			name = "[CP] Embed Pods Frameworks";
+			outputPaths = (
+				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
+		9E041A9C487A2E44C709327E /* [CP] Check Pods Manifest.lock */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
+				"${PODS_ROOT}/Manifest.lock",
+			);
+			name = "[CP] Check Pods Manifest.lock";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/Pods-paddle-mobile-demo-checkManifestLockResult.txt",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		FC039B7A20E11C550081E9F8 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */,
+				FCDFD3FB211D72C3005AB38B /* ModelHelper.swift in Sources */,
+				FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */,
+				FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
+				FC3602C82108580600FACB58 /* MetalHelper.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		FC039B8520E11C550081E9F8 /* Main.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FC039B8620E11C550081E9F8 /* Base */,
+			);
+			name = Main.storyboard;
+			sourceTree = "<group>";
+		};
+		FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FC039B8B20E11C560081E9F8 /* Base */,
+			);
+			name = LaunchScreen.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		FC039B8E20E11C560081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+			};
+			name = Debug;
+		};
+		FC039B8F20E11C560081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		FC039B9120E11C560081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE = "";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		FC039B9220E11C560081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 081C9CF10DB06C58B8B6B039 /* Pods-paddle-mobile-demo.release.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-demo/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = com.paddlemobile.metal;
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE = "";
+				PROVISIONING_PROFILE_SPECIFIER = "";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		FC039B7920E11C550081E9F8 /* Build configuration list for PBXProject "paddle-mobile-demo" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B8E20E11C560081E9F8 /* Debug */,
+				FC039B8F20E11C560081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		FC039B9020E11C560081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile-demo" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B9120E11C560081E9F8 /* Debug */,
+				FC039B9220E11C560081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = FC039B7620E11C550081E9F8 /* Project object */;
+}
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/contents.xcworkspacedata
new file mode 100644
index 0000000000000000000000000000000000000000..d363ac3d832069ff15c89241985b5be4f48a4e1a
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:paddle-mobile-demo.xcodeproj">
+   </FileRef>
+</Workspace>
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
new file mode 100644
index 0000000000000000000000000000000000000000..18d981003d68d0546c4804ac2ff47dd97c6e7921
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
new file mode 100644
index 0000000000000000000000000000000000000000..c13bba168aef55d0004299258e02496fc2486236
Binary files /dev/null and b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate differ
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme
new file mode 100644
index 0000000000000000000000000000000000000000..46c65bd36a9ab7027b1cb7a81533dcd553ccb62e
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile-demo.xcscheme
@@ -0,0 +1,91 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "0940"
+   version = "1.3">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+               BuildableName = "paddle-mobile-demo.app"
+               BlueprintName = "paddle-mobile-demo"
+               ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES">
+      <Testables>
+      </Testables>
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+            BuildableName = "paddle-mobile-demo.app"
+            BlueprintName = "paddle-mobile-demo"
+            ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "NO"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+            BuildableName = "paddle-mobile-demo.app"
+            BlueprintName = "paddle-mobile-demo"
+            ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <BuildableProductRunnable
+         runnableDebuggingMode = "0">
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
+            BuildableName = "paddle-mobile-demo.app"
+            BlueprintName = "paddle-mobile-demo"
+            ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
+         </BuildableReference>
+      </BuildableProductRunnable>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 0000000000000000000000000000000000000000..8f61f4a88a7bcbe39bbb56e22ef203803776fdec
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>paddle-mobile-demo.xcscheme</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>2</integer>
+		</dict>
+	</dict>
+	<key>SuppressBuildableAutocreation</key>
+	<dict>
+		<key>FC039B7D20E11C550081E9F8</key>
+		<dict>
+			<key>primary</key>
+			<true/>
+		</dict>
+	</dict>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift
new file mode 100644
index 0000000000000000000000000000000000000000..54dad2b5bf721f3d132bad2502d30b34ca0773ab
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/AppDelegate.swift
@@ -0,0 +1,52 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import UIKit
+
+/// Application delegate of the paddle-mobile demo. Every lifecycle callback below is an
+/// intentionally empty stub; only the launch callback returns a value.
+@UIApplicationMain
+class AppDelegate: UIResponder, UIApplicationDelegate {
+
+    /// Main window of the app.
+    var window: UIWindow?
+
+    /// Launch callback: performs no customization and always returns `true`.
+    func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
+        return true
+    }
+
+    /// Called once the app is active again; paused (or not yet started) tasks would be restarted here.
+    func applicationDidBecomeActive(_ application: UIApplication) {
+    }
+
+    /// Called when the app is about to go from active to inactive (for example an incoming
+    /// call or SMS, or the user quitting). Ongoing tasks and timers would be paused here.
+    func applicationWillResignActive(_ application: UIApplication) {
+    }
+
+    /// Called on entering the background, and instead of `applicationWillTerminate(_:)` when
+    /// background execution is supported. Shared resources and state would be saved here.
+    func applicationDidEnterBackground(_ application: UIApplication) {
+    }
+
+    /// Called during the background-to-active transition; background changes would be undone here.
+    func applicationWillEnterForeground(_ application: UIApplication) {
+    }
+
+    /// Called when the app is about to terminate; data would be saved here if appropriate.
+    func applicationWillTerminate(_ application: UIApplication) {
+    }
+}
+
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/AppIcon.appiconset/Contents.json b/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/AppIcon.appiconset/Contents.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8db8d65fd79fd541b2b7eba75c7378af3448f9c
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/AppIcon.appiconset/Contents.json
@@ -0,0 +1,98 @@
+{
+  "images" : [
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "83.5x83.5",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ios-marketing",
+      "size" : "1024x1024",
+      "scale" : "1x"
+    }
+  ],
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/Contents.json b/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/Contents.json
new file mode 100644
index 0000000000000000000000000000000000000000..da4a164c918651cdd1e11dca5cc62c333f097601
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Assets.xcassets/Contents.json
@@ -0,0 +1,6 @@
+{
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/LaunchScreen.storyboard b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/LaunchScreen.storyboard
new file mode 100644
index 0000000000000000000000000000000000000000..f83f6fd5810b9c852cf98563d82d5ed1e84ff893
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/LaunchScreen.storyboard
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" systemVersion="17A277" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="EHf-IW-A2E">
+            <objects>
+                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="53" y="375"/>
+        </scene>
+    </scenes>
+</document>
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard
new file mode 100644
index 0000000000000000000000000000000000000000..a5efadeb97ccc41449dc32a2c1dfcdfcf9fceac5
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Base.lproj/Main.storyboard
@@ -0,0 +1,208 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
+    <device id="retina4_7" orientation="portrait">
+        <adaptation id="fullscreen"/>
+    </device>
+    <dependencies>
+        <deployment identifier="iOS"/>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14088"/>
+        <capability name="Aspect ratio constraints" minToolsVersion="5.1"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="tne-QT-ifu">
+            <objects>
+                <viewController id="BYZ-38-t0r" customClass="ViewController" customModule="paddle_mobile_demo" customModuleProvider="target" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <subviews>
+                            <imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="ZZh-fw-LwK">
+                                <rect key="frame" x="0.0" y="20" width="375" height="247"/>
+                            </imageView>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Thread:" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="2EB-m2-a3L">
+                                <rect key="frame" x="10" y="538" width="68" height="24"/>
+                                <constraints>
+                                    <constraint firstAttribute="width" constant="68" id="Q5J-tq-JSX"/>
+                                    <constraint firstAttribute="height" constant="24" id="SYv-As-Si8"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="20"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                            <pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr">
+                                <rect key="frame" x="88" y="510.5" width="287" height="80"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/>
+                                </constraints>
+                            </pickerView>
+                            <pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="6MG-gv-hD5">
+                                <rect key="frame" x="85" y="401" width="290" height="80"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="80" id="yAL-JY-G6b"/>
+                                </constraints>
+                            </pickerView>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="Models" textAlignment="natural" lineBreakMode="tailTruncation" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="avL-VK-Kha">
+                                <rect key="frame" x="10" y="429" width="65" height="24"/>
+                                <constraints>
+                                    <constraint firstAttribute="width" constant="65" id="6oA-g2-Xq4"/>
+                                    <constraint firstAttribute="height" constant="24" id="EwE-B3-z2R"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="20"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="wUL-9N-u1V">
+                                <rect key="frame" x="16" y="597" width="63.5" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Image">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="selectImageAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="5uR-SM-fKO"/>
+                                </connections>
+                            </button>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="XpL-9M-UOp">
+                                <rect key="frame" x="109.5" y="597" width="63" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Load">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="loadAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="fZ5-CQ-jCY"/>
+                                </connections>
+                            </button>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="R90-Yf-S6g">
+                                <rect key="frame" x="202.5" y="597" width="63.5" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Predict">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="predictAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="Iyy-sY-gt4"/>
+                                </connections>
+                            </button>
+                            <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="a3K-ri-NVs">
+                                <rect key="frame" x="296" y="597" width="63" height="30"/>
+                                <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <state key="normal" title="Clear">
+                                    <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                </state>
+                                <connections>
+                                    <action selector="clearAct:" destination="BYZ-38-t0r" eventType="touchUpInside" id="JYf-UX-rCR"/>
+                                </connections>
+                            </button>
+                            <view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="w7H-Sk-Rai">
+                                <rect key="frame" x="79.5" y="597" width="30" height="30"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="30" id="tje-ct-ded"/>
+                                    <constraint firstAttribute="width" constant="30" id="vYd-Fc-KAj"/>
+                                </constraints>
+                            </view>
+                            <view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="T4O-nx-ciH">
+                                <rect key="frame" x="266" y="597" width="30" height="30"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="30" id="CZQ-vS-4di"/>
+                                    <constraint firstAttribute="width" constant="30" id="fXE-S7-ZXL"/>
+                                </constraints>
+                            </view>
+                            <view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="976-fk-Kx2">
+                                <rect key="frame" x="172.5" y="597" width="30" height="30"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="30" id="JBc-yg-8YH"/>
+                                    <constraint firstAttribute="width" constant="30" id="L4p-hP-s5C"/>
+                                </constraints>
+                            </view>
+                            <label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="耗时:" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="m5L-O7-P31">
+                                <rect key="frame" x="15" y="277" width="350" height="38"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="38" id="6SS-sb-7I2"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="15"/>
+                                <nil key="textColor"/>
+                                <nil key="highlightedColor"/>
+                            </label>
+                            <imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="paddle-mobile.png" translatesAutoresizingMaskIntoConstraints="NO" id="4ey-Xr-U4e">
+                                <rect key="frame" x="90" y="637" width="195" height="30"/>
+                                <constraints>
+                                    <constraint firstAttribute="width" secondItem="4ey-Xr-U4e" secondAttribute="height" multiplier="6.5:1" id="8c5-FF-lB9"/>
+                                </constraints>
+                            </imageView>
+                            <textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" editable="NO" text="结果:" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="VQn-bS-fWp">
+                                <rect key="frame" x="10" y="323" width="355" height="70"/>
+                                <color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
+                                <constraints>
+                                    <constraint firstAttribute="height" constant="70" id="07M-Gx-Elk"/>
+                                </constraints>
+                                <fontDescription key="fontDescription" type="system" pointSize="15"/>
+                                <textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
+                            </textView>
+                        </subviews>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <constraints>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="VQn-bS-fWp" secondAttribute="trailing" constant="10" id="1Xg-0h-9SE"/>
+                            <constraint firstItem="avL-VK-Kha" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="2t9-hS-VXa"/>
+                            <constraint firstItem="R90-Yf-S6g" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="76b-Ny-1Og"/>
+                            <constraint firstItem="DlO-dk-RMr" firstAttribute="centerY" secondItem="2EB-m2-a3L" secondAttribute="centerY" id="7R7-7x-IRs"/>
+                            <constraint firstItem="a3K-ri-NVs" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="8Gv-HO-dKf"/>
+                            <constraint firstItem="w7H-Sk-Rai" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="94H-ZN-G7S"/>
+                            <constraint firstItem="2EB-m2-a3L" firstAttribute="top" secondItem="avL-VK-Kha" secondAttribute="bottom" constant="85" id="A5J-Qv-Ux5"/>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="a3K-ri-NVs" secondAttribute="trailing" constant="16" id="Avk-9e-Pvg"/>
+                            <constraint firstItem="DlO-dk-RMr" firstAttribute="leading" secondItem="2EB-m2-a3L" secondAttribute="trailing" constant="10" id="CYY-XV-JFd"/>
+                            <constraint firstItem="T4O-nx-ciH" firstAttribute="leading" secondItem="R90-Yf-S6g" secondAttribute="trailing" id="ImW-FE-Mua"/>
+                            <constraint firstItem="T4O-nx-ciH" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="KIi-87-AGM"/>
+                            <constraint firstItem="XpL-9M-UOp" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="KWW-qT-Rzf"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="centerY" secondItem="avL-VK-Kha" secondAttribute="centerY" id="KZa-YZ-DEs"/>
+                            <constraint firstItem="2EB-m2-a3L" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="Le3-TN-zOL"/>
+                            <constraint firstItem="ZZh-fw-LwK" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="MeS-HQ-voE"/>
+                            <constraint firstItem="m5L-O7-P31" firstAttribute="top" secondItem="ZZh-fw-LwK" secondAttribute="bottom" constant="10" id="NUL-Ta-VI8"/>
+                            <constraint firstItem="m5L-O7-P31" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="15" id="RFA-z1-9aB"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="a3K-ri-NVs" secondAttribute="width" id="Rp6-Bh-BN3"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="S0W-0G-75m"/>
+                            <constraint firstItem="w7H-Sk-Rai" firstAttribute="leading" secondItem="wUL-9N-u1V" secondAttribute="trailing" id="VBM-8b-jP0"/>
+                            <constraint firstItem="VQn-bS-fWp" firstAttribute="top" secondItem="m5L-O7-P31" secondAttribute="bottom" constant="8" id="VpS-4N-mOo"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="top" secondItem="2EB-m2-a3L" secondAttribute="bottom" constant="35" id="VpU-j2-gaE"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="XpL-9M-UOp" secondAttribute="width" id="Xrz-oE-aIz"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="width" secondItem="R90-Yf-S6g" secondAttribute="width" id="a4b-Rh-yKG"/>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="trailing" secondItem="m5L-O7-P31" secondAttribute="trailing" constant="10" id="aOn-WU-xP7"/>
+                            <constraint firstItem="R90-Yf-S6g" firstAttribute="leading" secondItem="976-fk-Kx2" secondAttribute="trailing" id="amy-QU-hbW"/>
+                            <constraint firstItem="a3K-ri-NVs" firstAttribute="leading" secondItem="T4O-nx-ciH" secondAttribute="trailing" id="dkX-Iq-hYk"/>
+                            <constraint firstItem="ZZh-fw-LwK" firstAttribute="top" secondItem="6Tk-OE-BBY" secondAttribute="top" id="eIC-fZ-OEE"/>
+                            <constraint firstItem="976-fk-Kx2" firstAttribute="centerY" secondItem="wUL-9N-u1V" secondAttribute="centerY" id="fFg-pB-eyU"/>
+                            <constraint firstItem="6Tk-OE-BBY" firstAttribute="bottom" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="40" id="fG6-0p-I0P"/>
+                            <constraint firstItem="XpL-9M-UOp" firstAttribute="leading" secondItem="w7H-Sk-Rai" secondAttribute="trailing" id="guC-Db-cA9"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="leading" secondItem="avL-VK-Kha" secondAttribute="trailing" constant="10" id="jNW-iC-u7V"/>
+                            <constraint firstItem="4ey-Xr-U4e" firstAttribute="bottom" secondItem="6Tk-OE-BBY" secondAttribute="bottom" id="o1X-q5-P7j"/>
+                            <constraint firstItem="6MG-gv-hD5" firstAttribute="top" secondItem="VQn-bS-fWp" secondAttribute="bottom" constant="8" id="tAE-ss-jlA"/>
+                            <constraint firstItem="4ey-Xr-U4e" firstAttribute="top" secondItem="wUL-9N-u1V" secondAttribute="bottom" constant="10" id="udc-wT-jqd"/>
+                            <constraint firstItem="ZZh-fw-LwK" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" id="vXI-l2-CjL"/>
+                            <constraint firstItem="VQn-bS-fWp" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="10" id="wtI-Dl-YPq"/>
+                            <constraint firstItem="976-fk-Kx2" firstAttribute="leading" secondItem="XpL-9M-UOp" secondAttribute="trailing" id="wxP-4D-gDn"/>
+                            <constraint firstItem="wUL-9N-u1V" firstAttribute="leading" secondItem="6Tk-OE-BBY" secondAttribute="leading" constant="16" id="xzZ-jO-4fI"/>
+                            <constraint firstItem="DlO-dk-RMr" firstAttribute="trailing" secondItem="6Tk-OE-BBY" secondAttribute="trailing" id="z6f-Nb-ASh"/>
+                            <constraint firstItem="4ey-Xr-U4e" firstAttribute="centerX" secondItem="8bC-Xf-vdC" secondAttribute="centerX" id="zzi-Qz-G9G"/>
+                        </constraints>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                    <connections>
+                        <outlet property="elapsedTimeLabel" destination="m5L-O7-P31" id="vJ7-EQ-Z5f"/>
+                        <outlet property="modelPickerView" destination="6MG-gv-hD5" id="l0g-ue-raK"/>
+                        <outlet property="resultTextView" destination="VQn-bS-fWp" id="306-c7-3vM"/>
+                        <outlet property="selectImageView" destination="ZZh-fw-LwK" id="afR-Bv-6AW"/>
+                        <outlet property="threadPickerView" destination="DlO-dk-RMr" id="Kk4-QV-b5o"/>
+                    </connections>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="-724" y="98.50074962518741"/>
+        </scene>
+    </scenes>
+    <resources>
+        <image name="paddle-mobile.png" width="402" height="62"/>
+    </resources>
+</document>
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/Info.plist b/metal/paddle-mobile-demo/paddle-mobile-demo/Info.plist
new file mode 100644
index 0000000000000000000000000000000000000000..665ff9e0cdcc7a102a23bc7b28754ba794c59967
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/Info.plist
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>NSCameraUsageDescription</key>
+	<string>use camera</string>
+	<key>NSPhotoLibraryUsageDescription</key>
+	<string>use album</string>
+	<key>UILaunchStoryboardName</key>
+	<string>LaunchScreen</string>
+	<key>UIMainStoryboardFile</key>
+	<string>Main</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift
new file mode 100644
index 0000000000000000000000000000000000000000..74fa89d93e042f90fe1b590a596ec584fff67f6d
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift
@@ -0,0 +1,48 @@
+//
+//  MetalHelper.swift
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/7/25.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import Metal
+import MetalKit
+import Foundation
+import paddle_mobile
+import MetalPerformanceShaders
+
+/// Shared holder for the Metal objects used by the demo: the system default
+/// device, one command queue created from that device, and a texture loader
+/// bound to the same device.
+class MetalHelper {
+    let device: MTLDevice
+    let queue: MTLCommandQueue
+    let textureLoader: MTKTextureLoader
+    static let shared: MetalHelper = MetalHelper.init()
+
+    /// Creates the device, queue and loader. Stops with a descriptive message
+    /// instead of an anonymous force-unwrap failure when Metal is unavailable.
+    private init(){
+        guard let defaultDevice = MTLCreateSystemDefaultDevice() else {
+            fatalError("MetalHelper: no Metal device is available")
+        }
+        guard let commandQueue = defaultDevice.makeCommandQueue() else {
+            fatalError("MetalHelper: can not create a command queue")
+        }
+        device = defaultDevice
+        queue = commandQueue
+        textureLoader = MTKTextureLoader.init(device: defaultDevice)
+    }
+
+    /// Scales `input` to `size` with a Lanczos filter into a new
+    /// `.rgba32Float` 2D texture.
+    ///
+    /// - Parameters:
+    ///   - queue: command queue the scaling work is committed to.
+    ///   - input: texture to scale.
+    ///   - size: width and height of the produced texture.
+    ///   - complete: called with the scaled texture from the command buffer's
+    ///     completed handler, i.e. after the GPU work has finished.
+    static func scaleTexture(queue: MTLCommandQueue, input: MTLTexture, size:(width: Int, height: Int), complete: @escaping (MTLTexture) -> Void) {
+        // Allocate the destination and the kernel on the device that owns the
+        // queue, so everything encoded into the buffer belongs to one device.
+        let device = queue.device
+
+        let tmpTextureDes = MTLTextureDescriptor.init()
+        tmpTextureDes.width = size.width
+        tmpTextureDes.height = size.height
+        tmpTextureDes.depth = 1
+        tmpTextureDes.usage = [.shaderRead, .shaderWrite]
+        tmpTextureDes.pixelFormat = .rgba32Float
+        tmpTextureDes.textureType = .type2D
+        tmpTextureDes.storageMode = .shared
+        tmpTextureDes.cpuCacheMode = .defaultCache
+
+        guard let dest = device.makeTexture(descriptor: tmpTextureDes) else {
+            fatalError("MetalHelper: can not create a \(size.width)x\(size.height) texture")
+        }
+        guard let buffer = queue.makeCommandBuffer() else {
+            fatalError("MetalHelper: can not create a command buffer")
+        }
+
+        let scale = MPSImageLanczosScale.init(device: device)
+        scale.encode(commandBuffer: buffer, sourceTexture: input, destinationTexture: dest)
+        buffer.addCompletedHandler({ (_) in
+            complete(dest)
+        })
+        buffer.commit()
+    }
+}
+
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift
new file mode 100644
index 0000000000000000000000000000000000000000..7e1f66855e45453eee9fdbe034a309aee44ff960
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ModelHelper.swift
@@ -0,0 +1,89 @@
+//
+//  ModelHelper.swift
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/8/10.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import UIKit
+import MetalKit
+import Foundation
+import paddle_mobile
+import MetalPerformanceShaders
+
+/// Custom kernel bound to the "preprocess" shader function, configured with a
+/// 224 x 224 x 3 output shape and not loaded from the paddle-mobile library.
+class PreProccess: CusomKernel {
+    init(device: MTLDevice) {
+        let outputShape = CusomKernel.Shape(inWidth: 224, inHeight: 224, inChannel: 3)
+        super.init(device: device,
+                   inFunctionName: "preprocess",
+                   outputDim: outputShape,
+                   usePaddleMobileLib: false)
+    }
+}
+
+/// Maps every supported model to the helper that describes how to run it.
+let modelHelperMap: [SupportModel : ModelHelper] = [SupportModel.mobilenet : MobileNetHelper()]
+
+/// Models the demo knows about; the raw value is the model's name as text.
+enum SupportModel: String {
+    case mobilenet = "mobilenet"
+
+    /// Returns all supported models in a fixed order.
+    static func supportedModels() -> [SupportModel] {
+        let models: [SupportModel] = [SupportModel.mobilenet]
+        return models
+    }
+}
+
+/// Per-model description used by the demo: where the model files live, how an
+/// input image is turned into a texture, and how raw output is presented.
+protocol ModelHelper {
+    /// Dimensions handed to the executor as the expected input when predicting.
+    var dim: [Int] { get }
+    /// Path of the model file given to the loader.
+    var modelPath: String { get }
+    /// Path of the parameter file given to the loader.
+    var paramPath: String { get }
+    /// NOTE(review): not read anywhere in this file; presumably a directory
+    /// containing the model files — confirm against the library.
+    var modelDir: String { get }
+    /// Kernel passed to the executor as the pre-processing step of a prediction.
+    var preprocessKernel: CusomKernel { get }
+    /// Converts `image` into a texture and delivers it through `getTexture`.
+    func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void)
+    /// Turns the raw output values `res` into text for display.
+    func resultStr(res: [Float]) -> String
+}
+
+extension ModelHelper {
+    /// Default implementation: loads `image` into a texture with the shared
+    /// texture loader, scales it to 224 x 224 on the shared queue and passes
+    /// the scaled texture to `getTexture`.
+    ///
+    /// - Parameters:
+    ///   - image: source image.
+    ///   - getTexture: receives the scaled texture once scaling has completed.
+    func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
+        // `try?` applies to everything on its right, so it must be wrapped in
+        // parentheses; otherwise `?!` never sees a nil value and a loader
+        // failure surfaces later as an unexplained force-unwrap crash.
+        let texture = (try? MetalHelper.shared.textureLoader.newTexture(cgImage: image, options: [:])) ?! " texture loader error"
+        MetalHelper.scaleTexture(queue: MetalHelper.shared.queue, input: texture, size: (224, 224)) { (resTexture) in
+            getTexture(resTexture)
+        }
+    }
+}
+
+/// `ModelHelper` for MobileNet: resolves the bundled "model" and "params"
+/// files, owns the pre-processing kernel and formats the top five results.
+struct MobileNetHelper: ModelHelper{
+    /// Label table read from a text resource. Lines of ten characters or fewer
+    /// are skipped; for the rest the first ten characters are dropped and the
+    /// remainder is kept as the label.
+    class PreWords {
+        var contents: [String] = []
+
+        /// Reads `fileName`.`type` from `inBundle`; stops the program when the
+        /// resource does not exist.
+        init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
+            guard let filePath = inBundle.path(forResource: fileName, ofType: type) else {
+                fatalError("no file call \(fileName)")
+            }
+            let text = try! String(contentsOfFile: filePath)
+            var parsed: [String] = []
+            for line in text.components(separatedBy: CharacterSet.newlines) where line.count > 10 {
+                let labelStart = line.index(line.startIndex, offsetBy: 10)
+                parsed.append(String(line[labelStart...]))
+            }
+            contents = parsed
+        }
+
+        /// Label stored at `index`.
+        subscript(index: Int) -> String{
+            return contents[index]
+        }
+    }
+    let labels = PreWords.init(fileName: "synset")
+
+    /// Builds one line per entry of `res.top(r: 5)`: rank (starting at 1),
+    /// label, and the entry's value multiplied by 100 as a percentage.
+    func resultStr(res: [Float]) -> String {
+        var lines: [String] = []
+        for (rank, entry) in res.top(r: 5).enumerated() {
+            let line = String(format: "%d: %@ (%3.2f%%)", rank + 1, labels[entry.0], entry.1 * 100)
+            lines.append(line)
+        }
+        return lines.joined(separator: "\n")
+    }
+
+    var preprocessKernel: CusomKernel
+    let dim = [1, 224, 224, 3]
+    let modelPath: String
+    let paramPath: String
+    let modelDir: String
+
+    /// Looks up the model and parameter files in the main bundle and creates
+    /// the pre-processing kernel on the shared Metal device.
+    init() {
+        let bundle = Bundle.main
+        modelPath = bundle.path(forResource: "model", ofType: nil) ?! "model null"
+        paramPath = bundle.path(forResource: "params", ofType: nil) ?! "para null"
+        modelDir = ""
+        preprocessKernel = PreProccess(device: MetalHelper.shared.device)
+    }
+}
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal b/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..f359ab39ac5fbc18febfb6f0da367e72b61b959c
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/PreProcessKernel.metal
@@ -0,0 +1,44 @@
+//
+//  PreProcessKernel.metal
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/7/20.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+#include <metal_stdlib>
+using namespace metal;
+
+
+// Per-pixel preprocessing in float precision: scales the input by 255,
+// subtracts the per-component means, multiplies by 0.017 and writes the result
+// with the x and z components swapped and the fourth component set to 0.
+// Threads outside the output texture do nothing.
+kernel void preprocess(
+                       texture2d<float, access::read> inTexture [[texture(0)]],
+                       texture2d<float, access::write> outTexture [[texture(1)]],
+                       uint2 gid [[thread_position_in_grid]])
+{
+    const bool insideOutput = gid.x < outTexture.get_width() &&
+                              gid.y < outTexture.get_height();
+    if (!insideOutput) {
+        return;
+    }
+    const float4 channelMeans = float4(123.68f, 116.78f, 103.94f, 0.0f);
+    const float4 pixel = inTexture.read(gid);
+    const float4 normalized = (pixel * 255.0 - channelMeans) * 0.017;
+    // Swap the first and third components on the way out.
+    outTexture.write(float4(normalized.z, normalized.y, normalized.x, 0.0f), gid);
+}
+
+// Half-precision variant of the preprocessing kernel: same arithmetic
+// (scale by 255, subtract means, multiply by 0.017, swap x and z, zero the
+// fourth component) on half textures. Threads outside the output texture do
+// nothing.
+kernel void preprocess_half(
+                       texture2d<half, access::read> inTexture [[texture(0)]],
+                       texture2d<half, access::write> outTexture [[texture(1)]],
+                       uint2 gid [[thread_position_in_grid]])
+{
+    const bool insideOutput = gid.x < outTexture.get_width() &&
+                              gid.y < outTexture.get_height();
+    if (!insideOutput) {
+        return;
+    }
+    const half4 channelMeans = half4(123.68f, 116.78f, 103.94f, 0.0f);
+    const half4 pixel = inTexture.read(gid);
+    const half4 normalized = (pixel * 255.0 - channelMeans) * 0.017;
+    // Swap the first and third components on the way out.
+    outTexture.write(half4(normalized.z, normalized.y, normalized.x, 0.0f), gid);
+}
+
+
+
+
+
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
new file mode 100644
index 0000000000000000000000000000000000000000..30fdaf078556bdc4546aec4f27e153f469d9e5ac
--- /dev/null
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo/ViewController.swift
@@ -0,0 +1,177 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import UIKit
+import MetalKit
+import paddle_mobile
+import MetalPerformanceShaders
+
+/// Thread-count choices listed by the thread picker; only a single entry (1)
+/// is offered.
+let threadSupport = [1]
+
+/// Demo screen: loads the selected model, lets the user pick an image and
+/// runs a timed batch of predictions on it.
+class ViewController: UIViewController {
+    @IBOutlet weak var resultTextView: UITextView!
+    @IBOutlet weak var selectImageView: UIImageView!
+    @IBOutlet weak var elapsedTimeLabel: UILabel!
+    @IBOutlet weak var modelPickerView: UIPickerView!
+    @IBOutlet weak var threadPickerView: UIPickerView!
+    var selectImage: UIImage?
+    var program: Program?
+    var executor: Executor<Float32>?
+    var modelType: SupportModel = .mobilenet
+    /// Scaled texture of the currently selected image; nil until scaling ends.
+    var toPredictTexture: MTLTexture?
+    /// Helper registered for the currently selected model type.
+    var modelHelper: ModelHelper {
+        return modelHelperMap[modelType] ?! " has no this type "
+    }
+    var threadNum = 1
+    
+    /// Loads the program for the selected model and creates its executor.
+    /// Errors are printed; `program`/`executor` keep whatever was set so far.
+    @IBAction func loadAct(_ sender: Any) {
+        let inModelHelper = modelHelper
+        let queue = MetalHelper.shared.queue
+        let loader = Loader<Float32>.init()
+        do {
+            let modelPath = inModelHelper.modelPath
+            let paraPath = inModelHelper.paramPath
+            
+            program = try loader.load(device: MetalHelper.shared.device, modelPath: modelPath, paraPath: paraPath)
+            executor = try Executor<Float32>.init(inDevice: MetalHelper.shared.device, inQueue: queue, inProgram: program!)
+        } catch let error {
+            print(error)
+        }
+    }
+    
+    /// Presents an image picker. The camera is used when the device has one;
+    /// otherwise the photo library is shown, because assigning an unavailable
+    /// source type raises an exception (e.g. on the simulator).
+    @IBAction func selectImageAct(_ sender: Any) {
+        let imagePicker = UIImagePickerController()
+        if UIImagePickerController.isSourceTypeAvailable(.camera) {
+            imagePicker.sourceType = .camera
+        } else {
+            imagePicker.sourceType = .photoLibrary
+        }
+        imagePicker.delegate = self
+        self.present(imagePicker, animated: true, completion: nil)
+    }
+    
+    /// Clears the executor and drops both the program and the executor.
+    @IBAction func clearAct(_ sender: Any) {
+        executor?.clear()
+        program = nil
+        executor = nil
+        
+    }
+    
+    /// Runs 100 predictions on the selected texture. The clock is restarted
+    /// when the 50th result arrives, so the reported average covers only the
+    /// second half of the runs; the text of the last result is displayed.
+    @IBAction func predictAct(_ sender: Any) {        
+        guard let inTexture = toPredictTexture else {
+            resultTextView.text = "请选择图片 ! "
+            return
+        }
+        
+        guard let inExecutor = executor else {
+            resultTextView.text = "请先 load ! "
+            return
+        }
+
+        do {
+            let max = 100
+            var startDate = Date.init()
+            for i in 0..<max {
+                try inExecutor.predict(input: inTexture, expect: modelHelper.dim, completionHandle: { [weak self] (result) in
+                    // The controller may be gone before a result arrives;
+                    // there is nothing left to update in that case.
+                    guard let sSelf = self else {
+                        return
+                    }
+                    
+                    if i == (max / 2 - 1) {
+                        startDate = Date.init()
+                    }
+                    
+                    if i == max - 1 {
+                        let time = Date.init().timeIntervalSince(startDate)
+                        DispatchQueue.main.async {
+                            sSelf.resultTextView.text = sSelf.modelHelper.resultStr(res: result.resultArr)
+                            sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max/2) * 1000.0) ms"
+                        }
+                    }
+                }, preProcessKernle: self.modelHelper.preprocessKernel)
+            }
+        } catch let error {
+            print(error)
+        }
+    }
+
+    /// Wires up both pickers and prepares the bundled sample image as the
+    /// initial prediction input.
+    override func viewDidLoad() {
+        super.viewDidLoad()
+        modelPickerView.delegate = self
+        modelPickerView.dataSource = self
+        threadPickerView.delegate = self
+        threadPickerView.dataSource = self
+        
+        selectImage = UIImage.init(named: "banana.jpeg")
+        selectImageView.image = selectImage
+        // Without the sample image there is simply no initial texture; the
+        // predict action already asks the user to pick an image in that case.
+        guard let sampleImage = selectImage?.cgImage else {
+            print("banana.jpeg can not be loaded, no initial image")
+            return
+        }
+        modelHelper.getTexture(image: sampleImage) {[weak self] (texture) in
+            self?.toPredictTexture = texture
+        }
+    }
+}
+
+/// Data source and delegate for the model picker and the thread picker.
+/// Being asked about any other picker view stops the program.
+extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
+    /// Both pickers have exactly one component.
+    func numberOfComponents(in pickerView: UIPickerView) -> Int {
+        guard pickerView == modelPickerView || pickerView == threadPickerView else {
+            fatalError()
+        }
+        return 1
+    }
+    
+    /// Row count: number of supported models, or number of thread choices.
+    func pickerView(_ pickerView: UIPickerView, numberOfRowsInComponent component: Int) -> Int {
+        if pickerView == modelPickerView {
+            return SupportModel.supportedModels().count
+        }
+        if pickerView == threadPickerView {
+            return threadSupport.count
+        }
+        fatalError()
+    }
+    
+    /// Row title: the model's raw value, or the thread count as text.
+    public func pickerView(_ pickerView: UIPickerView, titleForRow row: Int, forComponent component: Int) -> String? {
+        if pickerView == modelPickerView {
+            let model = SupportModel.supportedModels()[row]
+            return model.rawValue
+        }
+        if pickerView == threadPickerView {
+            let threads = threadSupport[row]
+            return "\(threads)"
+        }
+        fatalError()
+    }
+    
+    /// Stores the chosen model type or thread count on the controller.
+    public func pickerView(_ pickerView: UIPickerView, didSelectRow row: Int, inComponent component: Int) {
+        if pickerView == modelPickerView {
+            modelType = SupportModel.supportedModels()[row]
+            return
+        }
+        if pickerView == threadPickerView {
+            threadNum = threadSupport[row]
+            return
+        }
+        fatalError()
+    }
+}
+
+/// Image picker delegate: takes the picked image as the new prediction input.
+extension ViewController:  UIImagePickerControllerDelegate, UINavigationControllerDelegate {
+    /// Dismisses the picker, then shows the original image and converts it
+    /// into the texture used for prediction.
+    func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
+        picker.dismiss(animated: true){[weak self] in
+            // The controller can disappear while the picker is dismissing;
+            // that is not an error, there is just nothing to update.
+            guard let sSelf = self else {
+                return
+            }
+            // Report a missing or non-bitmap image instead of crashing.
+            guard let image = info["UIImagePickerControllerOriginalImage"] as? UIImage,
+                let cgImage = image.cgImage else {
+                sSelf.resultTextView.text = "no image"
+                return
+            }
+            sSelf.selectImage = image
+            sSelf.selectImageView.image = image
+            // Capture weakly so a pending scale does not keep the controller alive.
+            sSelf.modelHelper.getTexture(image: cgImage, getTexture: { [weak sSelf] (texture) in
+                sSelf?.toPredictTexture = texture
+            })
+        }
+    }
+}
+
+
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj
new file mode 100644
index 0000000000000000000000000000000000000000..50d58bb45bb5c0e8e5ffbbe8f10ce3e41b770f7c
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj
@@ -0,0 +1,478 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		168DA950D7D6CF91EBF70A17 /* Pods_paddle_mobile_unit_test.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 8BCD4792E483BFEE9F5523DE /* Pods_paddle_mobile_unit_test.framework */; };
+		FC607427211DF3B100B17547 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC60734E211DF3B000B17547 /* synset.txt */; };
+		FC607428211DF3B100B17547 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FC60734F211DF3B000B17547 /* banana.jpeg */; };
+		FC607429211DF3B100B17547 /* iphone.JPG in Resources */ = {isa = PBXBuildFile; fileRef = FC607350211DF3B000B17547 /* iphone.JPG */; };
+		FC60742A211DF3B100B17547 /* paddle-mobile.png in Resources */ = {isa = PBXBuildFile; fileRef = FC607351211DF3B000B17547 /* paddle-mobile.png */; };
+		FC60742B211DF3B100B17547 /* params in Resources */ = {isa = PBXBuildFile; fileRef = FC607354211DF3B000B17547 /* params */; };
+		FC60742C211DF3B100B17547 /* model in Resources */ = {isa = PBXBuildFile; fileRef = FC607355211DF3B000B17547 /* model */; };
+		FC91818D211DAE9A00B6F354 /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */; };
+		FC91818E211DAE9A00B6F354 /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
+		FCDFD409211D9185005AB38B /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD408211D9185005AB38B /* AppDelegate.swift */; };
+		FCDFD40B211D9185005AB38B /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDFD40A211D9185005AB38B /* ViewController.swift */; };
+		FCDFD40E211D9185005AB38B /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD40C211D9185005AB38B /* Main.storyboard */; };
+		FCDFD410211D9187005AB38B /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD40F211D9187005AB38B /* Assets.xcassets */; };
+		FCDFD413211D9187005AB38B /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCDFD411211D9187005AB38B /* LaunchScreen.storyboard */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+		FC91818F211DAE9B00B6F354 /* Embed Frameworks */ = {
+			isa = PBXCopyFilesBuildPhase;
+			buildActionMask = 2147483647;
+			dstPath = "";
+			dstSubfolderSpec = 10;
+			files = (
+				FC91818E211DAE9A00B6F354 /* paddle_mobile.framework in Embed Frameworks */,
+			);
+			name = "Embed Frameworks";
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+		5CC132C848027BE970FB2637 /* Pods-paddle-mobile-unit-test.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-unit-test.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test.debug.xcconfig"; sourceTree = "<group>"; };
+		72F34AE9677943FC580DE7F4 /* Pods-paddle-mobile-unit-test.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-unit-test.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test.release.xcconfig"; sourceTree = "<group>"; };
+		8BCD4792E483BFEE9F5523DE /* Pods_paddle_mobile_unit_test.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile_unit_test.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FC60734E211DF3B000B17547 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
+		FC60734F211DF3B000B17547 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
+		FC607350211DF3B000B17547 /* iphone.JPG */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = iphone.JPG; sourceTree = "<group>"; };
+		FC607351211DF3B000B17547 /* paddle-mobile.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "paddle-mobile.png"; sourceTree = "<group>"; };
+		FC607354211DF3B000B17547 /* params */ = {isa = PBXFileReference; lastKnownFileType = file; path = params; sourceTree = "<group>"; };
+		FC607355211DF3B000B17547 /* model */ = {isa = PBXFileReference; lastKnownFileType = file; path = model; sourceTree = "<group>"; };
+		FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FCDFD405211D9185005AB38B /* paddle-mobile-unit-test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-unit-test.app"; sourceTree = BUILT_PRODUCTS_DIR; };
+		FCDFD408211D9185005AB38B /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
+		FCDFD40A211D9185005AB38B /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
+		FCDFD40D211D9185005AB38B /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
+		FCDFD40F211D9187005AB38B /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
+		FCDFD412211D9187005AB38B /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
+		FCDFD414211D9187005AB38B /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		FCDFD402211D9185005AB38B /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC91818D211DAE9A00B6F354 /* paddle_mobile.framework in Frameworks */,
+				168DA950D7D6CF91EBF70A17 /* Pods_paddle_mobile_unit_test.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		CF78F766C11CC8AD67269581 /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				8BCD4792E483BFEE9F5523DE /* Pods_paddle_mobile_unit_test.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		F752428B187BC4E0928ACD3D /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+				5CC132C848027BE970FB2637 /* Pods-paddle-mobile-unit-test.debug.xcconfig */,
+				72F34AE9677943FC580DE7F4 /* Pods-paddle-mobile-unit-test.release.xcconfig */,
+			);
+			name = Pods;
+			sourceTree = "<group>";
+		};
+		FC60734D211DF3B000B17547 /* images */ = {
+			isa = PBXGroup;
+			children = (
+				FC60734E211DF3B000B17547 /* synset.txt */,
+				FC60734F211DF3B000B17547 /* banana.jpeg */,
+				FC607350211DF3B000B17547 /* iphone.JPG */,
+				FC607351211DF3B000B17547 /* paddle-mobile.png */,
+			);
+			name = images;
+			path = ../../images;
+			sourceTree = "<group>";
+		};
+		FC607352211DF3B000B17547 /* models */ = {
+			isa = PBXGroup;
+			children = (
+				FC607353211DF3B000B17547 /* mobilenet */,
+			);
+			name = models;
+			path = ../../models;
+			sourceTree = "<group>";
+		};
+		FC607353211DF3B000B17547 /* mobilenet */ = {
+			isa = PBXGroup;
+			children = (
+				FC607354211DF3B000B17547 /* params */,
+				FC607355211DF3B000B17547 /* model */,
+			);
+			path = mobilenet;
+			sourceTree = "<group>";
+		};
+		FCDFD3FC211D9185005AB38B = {
+			isa = PBXGroup;
+			children = (
+				FC91818C211DAE9A00B6F354 /* paddle_mobile.framework */,
+				FCDFD407211D9185005AB38B /* paddle-mobile-unit-test */,
+				FCDFD406211D9185005AB38B /* Products */,
+				F752428B187BC4E0928ACD3D /* Pods */,
+				CF78F766C11CC8AD67269581 /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		FCDFD406211D9185005AB38B /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				FCDFD405211D9185005AB38B /* paddle-mobile-unit-test.app */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		FCDFD407211D9185005AB38B /* paddle-mobile-unit-test */ = {
+			isa = PBXGroup;
+			children = (
+				FC60734D211DF3B000B17547 /* images */,
+				FC607352211DF3B000B17547 /* models */,
+				FCDFD408211D9185005AB38B /* AppDelegate.swift */,
+				FCDFD40A211D9185005AB38B /* ViewController.swift */,
+				FCDFD40C211D9185005AB38B /* Main.storyboard */,
+				FCDFD40F211D9187005AB38B /* Assets.xcassets */,
+				FCDFD411211D9187005AB38B /* LaunchScreen.storyboard */,
+				FCDFD414211D9187005AB38B /* Info.plist */,
+			);
+			path = "paddle-mobile-unit-test";
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+		FCDFD404211D9185005AB38B /* paddle-mobile-unit-test */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = FCDFD417211D9187005AB38B /* Build configuration list for PBXNativeTarget "paddle-mobile-unit-test" */;
+			buildPhases = (
+				5F5A9A9DC0C6307DEA4294C1 /* [CP] Check Pods Manifest.lock */,
+				FCDFD401211D9185005AB38B /* Sources */,
+				FCDFD402211D9185005AB38B /* Frameworks */,
+				FCDFD403211D9185005AB38B /* Resources */,
+				53A2089068F9D64BB96D4322 /* [CP] Embed Pods Frameworks */,
+				FC91818F211DAE9B00B6F354 /* Embed Frameworks */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "paddle-mobile-unit-test";
+			productName = "paddle-mobile-unit-test";
+			productReference = FCDFD405211D9185005AB38B /* paddle-mobile-unit-test.app */;
+			productType = "com.apple.product-type.application";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		FCDFD3FD211D9185005AB38B /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastSwiftUpdateCheck = 0940;
+				LastUpgradeCheck = 0940;
+				ORGANIZATIONNAME = orange;
+				TargetAttributes = {
+					FCDFD404211D9185005AB38B = {
+						CreatedOnToolsVersion = 9.4.1;
+					};
+				};
+			};
+			buildConfigurationList = FCDFD400211D9185005AB38B /* Build configuration list for PBXProject "paddle-mobile-unit-test" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+				Base,
+			);
+			mainGroup = FCDFD3FC211D9185005AB38B;
+			productRefGroup = FCDFD406211D9185005AB38B /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				FCDFD404211D9185005AB38B /* paddle-mobile-unit-test */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		FCDFD403211D9185005AB38B /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC607427211DF3B100B17547 /* synset.txt in Resources */,
+				FC60742B211DF3B100B17547 /* params in Resources */,
+				FC607428211DF3B100B17547 /* banana.jpeg in Resources */,
+				FC60742A211DF3B100B17547 /* paddle-mobile.png in Resources */,
+				FC607429211DF3B100B17547 /* iphone.JPG in Resources */,
+				FC60742C211DF3B100B17547 /* model in Resources */,
+				FCDFD413211D9187005AB38B /* LaunchScreen.storyboard in Resources */,
+				FCDFD410211D9187005AB38B /* Assets.xcassets in Resources */,
+				FCDFD40E211D9185005AB38B /* Main.storyboard in Resources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		53A2089068F9D64BB96D4322 /* [CP] Embed Pods Frameworks */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh",
+				"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
+			);
+			name = "[CP] Embed Pods Frameworks";
+			outputPaths = (
+				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "\"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh\"\n";
+			showEnvVarsInLog = 0;
+		};
+		5F5A9A9DC0C6307DEA4294C1 /* [CP] Check Pods Manifest.lock */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
+				"${PODS_ROOT}/Manifest.lock",
+			);
+			name = "[CP] Check Pods Manifest.lock";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/Pods-paddle-mobile-unit-test-checkManifestLockResult.txt",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		FCDFD401211D9185005AB38B /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FCDFD40B211D9185005AB38B /* ViewController.swift in Sources */,
+				FCDFD409211D9185005AB38B /* AppDelegate.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXVariantGroup section */
+		FCDFD40C211D9185005AB38B /* Main.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FCDFD40D211D9185005AB38B /* Base */,
+			);
+			name = Main.storyboard;
+			sourceTree = "<group>";
+		};
+		FCDFD411211D9187005AB38B /* LaunchScreen.storyboard */ = {
+			isa = PBXVariantGroup;
+			children = (
+				FCDFD412211D9187005AB38B /* Base */,
+			);
+			name = LaunchScreen.storyboard;
+			sourceTree = "<group>";
+		};
+/* End PBXVariantGroup section */
+
+/* Begin XCBuildConfiguration section */
+		FCDFD415211D9187005AB38B /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.4;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+			};
+			name = Debug;
+		};
+		FCDFD416211D9187005AB38B /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.4;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+				VALIDATE_PRODUCT = YES;
+			};
+			name = Release;
+		};
+		FCDFD418211D9187005AB38B /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 5CC132C848027BE970FB2637 /* Pods-paddle-mobile-unit-test.debug.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-unit-test/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile-unit-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		FCDFD419211D9187005AB38B /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = 72F34AE9677943FC580DE7F4 /* Pods-paddle-mobile-unit-test.release.xcconfig */;
+			buildSettings = {
+				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
+				CODE_SIGN_STYLE = Automatic;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				INFOPLIST_FILE = "paddle-mobile-unit-test/Info.plist";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+				);
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile-unit-test";
+				PRODUCT_NAME = "$(TARGET_NAME)";
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		FCDFD400211D9185005AB38B /* Build configuration list for PBXProject "paddle-mobile-unit-test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FCDFD415211D9187005AB38B /* Debug */,
+				FCDFD416211D9187005AB38B /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		FCDFD417211D9187005AB38B /* Build configuration list for PBXNativeTarget "paddle-mobile-unit-test" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FCDFD418211D9187005AB38B /* Debug */,
+				FCDFD419211D9187005AB38B /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = FCDFD3FD211D9185005AB38B /* Project object */;
+}
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/contents.xcworkspacedata
new file mode 100644
index 0000000000000000000000000000000000000000..cb4dfcfed95671fcf6dca7b01068d171ad562443
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:paddle-mobile-unit-test.xcodeproj">
+   </FileRef>
+</Workspace>
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
new file mode 100644
index 0000000000000000000000000000000000000000..18d981003d68d0546c4804ac2ff47dd97c6e7921
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
new file mode 100644
index 0000000000000000000000000000000000000000..775d17b268941d24e8e9ebd7ac5ae26c2c0dbda9
Binary files /dev/null and b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate differ
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 0000000000000000000000000000000000000000..994fb8e4886aba91298c168a1b06888d8825b655
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>paddle-mobile-unit-test.xcscheme</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>6</integer>
+		</dict>
+	</dict>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift
new file mode 100644
index 0000000000000000000000000000000000000000..6ab6f7c05e30049e850170409efcd6f049c73abe
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/AppDelegate.swift
@@ -0,0 +1,44 @@
+//
+//  AppDelegate.swift
+//  paddle-mobile-unit-test
+//
+//  Created by liuRuiLong on 2018/8/10.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import UIKit
+
+@UIApplicationMain
+class AppDelegate: UIResponder, UIApplicationDelegate {
+
+    /// The app's window; nothing in this class assigns it.
+    var window: UIWindow?
+
+    /// Launch hook: performs no custom setup and always returns `true`.
+    func application(_ app: UIApplication, didFinishLaunchingWithOptions options: [UIApplicationLaunchOptionsKey: Any]?) -> Bool {
+        return true
+    }
+
+    // MARK: - Lifecycle callbacks (all intentionally empty)
+
+    /// No-op: nothing is paused when the app is about to leave the active state.
+    func applicationWillResignActive(_ app: UIApplication) {
+    }
+
+    /// No-op: no state is saved or released when the app enters the background.
+    func applicationDidEnterBackground(_ app: UIApplication) {
+    }
+
+    /// No-op: nothing is undone on the way back to the foreground.
+    func applicationWillEnterForeground(_ app: UIApplication) {
+    }
+
+    /// No-op: no tasks are restarted when the app becomes active.
+    func applicationDidBecomeActive(_ app: UIApplication) {
+    }
+
+    /// No-op: nothing is persisted when the app is about to terminate.
+    func applicationWillTerminate(_ app: UIApplication) {
+    }
+
+}
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/AppIcon.appiconset/Contents.json b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/AppIcon.appiconset/Contents.json
new file mode 100644
index 0000000000000000000000000000000000000000..d8db8d65fd79fd541b2b7eba75c7378af3448f9c
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/AppIcon.appiconset/Contents.json
@@ -0,0 +1,98 @@
+{
+  "images" : [
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "20x20",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "29x29",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "40x40",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "iphone",
+      "size" : "60x60",
+      "scale" : "3x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "20x20",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "29x29",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "40x40",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "1x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "76x76",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ipad",
+      "size" : "83.5x83.5",
+      "scale" : "2x"
+    },
+    {
+      "idiom" : "ios-marketing",
+      "size" : "1024x1024",
+      "scale" : "1x"
+    }
+  ],
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/Contents.json b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/Contents.json
new file mode 100644
index 0000000000000000000000000000000000000000..da4a164c918651cdd1e11dca5cc62c333f097601
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Assets.xcassets/Contents.json
@@ -0,0 +1,6 @@
+{
+  "info" : {
+    "version" : 1,
+    "author" : "xcode"
+  }
+}
\ No newline at end of file
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/LaunchScreen.storyboard b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/LaunchScreen.storyboard
new file mode 100644
index 0000000000000000000000000000000000000000..f83f6fd5810b9c852cf98563d82d5ed1e84ff893
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/LaunchScreen.storyboard
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" systemVersion="17A277" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="EHf-IW-A2E">
+            <objects>
+                <viewController id="01J-lp-oVM" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
+            </objects>
+            <point key="canvasLocation" x="53" y="375"/>
+        </scene>
+    </scenes>
+</document>
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/Main.storyboard b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/Main.storyboard
new file mode 100644
index 0000000000000000000000000000000000000000..03c13c2286150ad7416086bec99d2c46ccca6efc
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Base.lproj/Main.storyboard
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" systemVersion="17A277" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
+    <dependencies>
+        <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
+        <capability name="Safe area layout guides" minToolsVersion="9.0"/>
+        <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
+    </dependencies>
+    <scenes>
+        <!--View Controller-->
+        <scene sceneID="tne-QT-ifu">
+            <objects>
+                <viewController id="BYZ-38-t0r" customClass="ViewController" customModuleProvider="target" sceneMemberID="viewController">
+                    <view key="view" contentMode="scaleToFill" id="8bC-Xf-vdC">
+                        <rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
+                        <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
+                        <color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
+                        <viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
+                    </view>
+                </viewController>
+                <placeholder placeholderIdentifier="IBFirstResponder" id="dkx-z0-nzr" sceneMemberID="firstResponder"/>
+            </objects>
+        </scene>
+    </scenes>
+</document>
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Info.plist b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Info.plist
new file mode 100644
index 0000000000000000000000000000000000000000..16be3b681122de83e380d47b840b7d0486f71f86
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/Info.plist
@@ -0,0 +1,45 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>APPL</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>1</string>
+	<key>LSRequiresIPhoneOS</key>
+	<true/>
+	<key>UILaunchStoryboardName</key>
+	<string>LaunchScreen</string>
+	<key>UIMainStoryboardFile</key>
+	<string>Main</string>
+	<key>UIRequiredDeviceCapabilities</key>
+	<array>
+		<string>armv7</string>
+	</array>
+	<key>UISupportedInterfaceOrientations</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+	<key>UISupportedInterfaceOrientations~ipad</key>
+	<array>
+		<string>UIInterfaceOrientationPortrait</string>
+		<string>UIInterfaceOrientationPortraitUpsideDown</string>
+		<string>UIInterfaceOrientationLandscapeLeft</string>
+		<string>UIInterfaceOrientationLandscapeRight</string>
+	</array>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift
new file mode 100644
index 0000000000000000000000000000000000000000..d57b610e4d10f02d2eace4892a6d55eda8f2c9b9
--- /dev/null
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test/ViewController.swift
@@ -0,0 +1,19 @@
+//
+//  ViewController.swift
+//  paddle-mobile-unit-test
+//
+//  Created by liuRuiLong on 2018/8/10.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import UIKit
+import paddle_mobile
+
+/// View controller referenced as `customClass="ViewController"` by Main.storyboard.
+class ViewController: UIViewController {
+    /// Runs the superclass load step, then prints the marker string " done ".
+    override func viewDidLoad() {
+        super.viewDidLoad()
+        print(" done ")
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
new file mode 100644
index 0000000000000000000000000000000000000000..6bceab43210c42ef83a2152463caf3bc8917b8c8
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
@@ -0,0 +1,629 @@
+// !$*UTF8*$!
+{
+	archiveVersion = 1;
+	classes = {
+	};
+	objectVersion = 50;
+	objects = {
+
+/* Begin PBXBuildFile section */
+		D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */; };
+		FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */ = {isa = PBXBuildFile; fileRef = FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */; settings = {ATTRIBUTES = (Public, ); }; };
+		FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9420E11C9A0081E9F8 /* Extensions.swift */; };
+		FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9520E11C9A0081E9F8 /* Errors.swift */; };
+		FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9620E11C9A0081E9F8 /* Types.swift */; };
+		FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9A20E11CA00081E9F8 /* Executor.swift */; };
+		FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9D20E11CB20081E9F8 /* Tensor.swift */; };
+		FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B9E20E11CB20081E9F8 /* Dim.swift */; };
+		FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA120E11CB70081E9F8 /* Loader.swift */; };
+		FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA420E11CBC0081E9F8 /* ConvOp.swift */; };
+		FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */; };
+		FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA620E11CBC0081E9F8 /* Operator.swift */; };
+		FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */; };
+		FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA820E11CBC0081E9F8 /* ReluOp.swift */; };
+		FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BAF20E11CC20081E9F8 /* framework.pb.swift */; };
+		FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB020E11CC20081E9F8 /* Scope.swift */; };
+		FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB120E11CC20081E9F8 /* TensorDesc.swift */; };
+		FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */; };
+		FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* VarDesc.swift */; };
+		FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB420E11CC20081E9F8 /* Program.swift */; };
+		FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* OpDesc.swift */; };
+		FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB620E11CC20081E9F8 /* Attribute.swift */; };
+		FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* BlockDesc.swift */; };
+		FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */; };
+		FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */; };
+		FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
+		FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; };
+		FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; };
+		FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1B186520ECF1C600678B91 /* ResizeKernel.swift */; };
+		FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; };
+		FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
+		FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
+		FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
+		FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; };
+		FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; };
+		FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; };
+		FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; };
+		FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; };
+		FC9D038420E23B01000F735A /* Texture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038320E23B01000F735A /* Texture.swift */; };
+		FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; };
+		FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; };
+		FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; };
+		FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */; };
+		FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */; };
+		FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6F20F31B720007374F /* ReshapeKernel.swift */; };
+		FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7120F343420007374F /* ConvAddOp.swift */; };
+		FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E7320F3437E0007374F /* ConvAddKernel.swift */; };
+		FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCDC0FEA21099A1D00DC9EFB /* Tools.swift */; };
+		FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
+		FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
+		FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+		CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.debug.xcconfig"; sourceTree = "<group>"; };
+		DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile.release.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile/Pods-paddle-mobile.release.xcconfig"; sourceTree = "<group>"; };
+		FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = paddle_mobile.h; sourceTree = "<group>"; };
+		FC039B6E20E11C3C0081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
+		FC039B9420E11C9A0081E9F8 /* Extensions.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Extensions.swift; sourceTree = "<group>"; };
+		FC039B9520E11C9A0081E9F8 /* Errors.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Errors.swift; sourceTree = "<group>"; };
+		FC039B9620E11C9A0081E9F8 /* Types.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Types.swift; sourceTree = "<group>"; };
+		FC039B9A20E11CA00081E9F8 /* Executor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Executor.swift; sourceTree = "<group>"; };
+		FC039B9D20E11CB20081E9F8 /* Tensor.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Tensor.swift; sourceTree = "<group>"; };
+		FC039B9E20E11CB20081E9F8 /* Dim.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Dim.swift; sourceTree = "<group>"; };
+		FC039BA120E11CB70081E9F8 /* Loader.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Loader.swift; sourceTree = "<group>"; };
+		FC039BA420E11CBC0081E9F8 /* ConvOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ConvOp.swift; sourceTree = "<group>"; };
+		FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ElementwiseAddOp.swift; sourceTree = "<group>"; };
+		FC039BA620E11CBC0081E9F8 /* Operator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Operator.swift; sourceTree = "<group>"; };
+		FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BatchNormOp.swift; sourceTree = "<group>"; };
+		FC039BA820E11CBC0081E9F8 /* ReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReluOp.swift; sourceTree = "<group>"; };
+		FC039BAF20E11CC20081E9F8 /* framework.pb.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = framework.pb.swift; sourceTree = "<group>"; };
+		FC039BB020E11CC20081E9F8 /* Scope.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Scope.swift; sourceTree = "<group>"; };
+		FC039BB120E11CC20081E9F8 /* TensorDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TensorDesc.swift; sourceTree = "<group>"; };
+		FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ProgramDesc.swift; sourceTree = "<group>"; };
+		FC039BB320E11CC20081E9F8 /* VarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VarDesc.swift; sourceTree = "<group>"; };
+		FC039BB420E11CC20081E9F8 /* Program.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Program.swift; sourceTree = "<group>"; };
+		FC039BB520E11CC20081E9F8 /* OpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OpDesc.swift; sourceTree = "<group>"; };
+		FC039BB620E11CC20081E9F8 /* Attribute.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Attribute.swift; sourceTree = "<group>"; };
+		FC039BB720E11CC20081E9F8 /* BlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BlockDesc.swift; sourceTree = "<group>"; };
+		FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReluKernel.swift; sourceTree = "<group>"; };
+		FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvKernel.swift; sourceTree = "<group>"; };
+		FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
+		FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; };
+		FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; };
+		FC1B186520ECF1C600678B91 /* ResizeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ResizeKernel.swift; sourceTree = "<group>"; };
+		FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; };
+		FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
+		FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
+		FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
+		FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; };
+		FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; };
+		FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; };
+		FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; };
+		FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; };
+		FC9D038320E23B01000F735A /* Texture.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture.swift; sourceTree = "<group>"; };
+		FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; };
+		FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; };
+		FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; };
+		FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxKernel.swift; sourceTree = "<group>"; };
+		FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReshapeOp.swift; sourceTree = "<group>"; };
+		FCD04E6F20F31B720007374F /* ReshapeKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReshapeKernel.swift; sourceTree = "<group>"; };
+		FCD04E7120F343420007374F /* ConvAddOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddOp.swift; sourceTree = "<group>"; };
+		FCD04E7320F3437E0007374F /* ConvAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddKernel.swift; sourceTree = "<group>"; };
+		FCDC0FEA21099A1D00DC9EFB /* Tools.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Tools.swift; sourceTree = "<group>"; };
+		FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
+		FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
+		FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Kernel.swift"; sourceTree = SOURCE_ROOT; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+		FC039B6620E11C3C0081E9F8 /* Frameworks */ = {
+			isa = PBXFrameworksBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+		336CBE234BF5DE48658DE65F /* Frameworks */ = {
+			isa = PBXGroup;
+			children = (
+				DD2E06330A1E7129C918DB46 /* Pods_paddle_mobile.framework */,
+			);
+			name = Frameworks;
+			sourceTree = "<group>";
+		};
+		8EB858F9B68D372C9F1CA263 /* Pods */ = {
+			isa = PBXGroup;
+			children = (
+				CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */,
+				E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */,
+			);
+			name = Pods;
+			sourceTree = "<group>";
+		};
+		FC039B6020E11C3C0081E9F8 = {
+			isa = PBXGroup;
+			children = (
+				FC039B6C20E11C3C0081E9F8 /* paddle-mobile */,
+				FC039B6B20E11C3C0081E9F8 /* Products */,
+				8EB858F9B68D372C9F1CA263 /* Pods */,
+				336CBE234BF5DE48658DE65F /* Frameworks */,
+			);
+			sourceTree = "<group>";
+		};
+		FC039B6B20E11C3C0081E9F8 /* Products */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */,
+			);
+			name = Products;
+			sourceTree = "<group>";
+		};
+		FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = {
+			isa = PBXGroup;
+			children = (
+				FC039BAE20E11CC20081E9F8 /* Program */,
+				FC039BA320E11CBC0081E9F8 /* Operators */,
+				FC039BA120E11CB70081E9F8 /* Loader.swift */,
+				FC039B9A20E11CA00081E9F8 /* Executor.swift */,
+				FC039B9C20E11CB20081E9F8 /* framework */,
+				FC039B9320E11C9A0081E9F8 /* Common */,
+				FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */,
+				FC039B6E20E11C3C0081E9F8 /* Info.plist */,
+			);
+			path = "paddle-mobile";
+			sourceTree = "<group>";
+		};
+		FC039B9320E11C9A0081E9F8 /* Common */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B9420E11C9A0081E9F8 /* Extensions.swift */,
+				FC039B9520E11C9A0081E9F8 /* Errors.swift */,
+				FC039B9620E11C9A0081E9F8 /* Types.swift */,
+				FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */,
+				FC60DB8820E9AAA500FF203F /* MetalExtension.swift */,
+				FCDC0FEA21099A1D00DC9EFB /* Tools.swift */,
+			);
+			path = Common;
+			sourceTree = "<group>";
+		};
+		FC039B9C20E11CB20081E9F8 /* framework */ = {
+			isa = PBXGroup;
+			children = (
+				FC039B9D20E11CB20081E9F8 /* Tensor.swift */,
+				FC039B9E20E11CB20081E9F8 /* Dim.swift */,
+				FC9D038320E23B01000F735A /* Texture.swift */,
+			);
+			path = framework;
+			sourceTree = "<group>";
+		};
+		FC039BA320E11CBC0081E9F8 /* Operators */ = {
+			isa = PBXGroup;
+			children = (
+				FC086BA520E67E8500D85EF7 /* Kernels */,
+				FCD592FA20E248EC00252966 /* Base */,
+				FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */,
+				FC039BA420E11CBC0081E9F8 /* ConvOp.swift */,
+				FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */,
+				FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */,
+				FC039BA820E11CBC0081E9F8 /* ReluOp.swift */,
+				FC9D037F20E22FBB000F735A /* FeedOp.swift */,
+				FC9D038120E2312E000F735A /* FetchOp.swift */,
+				FCD04E6520F314C50007374F /* PoolOp.swift */,
+				FCD04E6920F319EC0007374F /* SoftmaxOp.swift */,
+				FCD04E6D20F31B4B0007374F /* ReshapeOp.swift */,
+				FCD04E7120F343420007374F /* ConvAddOp.swift */,
+			);
+			path = Operators;
+			sourceTree = "<group>";
+		};
+		FC039BAE20E11CC20081E9F8 /* Program */ = {
+			isa = PBXGroup;
+			children = (
+				FC039BAF20E11CC20081E9F8 /* framework.pb.swift */,
+				FC039BB020E11CC20081E9F8 /* Scope.swift */,
+				FC039BB120E11CC20081E9F8 /* TensorDesc.swift */,
+				FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */,
+				FC039BB320E11CC20081E9F8 /* VarDesc.swift */,
+				FC039BB420E11CC20081E9F8 /* Program.swift */,
+				FC039BB520E11CC20081E9F8 /* OpDesc.swift */,
+				FC039BB620E11CC20081E9F8 /* Attribute.swift */,
+				FC039BB720E11CC20081E9F8 /* BlockDesc.swift */,
+				FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */,
+			);
+			path = Program;
+			sourceTree = "<group>";
+		};
+		FC086BA520E67E8500D85EF7 /* Kernels */ = {
+			isa = PBXGroup;
+			children = (
+				FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */,
+				FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
+				FC1B16B220EC9A4F00678B91 /* Kernels.metal */,
+				FC1B186520ECF1C600678B91 /* ResizeKernel.swift */,
+				FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */,
+				FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */,
+				FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */,
+				FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */,
+				FC4CB74820F0B954007C0C6D /* ConvKernel.metal */,
+				FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */,
+				FCD04E6720F315020007374F /* PoolKernel.swift */,
+				FCD04E6B20F31A280007374F /* SoftmaxKernel.swift */,
+				FCD04E6F20F31B720007374F /* ReshapeKernel.swift */,
+				FCD04E7320F3437E0007374F /* ConvAddKernel.swift */,
+			);
+			path = Kernels;
+			sourceTree = "<group>";
+		};
+		FCD592FA20E248EC00252966 /* Base */ = {
+			isa = PBXGroup;
+			children = (
+				FC9D037820E229E4000F735A /* OpParam.swift */,
+				FC039BA620E11CBC0081E9F8 /* Operator.swift */,
+				FC82735820E3C04200BE430A /* OpCreator.swift */,
+			);
+			path = Base;
+			sourceTree = "<group>";
+		};
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+		FC039B6720E11C3C0081E9F8 /* Headers */ = {
+			isa = PBXHeadersBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+		FC039B6920E11C3C0081E9F8 /* paddle-mobile */ = {
+			isa = PBXNativeTarget;
+			buildConfigurationList = FC039B7220E11C3C0081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile" */;
+			buildPhases = (
+				AF33BB8D0770A77AC22B5EF4 /* [CP] Check Pods Manifest.lock */,
+				FC039B6520E11C3C0081E9F8 /* Sources */,
+				FC039B6620E11C3C0081E9F8 /* Frameworks */,
+				FC039B6720E11C3C0081E9F8 /* Headers */,
+				FC039B6820E11C3C0081E9F8 /* Resources */,
+			);
+			buildRules = (
+			);
+			dependencies = (
+			);
+			name = "paddle-mobile";
+			productName = "paddle-mobile";
+			productReference = FC039B6A20E11C3C0081E9F8 /* paddle_mobile.framework */;
+			productType = "com.apple.product-type.framework";
+		};
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+		FC039B6120E11C3C0081E9F8 /* Project object */ = {
+			isa = PBXProject;
+			attributes = {
+				LastUpgradeCheck = 0930;
+				ORGANIZATIONNAME = orange;
+				TargetAttributes = {
+					FC039B6920E11C3C0081E9F8 = {
+						CreatedOnToolsVersion = 9.3.1;
+					};
+				};
+			};
+			buildConfigurationList = FC039B6420E11C3C0081E9F8 /* Build configuration list for PBXProject "paddle-mobile" */;
+			compatibilityVersion = "Xcode 9.3";
+			developmentRegion = en;
+			hasScannedForEncodings = 0;
+			knownRegions = (
+				en,
+			);
+			mainGroup = FC039B6020E11C3C0081E9F8;
+			productRefGroup = FC039B6B20E11C3C0081E9F8 /* Products */;
+			projectDirPath = "";
+			projectRoot = "";
+			targets = (
+				FC039B6920E11C3C0081E9F8 /* paddle-mobile */,
+			);
+		};
+/* End PBXProject section */
+
+/* Begin PBXResourcesBuildPhase section */
+		FC039B6820E11C3C0081E9F8 /* Resources */ = {
+			isa = PBXResourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXResourcesBuildPhase section */
+
+/* Begin PBXShellScriptBuildPhase section */
+		AF33BB8D0770A77AC22B5EF4 /* [CP] Check Pods Manifest.lock */ = {
+			isa = PBXShellScriptBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+			);
+			inputPaths = (
+				"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
+				"${PODS_ROOT}/Manifest.lock",
+			);
+			name = "[CP] Check Pods Manifest.lock";
+			outputPaths = (
+				"$(DERIVED_FILE_DIR)/Pods-paddle-mobile-checkManifestLockResult.txt",
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+			shellPath = /bin/sh;
+			shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n    # print error to STDERR\n    echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n    exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
+			showEnvVarsInLog = 0;
+		};
+/* End PBXShellScriptBuildPhase section */
+
+/* Begin PBXSourcesBuildPhase section */
+		FC039B6520E11C3C0081E9F8 /* Sources */ = {
+			isa = PBXSourcesBuildPhase;
+			buildActionMask = 2147483647;
+			files = (
+				FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */,
+				FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
+				FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */,
+				FC039BAA20E11CBC0081E9F8 /* ElementwiseAddOp.swift in Sources */,
+				FC039B9B20E11CA00081E9F8 /* Executor.swift in Sources */,
+				FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
+				FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
+				FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
+				FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
+				FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
+				FC1B186620ECF1C600678B91 /* ResizeKernel.swift in Sources */,
+				FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
+				FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
+				FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
+				FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */,
+				FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */,
+				FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
+				FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
+				FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
+				FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
+				FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
+				FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
+				FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
+				FC9D038420E23B01000F735A /* Texture.swift in Sources */,
+				FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
+				FC039B9820E11C9A0081E9F8 /* Errors.swift in Sources */,
+				FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */,
+				FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */,
+				FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */,
+				FCD04E6620F314C50007374F /* PoolOp.swift in Sources */,
+				FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
+				FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
+				FCDC0FEB21099A1D00DC9EFB /* Tools.swift in Sources */,
+				FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
+				FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
+				FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
+				FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
+				FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */,
+				FC9D038220E2312E000F735A /* FetchOp.swift in Sources */,
+				FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */,
+				FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
+				FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */,
+				FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
+				FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
+				FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
+				FCD04E6820F315020007374F /* PoolKernel.swift in Sources */,
+				FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
+				FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */,
+				FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */,
+			);
+			runOnlyForDeploymentPostprocessing = 0;
+		};
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+		FC039B7020E11C3C0081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = dwarf;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				ENABLE_TESTABILITY = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_DYNAMIC_NO_PIC = NO;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_OPTIMIZATION_LEVEL = 0;
+				GCC_PREPROCESSOR_DEFINITIONS = (
+					"DEBUG=1",
+					"$(inherited)",
+				);
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = YES;
+				ONLY_ACTIVE_ARCH = YES;
+				SDKROOT = iphoneos;
+				SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
+				SWIFT_OPTIMIZATION_LEVEL = "-Onone";
+				VERSIONING_SYSTEM = "apple-generic";
+				VERSION_INFO_PREFIX = "";
+			};
+			name = Debug;
+		};
+		FC039B7120E11C3C0081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			buildSettings = {
+				ALWAYS_SEARCH_USER_PATHS = NO;
+				CLANG_ANALYZER_NONNULL = YES;
+				CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
+				CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
+				CLANG_CXX_LIBRARY = "libc++";
+				CLANG_ENABLE_MODULES = YES;
+				CLANG_ENABLE_OBJC_ARC = YES;
+				CLANG_ENABLE_OBJC_WEAK = YES;
+				CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
+				CLANG_WARN_BOOL_CONVERSION = YES;
+				CLANG_WARN_COMMA = YES;
+				CLANG_WARN_CONSTANT_CONVERSION = YES;
+				CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
+				CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
+				CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
+				CLANG_WARN_EMPTY_BODY = YES;
+				CLANG_WARN_ENUM_CONVERSION = YES;
+				CLANG_WARN_INFINITE_RECURSION = YES;
+				CLANG_WARN_INT_CONVERSION = YES;
+				CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
+				CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
+				CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
+				CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
+				CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
+				CLANG_WARN_STRICT_PROTOTYPES = YES;
+				CLANG_WARN_SUSPICIOUS_MOVE = YES;
+				CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
+				CLANG_WARN_UNREACHABLE_CODE = YES;
+				CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
+				CODE_SIGN_IDENTITY = "iPhone Developer";
+				COPY_PHASE_STRIP = NO;
+				CURRENT_PROJECT_VERSION = 1;
+				DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+				ENABLE_NS_ASSERTIONS = NO;
+				ENABLE_STRICT_OBJC_MSGSEND = YES;
+				GCC_C_LANGUAGE_STANDARD = gnu11;
+				GCC_NO_COMMON_BLOCKS = YES;
+				GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+				GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
+				GCC_WARN_UNDECLARED_SELECTOR = YES;
+				GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
+				GCC_WARN_UNUSED_FUNCTION = YES;
+				GCC_WARN_UNUSED_VARIABLE = YES;
+				IPHONEOS_DEPLOYMENT_TARGET = 11.3;
+				MTL_ENABLE_DEBUG_INFO = NO;
+				SDKROOT = iphoneos;
+				SWIFT_COMPILATION_MODE = wholemodule;
+				SWIFT_OPTIMIZATION_LEVEL = "-O";
+				VALIDATE_PRODUCT = YES;
+				VERSIONING_SYSTEM = "apple-generic";
+				VERSION_INFO_PREFIX = "";
+			};
+			name = Release;
+		};
+		FC039B7320E11C3C0081E9F8 /* Debug */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = CDF58151D902A1CBAE56A0C2 /* Pods-paddle-mobile.debug.xcconfig */;
+			buildSettings = {
+				CODE_SIGN_IDENTITY = "";
+				CODE_SIGN_STYLE = Automatic;
+				DEFINES_MODULE = YES;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				DYLIB_INSTALL_NAME_BASE = "@rpath";
+				INFOPLIST_FILE = "paddle-mobile/Info.plist";
+				INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				MTL_LANGUAGE_REVISION = UseDeploymentTarget;
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
+				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
+				SKIP_INSTALL = YES;
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Debug;
+		};
+		FC039B7420E11C3C0081E9F8 /* Release */ = {
+			isa = XCBuildConfiguration;
+			baseConfigurationReference = E2A7957C92EDA5C3BEC0FFC2 /* Pods-paddle-mobile.release.xcconfig */;
+			buildSettings = {
+				CODE_SIGN_IDENTITY = "";
+				CODE_SIGN_STYLE = Automatic;
+				DEFINES_MODULE = YES;
+				DEVELOPMENT_TEAM = A798K58VVL;
+				DYLIB_COMPATIBILITY_VERSION = 1;
+				DYLIB_CURRENT_VERSION = 1;
+				DYLIB_INSTALL_NAME_BASE = "@rpath";
+				INFOPLIST_FILE = "paddle-mobile/Info.plist";
+				INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
+				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
+				LD_RUNPATH_SEARCH_PATHS = (
+					"$(inherited)",
+					"@executable_path/Frameworks",
+					"@loader_path/Frameworks",
+				);
+				MTL_LANGUAGE_REVISION = UseDeploymentTarget;
+				PRODUCT_BUNDLE_IDENTIFIER = "orange.paddle-mobile";
+				PRODUCT_NAME = "$(TARGET_NAME:c99extidentifier)";
+				SKIP_INSTALL = YES;
+				SWIFT_VERSION = 4.0;
+				TARGETED_DEVICE_FAMILY = "1,2";
+			};
+			name = Release;
+		};
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+		FC039B6420E11C3C0081E9F8 /* Build configuration list for PBXProject "paddle-mobile" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B7020E11C3C0081E9F8 /* Debug */,
+				FC039B7120E11C3C0081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+		FC039B7220E11C3C0081E9F8 /* Build configuration list for PBXNativeTarget "paddle-mobile" */ = {
+			isa = XCConfigurationList;
+			buildConfigurations = (
+				FC039B7320E11C3C0081E9F8 /* Debug */,
+				FC039B7420E11C3C0081E9F8 /* Release */,
+			);
+			defaultConfigurationIsVisible = 0;
+			defaultConfigurationName = Release;
+		};
+/* End XCConfigurationList section */
+	};
+	rootObject = FC039B6120E11C3C0081E9F8 /* Project object */;
+}
diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/contents.xcworkspacedata
new file mode 100644
index 0000000000000000000000000000000000000000..bb84e46b46d8c2c496c068dc15f2304785ed8e31
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+   version = "1.0">
+   <FileRef
+      location = "self:paddle-mobile.xcodeproj">
+   </FileRef>
+</Workspace>
diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
new file mode 100644
index 0000000000000000000000000000000000000000..18d981003d68d0546c4804ac2ff47dd97c6e7921
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>IDEDidComputeMac32BitWarning</key>
+	<true/>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate
new file mode 100644
index 0000000000000000000000000000000000000000..6b9abef67c5d123b4fb22117ed3f4f575de52aa0
Binary files /dev/null and b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.xcworkspace/xcuserdata/liuruilong.xcuserdatad/UserInterfaceState.xcuserstate differ
diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme
new file mode 100644
index 0000000000000000000000000000000000000000..7c83f42ceca9f68af4f45064cb29c9e3a3512b8e
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/paddle-mobile.xcscheme
@@ -0,0 +1,80 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+   LastUpgradeVersion = "0940"
+   version = "1.3">
+   <BuildAction
+      parallelizeBuildables = "YES"
+      buildImplicitDependencies = "YES">
+      <BuildActionEntries>
+         <BuildActionEntry
+            buildForTesting = "YES"
+            buildForRunning = "YES"
+            buildForProfiling = "YES"
+            buildForArchiving = "YES"
+            buildForAnalyzing = "YES">
+            <BuildableReference
+               BuildableIdentifier = "primary"
+               BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
+               BuildableName = "paddle_mobile.framework"
+               BlueprintName = "paddle-mobile"
+               ReferencedContainer = "container:paddle-mobile.xcodeproj">
+            </BuildableReference>
+         </BuildActionEntry>
+      </BuildActionEntries>
+   </BuildAction>
+   <TestAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      shouldUseLaunchSchemeArgsEnv = "YES">
+      <Testables>
+      </Testables>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </TestAction>
+   <LaunchAction
+      buildConfiguration = "Debug"
+      selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
+      selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
+      launchStyle = "0"
+      useCustomWorkingDirectory = "NO"
+      ignoresPersistentStateOnLaunch = "NO"
+      debugDocumentVersioning = "YES"
+      debugServiceExtension = "internal"
+      allowLocationSimulation = "YES">
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
+            BuildableName = "paddle_mobile.framework"
+            BlueprintName = "paddle-mobile"
+            ReferencedContainer = "container:paddle-mobile.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+      <AdditionalOptions>
+      </AdditionalOptions>
+   </LaunchAction>
+   <ProfileAction
+      buildConfiguration = "Release"
+      shouldUseLaunchSchemeArgsEnv = "YES"
+      savedToolIdentifier = ""
+      useCustomWorkingDirectory = "NO"
+      debugDocumentVersioning = "YES">
+      <MacroExpansion>
+         <BuildableReference
+            BuildableIdentifier = "primary"
+            BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
+            BuildableName = "paddle_mobile.framework"
+            BlueprintName = "paddle-mobile"
+            ReferencedContainer = "container:paddle-mobile.xcodeproj">
+         </BuildableReference>
+      </MacroExpansion>
+   </ProfileAction>
+   <AnalyzeAction
+      buildConfiguration = "Debug">
+   </AnalyzeAction>
+   <ArchiveAction
+      buildConfiguration = "Release"
+      revealArchiveInOrganizer = "YES">
+   </ArchiveAction>
+</Scheme>
diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 0000000000000000000000000000000000000000..067e2a5bea9382a8f2ffebfd809d2c8217631975
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/xcuserdata/liuruilong.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>SchemeUserState</key>
+	<dict>
+		<key>paddle-mobile.xcscheme</key>
+		<dict>
+			<key>orderHint</key>
+			<integer>0</integer>
+		</dict>
+	</dict>
+	<key>SuppressBuildableAutocreation</key>
+	<dict>
+		<key>FC039B6920E11C3C0081E9F8</key>
+		<dict>
+			<key>primary</key>
+			<true/>
+		</dict>
+	</dict>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile/paddle-mobile/Common/Errors.swift b/metal/paddle-mobile/paddle-mobile/Common/Errors.swift
new file mode 100644
index 0000000000000000000000000000000000000000..decb9509a613710232de9a006e5289662fe2cae5
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Common/Errors.swift
@@ -0,0 +1,24 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Error type of the paddle-mobile Swift framework.
+/// Each case names one failure category and carries an associated `message`
+/// string describing what went wrong.
+public enum PaddleMobileError: Error {
+    case loaderError(message: String), netError(message: String)
+    case memoryError(message: String), paramError(message: String)
+    case opError(message: String), predictError(message: String)
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift b/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift
new file mode 100644
index 0000000000000000000000000000000000000000..62954ede17d493ae12aa104d13a75dbc062e98a0
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Common/Extensions.swift
@@ -0,0 +1,112 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+// Custom `?!`: returns the unwrapped left operand; when it is nil, prints the right-hand message and aborts via fatalError with it.
+precedencegroup ExecutedOrFatalError {
+    associativity: left
+    higherThan: AssignmentPrecedence
+}
+infix operator ?!: ExecutedOrFatalError
+public func ?!<T>(option: T?, excuteOrError: @autoclosure () -> String) -> T {
+    guard let unwrapped = option else {
+        let message = excuteOrError()  // evaluate the autoclosure once instead of twice
+        print(message)
+        fatalError(message)
+    }
+    return unwrapped
+}
+
+// Lense: pairs a function that reads a B out of an A with one that combines a B and an A into a new A.
+struct Lense<A, B> {
+    let from: (A) -> B  // read direction: A -> B
+    let to: (B, A) -> A  // write direction: (B, A) -> A
+}
+
+// Precedence for `>>>`: left-associative, binding tighter than assignment.
+precedencegroup CombineLense {
+    associativity: left
+    higherThan: AssignmentPrecedence
+}
+infix operator >>>: CombineLense
+/// Composes two lenses: `right` goes from A to B and `left` from B to C, giving a Lense from A to C.
+func >>><A, B, C>(left: Lense<B, C>, right: Lense<A, B>) -> Lense<A, C> {
+    let read: (A) -> C = { whole in left.from(right.from(whole)) }
+    // Write path: read the intermediate B, update it through `left`, then store it back through `right`.
+    let write: (C, A) -> A = { part, whole in right.to(left.to(part, right.from(whole)), whole) }
+    return Lense<A, C>(from: read, to: write)
+}
+
+protocol CIntIndex {  // types offering a read/write subscript keyed by a CInt
+    associatedtype T  // value type produced and accepted by the subscript
+    subscript(index: CInt) -> T { get set }
+}
+
+// Lets an Array be subscripted directly with a CInt index; aborts when the index exceeds Int.max.
+extension Array: CIntIndex {
+    typealias T = Element
+    subscript(index: CInt) -> T {
+        get {
+            if Int64(index) > Int64(Int.max) {
+                fatalError("cint index out of Int range")
+            }
+            return self[Int(index)]
+        }
+        set {
+            if Int64(index) > Int64(Int.max) {
+                fatalError("cint index out of Int range")
+            }
+            self[Int(index)] = newValue
+        }
+    }
+}
+
+extension Array where Element: AnyObject {
+    /// Removes the first entry that is the very same object instance as `element`
+    /// (identity comparison, not equality). Does nothing when no such entry exists.
+    mutating func remove(element: Element) {
+        guard let position = index(where: { $0 === element }) else {
+            return
+        }
+        remove(at: position)
+    }
+}
+
+//MARK: Array extension
+extension Array where Element: Comparable {
+    /// Returns the `r` largest elements in descending order, each paired with its index in the original array.
+    ///
+    /// - Parameter r: number of elements to return; must satisfy `0 <= r <= count`.
+    /// - Returns: `[(original index, element)]`; empty when `r` is 0.
+    public func top(r: Int) -> [(Int, Element)] {
+        precondition(r >= 0 && r <= self.count)
+        // prefix(r) also covers r == 0, where prefix(through: r - 1) would trap.
+        return Array<(Int, Element)>(zip(0..<self.count, self).sorted { $0.1 > $1.1 }.prefix(r))
+    }
+}
+
+extension String{
+    func cStr() -> UnsafePointer<Int8>? {  // bridges to NSString and returns its `utf8String` pointer (optional)
+        return (self as NSString).utf8String  // NOTE(review): pointer presumably only valid while the bridged NSString is alive — confirm at call sites
+    }
+}
+
+/// Returns the bit pattern of the reference `o`, formatted with "%018p"
+/// (a zero-padded, pointer-style string).
+func address<T: AnyObject>(o: T) -> String { return String(format: "%018p", unsafeBitCast(o, to: Int.self)) }
+
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
new file mode 100644
index 0000000000000000000000000000000000000000..b750018260f64ae89f5b3aab5cc987eee9a11415
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Common/MetalExtension.swift
@@ -0,0 +1,280 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+fileprivate var defaultMetalLibrary: MTLLibrary?  // file-wide cache filled on first call to MTLDevice.defaultLibrary()
+fileprivate var paddleMobileMetalLibrary: MTLLibrary?  // file-wide cache filled on first call to MTLDevice.paddleMobileLibrary()
+
+extension MTLDevice {
+    /// Returns the device's default Metal library, creating it on first use and caching
+    /// it in `defaultMetalLibrary`. Aborts with fatalError when it cannot be created.
+    func defaultLibrary() -> MTLLibrary {
+        if let cached = defaultMetalLibrary { return cached }
+        guard let created = makeDefaultLibrary() else {
+            fatalError(" default metal libary is nil")
+        }
+        defaultMetalLibrary = created
+        return created
+    }
+    
+    /// Returns the Metal library shipped as "default.metallib" in the bundle that
+    /// contains `Kernel`, loading it on first use and caching it in
+    /// `paddleMobileMetalLibrary`. Aborts with fatalError when it is missing or fails to load.
+    func paddleMobileLibrary() -> MTLLibrary {
+        if let cached = paddleMobileMetalLibrary { return cached }
+        guard let path = Bundle(for: Kernel.self).path(forResource: "default", ofType: "metallib") else {
+            fatalError("Counld't find paddle mobile library")
+        }
+        do {
+            paddleMobileMetalLibrary = try makeLibrary(filepath: path)
+        } catch {
+            fatalError("Counld't load paddle mobile library")
+        }
+        guard let loaded = paddleMobileMetalLibrary else {
+            fatalError("PaddleMobile metal libary is nil")
+        }
+        return loaded
+    }
+    
+    /// Builds a compute pipeline state for the kernel function named `funcName`.
+    /// The function is looked up in the paddle-mobile library when `inPaddleMobileLib`
+    /// is true (the default), otherwise in the default library. Aborts on any failure.
+    func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState {
+        let library = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary()
+        guard let kernelFunction = library.makeFunction(name: funcName) else {
+            fatalError(" function " + funcName + " not found")
+        }
+        guard let state = try? makeComputePipelineState(function: kernelFunction) else {
+            fatalError("make pip line error occured")
+        }
+        return state
+    }
+    
+    /// Copies `value` into a new shared-storage MTLBuffer; traps if the device returns no buffer.
+    func makeBuffer<P>(value: [P]) -> MTLBuffer {
+        // Length uses stride (element spacing); bindMemory's capacity is an element count, not a byte count.
+        let buffer = makeBuffer(length: value.count * MemoryLayout<P>.stride, options: .storageModeShared)!
+        let contents = buffer.contents().bindMemory(to: P.self, capacity: value.count)
+        for (i, element) in value.enumerated() { contents[i] = element }
+        return buffer
+    }
+    
+    /// Creates a shared-storage `rgba32Float` 2D-array texture (shader read/write usage).
+    /// When `arrayLength == 1` and `value` has at least 4 elements, `value` is uploaded as
+    /// the texture contents (4 components per texel); otherwise nothing is uploaded.
+    /// Traps when the device cannot create the texture.
+    func makeFloatTexture<P>(value: [P], textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture{
+        let textureDesc = MTLTextureDescriptor.init()
+        textureDesc.width = textureWidth
+        textureDesc.height = textureHeight
+        textureDesc.depth = 1
+        textureDesc.usage = [.shaderRead, .shaderWrite]
+        textureDesc.pixelFormat = .rgba32Float
+        textureDesc.textureType = .type2DArray
+        textureDesc.storageMode = .shared
+        textureDesc.cpuCacheMode = .defaultCache
+        textureDesc.arrayLength = arrayLength
+        let texture = makeTexture(descriptor: textureDesc)!
+
+        if arrayLength == 1 && value.count >= 4 {
+            // Staging copy counted in elements and large enough for the whole region `replace` reads, even if `value` is shorter.
+            let capacity = Swift.max(value.count, texture.width * texture.height * 4)
+            let pointer = UnsafeMutablePointer<P>.allocate(capacity: capacity)
+            defer { pointer.deallocate() }  // the staging memory was previously leaked on every call
+            for i in 0..<value.count {
+                pointer[i] = value[i]
+            }
+            let bytesPerRow = texture.width * texture.depth * 4 * MemoryLayout<P>.size
+            let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: texture.width, height: texture.height, depth: texture.depth))
+            texture.replace(region: region, mipmapLevel: 0, withBytes: pointer, bytesPerRow: bytesPerRow)
+        }
+        return texture
+    }
+}
+
+extension MTLComputeCommandEncoder {
+    /// Binds `computePipline` and dispatches enough threadgroups to cover every texel
+    /// of `outTexture`, with one threadgroup layer per array slice.
+    ///
+    /// Threadgroups are `threadExecutionWidth` wide and as tall as
+    /// `maxTotalThreadsPerThreadgroup` allows. The grid is rounded up, so the
+    /// dispatched threads may extend past the texture's width/height.
+    /// The encoder's pipeline state is replaced as a side effect.
+    func dispatch(computePipline: MTLComputePipelineState, outTexture: MTLTexture) {
+        let groupW = computePipline.threadExecutionWidth
+        let groupH = computePipline.maxTotalThreadsPerThreadgroup / groupW
+        let threadsPerGroup = MTLSize(width: groupW, height: groupH, depth: 1)
+
+        // Ceiling division so partially covered edges still get a threadgroup.
+        let columns = (outTexture.width + groupW - 1) / groupW
+        let rows = (outTexture.height + groupH - 1) / groupH
+        // Integer arithmetic: this evaluates to arrayLength for non-negative values.
+        let layers = (outTexture.arrayLength * 4 + 3) / 4
+        let groups = MTLSize(width: columns, height: rows, depth: layers)
+
+        setComputePipelineState(computePipline)
+        dispatchThreadgroups(groups, threadsPerThreadgroup: threadsPerGroup)
+    }
+}
+
+
+public extension MTLTexture {
+    
+    /// Reads the texture's contents as `P` values and pairs each with its flat index.
+    ///
+    /// - Parameter stridable: when true and there are more than 100 values, only every
+    ///   `count / 100`-th value is returned (a sample); otherwise all values are returned.
+    /// - Returns: `(index, value)` tuples in ascending index order.
+    func stridableFloatArray<P>(stridable: Bool = true) -> [(index: Int, value: P)] {
+        let values: [P] = floatArray { (element: P) -> P in element }
+        let sampled = stridable && values.count > 100
+        // A step of 1 visits every index, which is the non-sampled case.
+        let step = sampled ? values.count / 100 : 1
+        var pairs: [(index: Int, value: P)] = []
+        for position in stride(from: 0, to: values.count, by: step) {
+            pairs.append((position, values[position]))
+        }
+        return pairs
+    }
+    
+    /// Reads the texture back to the CPU and maps every component through `res`.
+    ///
+    /// Four `P` components are read per texel. A `.type2DArray` texture is read slice by
+    /// slice and the slices are concatenated; a `.type2D` texture is read in one pass;
+    /// any other texture type yields an empty array.
+    func floatArray<P, T>(res: (P) -> T) -> [T] {
+        var converted: [T] = []
+        let componentSize = MemoryLayout<P>.size
+        let rowBytes = width * depth * 4 * componentSize
+        let wholeRegion = MTLRegion(origin: MTLOrigin(x: 0, y: 0, z: 0), size: MTLSize(width: width, height: height, depth: depth))
+        switch textureType {
+        case .type2DArray:
+            for slice in 0..<arrayLength {
+                let raw = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * componentSize, alignment: MemoryLayout<P>.alignment)
+                defer { raw.deallocate() }
+                getBytes(raw, bytesPerRow: rowBytes, bytesPerImage: width * height * depth * 4 * componentSize, from: wholeRegion, mipmapLevel: 0, slice: slice)
+                let components = raw.assumingMemoryBound(to: P.self)
+                for k in 0..<width * height * depth * 4 { converted.append(res(components[k])) }
+            }
+        case .type2D:
+            let raw = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * componentSize, alignment: MemoryLayout<P>.alignment)
+            defer { raw.deallocate() }
+            getBytes(raw, bytesPerRow: rowBytes, from: wholeRegion, mipmapLevel: 0)
+            let components = raw.assumingMemoryBound(to: P.self)
+            for k in 0..<width * height * 4 { converted.append(res(components[k])) }
+        default:
+            break
+        }
+        return converted
+    }
+    
+    func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {  // debug dump of the texture contents read as T values
+        print(header)
+        print("texture: \(self)")
+        let res: [(index: Int, value: T)] = stridableFloatArray(stridable: stridable)  // sampled when stridable is true and there are more than 100 values
+        print(res)
+        // Commented-out inline implementation kept below; the live code above goes through stridableFloatArray instead.
+//        if textureType == .type2DArray {
+//            for i in 0..<arrayLength{
+//                var str: String = "slice: \(i): \n"
+//                let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
+//                let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
+//                let bytesPerImage = width * height * depth * 4 * MemoryLayout<T>.size
+//                let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
+//                getBytes(bytes, bytesPerRow: bytesPerRow, bytesPerImage: bytesPerImage, from: region, mipmapLevel: 0, slice: i)
+//                let p = bytes.assumingMemoryBound(to: T.self)
+//                str += "2d array count : \(width * height * depth * 4) \n"
+//                if stridable && width * height * depth * 4 > 100 {
+//                    for j in stride(from: 0, to: width * height * depth * 4 , by: width * height * depth * 4 / 100){
+//                        str += " index \(j): \(p[j])"
+//                    }
+//                } else {
+//                    for j in 0..<width * height * depth * 4 {
+//                        str += " index \(j): \(p[j])"
+//                    }
+//                }
+//
+//                bytes.deallocate()
+//                print(str)
+//            }
+//        } else if textureType == .type2D {
+//            var str: String = "texture 2D: "
+//            let bytes = UnsafeMutableRawPointer.allocate(byteCount: width * height * 4 * MemoryLayout<T>.size, alignment: MemoryLayout<T>.alignment)
+//            let bytesPerRow = width * depth * 4 * MemoryLayout<T>.size
+//            let region = MTLRegion.init(origin: MTLOrigin.init(x: 0, y: 0, z: 0), size: MTLSize.init(width: width, height: height, depth: depth))
+//            getBytes(bytes, bytesPerRow: bytesPerRow, from: region, mipmapLevel: 0)
+//            let p = bytes.assumingMemoryBound(to: T.self)
+//            str += "2d count : \(width * width * 4) \n"
+//
+//            if stridable {
+//                for j in stride(from: 0, to: width * height * 4, by: width * height * 4 / 100){
+//                    str += "index \(j): \(p[j]) "
+//                }
+//            } else {
+//                for j in 0..<width * height * 4 {
+//                    str += "index \(j): \(p[j]) "
+//                }
+//            }
+//
+//            print(str)
+//            bytes.deallocate()
+//        }
+        return nil  // always nil: the optional return type only lets the caller choose T through a type annotation
+           
+    }
+}
+
+
+public extension MTLBuffer {
+    /// Debug helper: prints `header`, the buffer, and its contents read as `T` values.
+    /// When `stridable` is true and there are more than 1000 elements, only every
+    /// `count / 100`-th element is printed. Always returns nil; `T` is chosen by the
+    /// caller's type annotation.
+    func logDesc<T>(header: String = "", stridable: Bool = true) -> T? {
+        print(header)
+        print("MTLBuffer: \(self) ")
+        let count = length / MemoryLayout<T>.stride  // number of whole T elements in the buffer
+        let elements = contents().assumingMemoryBound(to: T.self)
+        // Bound the walk by the element count: iterating up to `length` (bytes) indexed past the end of the buffer.
+        let step = (stridable && count > 1000) ? count / 100 : 1
+        var str = ""
+        for j in stride(from: 0, to: count, by: step) { str += " \(elements[j])" }
+        print(str)
+        return nil
+    }
+    
+    /// Builds an `rgba32Float` 2D-array texture descriptor of the given size and creates a texture from this buffer with it; traps when that fails.
+    func makeTexture(textureWidth: Int, textureHeight: Int, arrayLength: Int) -> MTLTexture {
+        let descriptor = MTLTextureDescriptor()
+        descriptor.textureType = .type2DArray
+        descriptor.pixelFormat = .rgba32Float
+        descriptor.width = textureWidth
+        descriptor.height = textureHeight
+        descriptor.depth = 1
+        descriptor.arrayLength = arrayLength
+        descriptor.usage = [.shaderRead, .shaderWrite]
+        descriptor.storageMode = .shared
+        descriptor.cpuCacheMode = .defaultCache
+        return makeTexture(descriptor: descriptor, offset: 0, bytesPerRow: textureWidth * 4 * 4)!  // row pitch: 4 components x 4 bytes per texel
+    }
+    
+    
+
+}
+
+
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift
new file mode 100644
index 0000000000000000000000000000000000000000..a2927c4693c35fd8181d891cc33fa27c2c4cf0b9
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Common/PaddleMobileUnitTest.swift
@@ -0,0 +1,149 @@
+//
+//  TestConvAddBatchNormRelu.swift
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/7/25.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import Metal
+import Foundation
+
+public class PaddleMobileUnitTest {  // manual test harness; holds the Metal device and command queue it runs on
+    let device: MTLDevice
+    let queue: MTLCommandQueue
+    public init(inDevice: MTLDevice, inQueue: MTLCommandQueue) {
+        device = inDevice
+        queue = inQueue
+    }
+    /// Runs ConvAddBatchNormReluKernel once on small hard-coded data and prints the input and output textures when the command buffer completes.
+    public func testConvAddBnRelu() {
+        let buffer = queue.makeCommandBuffer() ?! " buffer is nil "
+        // 36 input values, uploaded below as a 3x3 texture with 4 components per texel.
+        let input: [Float32] = [
+         1.0, 2.0, 3.0, 4.0,
+         1.0, 2.0, 3.0, 4.0,
+         1.0, 2.0, 3.0, 4.0,
+         
+         1.0, 2.0, 3.0, 4.0,
+         1.0, 2.0, 3.0, 4.0,
+         1.0, 2.0, 3.0, 4.0,
+         
+         1.0, 2.0, 3.0, 4.0,
+         1.0, 2.0, 3.0, 4.0,
+         1.0, 2.0, 3.0, 4.0,
+        ]
+        // 144 filter weights, all 1.0: four groups (labelled 1.0 to 4.0 below) of 36 values each.
+        let filter: [Float32] = [
+        //1.0
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        //2.0
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        //3.0
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        //4.0
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        1.0, 1.0, 1.0, 1.0,
+        ]
+        // Four-element parameter vectors; note the last bias entry (100.0) differs from the others.
+        let biase: [Float32] = [1.0, 1.0, 1.0, 100.0]
+        let newScalue: [Float32] = [1.0, 1.0, 1.0, 1.0]
+        let newBiase: [Float32] = [1.0, 1.0, 1.0, 1.0]
+        // input: 3x3 texture with a single array slice, filled from `input`
+        let inputeTexture = device.makeFloatTexture(value: input, textureWidth: 3, textureHeight: 3, arrayLength: 1)
+        
+        // filter weights
+        let filterBuffer = device.makeBuffer(value: filter)
+        
+        // bias
+        let biaseBuffer = device.makeBuffer(value: biase)
+        
+        // new scale
+        let newScalueBuffer = device.makeBuffer(value: newScalue)
+        
+        // new bias
+        let newBiaseBuffer = device.makeBuffer(value: newBiase)
+        
+        // output: 2x2 texture; the empty value array means makeFloatTexture uploads nothing into it
+        let outputTexture = device.makeFloatTexture(value: [Float32](), textureWidth: 2, textureHeight: 2, arrayLength: 1)
+        
+        let filterSize: (width: Int, height: Int, channel: Int) = (3, 3, 4)
+        let paddings: (Int, Int) = (1, 1)
+        let stride: (Int, Int) = (2, 2)
+        // Offsets are half the filter extent minus the padding on that axis (0 for the values above).
+        let offsetX = filterSize.width/2 - paddings.0
+        let offsetY = filterSize.height/2 - paddings.1
+        // NOTE(review): paddedZ is fed the X padding (paddings.0) — confirm this is intended.
+        let metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: 0, strideX: UInt16(stride.0), strideY: UInt16(stride.1), paddedZ: UInt16(paddings.0))
+        
+        let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize)
+        
+        
+        
+        let convAddBnReluKernel = ConvAddBatchNormReluKernel<Float32>.init(device: device, testParam: param)
+        
+        convAddBnReluKernel.test(commandBuffer: buffer, param: param)
+        // Print both textures once the command buffer has completed; nothing is asserted automatically.
+        buffer.addCompletedHandler { (buffer) in
+            let _: Float32? = inputeTexture.logDesc(header: "input texture", stridable: false)
+            let _: Float32? = outputTexture.logDesc(header: "output texture", stridable: false)
+        }
+        
+        buffer.commit()
+        
+        // The commented-out lines below are unfilled Xcode placeholder calls, not working code.
+//        let inputTexture = device.makeFloatTexture(value: <#T##[P]#>, textureWidth: <#T##Int#>, textureHeight: <#T##Int#>, arrayLength: <#T##Int#>)
+        
+        
+//        let param = ConvAddBatchNormReluTestParam.init(inInputTexture: <#T##MTLTexture#>, inOutputTexture: <#T##MTLTexture#>, inMetalParam: <#T##MetalConvParam#>, inFilterBuffer: <#T##MTLBuffer#>, inBiaseBuffer: <#T##MTLBuffer#>, inNewScaleBuffer: <#T##MTLBuffer#>, inNewBiaseBuffer: <#T##MTLBuffer#>, inFilterSize: <#T##(width: Int, height: Int, channel: Int)#>)
+        
+//        ConvAddBatchNormReluKernel.init(device: <#T##MTLDevice#>, testParam: <#T##ConvAddBatchNormReluTestParam#>)
+        
+        
+    }
+}
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Common/Tools.swift b/metal/paddle-mobile/paddle-mobile/Common/Tools.swift
new file mode 100644
index 0000000000000000000000000000000000000000..930198fbf9c2cbfd917ddcb9ecb1fe02767c21f9
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Common/Tools.swift
@@ -0,0 +1,21 @@
+//
+//  Tools.swift
+//  paddle-mobile
+//
+//  Created by liuRuiLong on 2018/7/26.
+//  Copyright © 2018年 orange. All rights reserved.
+//
+
+import Foundation
+
+/// Dumps the raw in-memory bytes of `array` into `fileName` under the user's Library directory, replacing any existing file.
+func writeToLibrary<P: PrecisionType>(fileName: String, array: [P]) {
+    let libraryPath = NSSearchPathForDirectoriesInDomains(.libraryDirectory, .userDomainMask, true).last ?! " library path get error "
+    let filePath = libraryPath + "/" + fileName
+    FileManager.init().createFile(atPath: filePath, contents: nil, attributes: nil)
+    let fileHandler = FileHandle.init(forWritingAtPath: filePath) ?! " file handler nil "
+    // Copy the bytes inside withUnsafeBufferPointer: a pointer obtained through implicit array-to-pointer conversion is only valid for that single call.
+    fileHandler.write(array.withUnsafeBufferPointer { Data.init(buffer: $0) })
+    fileHandler.closeFile()
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Common/Types.swift b/metal/paddle-mobile/paddle-mobile/Common/Types.swift
new file mode 100644
index 0000000000000000000000000000000000000000..98353617f5090f1eeac0c644c17548555638a6ca
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Common/Types.swift
@@ -0,0 +1,106 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+public protocol SummableMultipliable: Equatable { // Equatable values that support the binary operators +, * and -.
+    static func +(lhs: Self, rhs: Self) -> Self
+    static func *(lhs: Self, rhs: Self) -> Self
+    static func -(lhs: Self, rhs: Self) -> Self
+}
+public protocol PrecisionType: SummableMultipliable{ // Element type used as the generic precision parameter `P` (e.g. by Loader and Executor).
+    init(inFloat: Float32) // Build from a 32-bit float.
+    init(inFloat16: Float16) // Build from this module's `Float16`.
+    init<P: PrecisionType>(_ inP: P) // Build from any other conforming value.
+    static var bitSize: UInt { get } // Width of the type in bits; the conformances in this file return 16 and 32.
+}
+
+public typealias Float16 = Int16
+// `Float16` is the `Int16` alias declared above, so values are stored as 16-bit integers rather than IEEE half floats.
+extension Float16: PrecisionType {
+    // No arithmetic operators are declared here: `Int16` already supplies the `+`, `-` and `*` that `SummableMultipliable`
+    // requires. A `*` overload written without a return type yields Void and its body resolves to itself, recursing forever.
+    /// Converts from a `Float32` or `Float16` source, chosen by `P.bitSize`; any other width traps.
+    public init<P>(_ inP: P) where P : PrecisionType {
+        if P.bitSize == Float32.bitSize {
+            self = Float16(inFloat: inP as! Float32)
+        } else if P.bitSize == Float16.bitSize {
+            self = inP as! Float16
+        } else {
+            fatalError()
+        }
+    }
+    
+    /// Width of the type in bits.
+    public static var bitSize: UInt {
+        return 16
+    }
+    
+    /// Identity initializer.
+    public init(inFloat16: Float16) {
+        self = inFloat16
+    }
+    /// Integer conversion that drops the fraction; `Int16.init` traps on NaN, infinity or a value outside the `Int16` range.
+    public init(inFloat: Float32) {
+        self = Int16(inFloat)
+    }
+}
+
+extension Float32: PrecisionType {
+    /// Width of the type in bits.
+    public static var bitSize: UInt { return 32 }
+    /// Identity initializer.
+    public init(inFloat: Float32) {
+        self = inFloat
+    }
+    /// Converts the integer-backed `Float16` value with `Float32.init(_:)`.
+    public init(inFloat16: Float16) {
+        self = Float32.init(inFloat16)
+    }
+    /// Converts from a `Float32` or `Float16` source, chosen by `P.bitSize`; any other width traps.
+    public init<P>(_ inP: P) where P : PrecisionType {
+        switch P.bitSize {
+        case Float32.bitSize:
+            self = inP as! Float32
+        case Float16.bitSize:
+            self = Float32.init(inP as! Float16)
+        default:
+            fatalError()
+        }
+    }
+}
+
+public enum DataLayout { // Tensor layouts; the case names presumably spell the dimension order (N batch, C channels, H height, W width) — confirm.
+    case NCHW
+    case NHWC
+}
+
+protocol Variant: CustomStringConvertible, CustomDebugStringConvertible { // Common type of the values kept in a Scope; adds no requirements beyond the two description protocols.
+}
+
+extension Tensor: Variant { // conformance only, no members
+}
+
+extension Texture: Variant { // conformance only, no members
+}
+
+extension ResultHolder: Variant { // conformance only, no members
+}
+
+extension InputTexture: Variant { // conformance only, no members
+}
+
+extension MTLTexture where Self: Variant { // currently empty
+    
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Executor.swift b/metal/paddle-mobile/paddle-mobile/Executor.swift
new file mode 100644
index 0000000000000000000000000000000000000000..0dcb3151e21cc0f3968a07da39366d4ba5fd5813
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Executor.swift
@@ -0,0 +1,153 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+public class ResultHolder<P: PrecisionType> { // Immutable container for one prediction result.
+    public let dim: [Int] // dimensions supplied by the creator
+    public let resultArr: [P] // result values supplied by the creator
+    public let elapsedTime: Double // elapsed time supplied by the creator
+    public init(inDim: [Int], inResult: [P], inElapsedTime: Double) {
+        self.elapsedTime = inElapsedTime
+        self.resultArr = inResult
+        self.dim = inDim
+    }
+}
+
+extension ResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
+    /// The dimensions followed by the values.
+    /// Results with fewer than 20 elements are listed in full; longer ones are sampled at
+    /// every `resultArr.count / 20`-th index, starting from index 0.
+    public var debugDescription: String {
+        let total = resultArr.count
+        let shown: [P]
+        if total >= 20 {
+            shown = stride(from: 0, to: total, by: total / 20).map { resultArr[$0] }
+        } else {
+            shown = resultArr
+        }
+        let body = shown.map { " \($0) " }.joined()
+        return "Dim: \(dim) \n value:[ " + body + " ]"
+    }
+    
+    /// Identical to `debugDescription`.
+    public var description: String {
+        return debugDescription
+    }
+}
+
+/// Runs a loaded `Program` on Metal.
+///
+/// At creation one operator is built, and its shape inferred, for every op description in
+/// every block of the program. `predict` then runs all operators against one command buffer
+/// and delivers the output asynchronously from that buffer's completed handler.
+public class Executor<P: PrecisionType> {
+    /// Operators in creation order; `predict` runs them in this order.
+    var ops: [Runable & InferShaperable] = []
+    /// Program whose scope supplies the input and output variables.
+    let program: Program
+    /// Device handed to every operator.
+    let device: MTLDevice
+    /// Queue from which `predict` obtains its command buffer.
+    let queue: MTLCommandQueue
+    
+    /// Creates the operators for every block of `inProgram`.
+    ///
+    /// - Parameters:
+    ///   - inDevice: Metal device passed to each operator at creation and on `run`.
+    ///   - inQueue: Command queue used by `predict`.
+    ///   - inProgram: Program supplying the op descriptions and the variable scope.
+    /// - Throws: Whatever `OpCreator.creat` throws, unchanged.
+    public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws {
+        program = inProgram
+        device = inDevice
+        queue = inQueue
+        for block in inProgram.programDesc.blocks {
+            for opDesc in block.ops {
+                // A bare `try` forwards the error exactly as a catch-and-rethrow would.
+                let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope)
+                op.inferShape()
+                ops.append(op)
+            }
+        }
+    }
+    
+    /// Runs every operator against one command buffer and commits it; the result arrives asynchronously.
+    ///
+    /// - Parameters:
+    ///   - input: Source texture. When `preProcessKernle` is given it is run on this texture
+    ///     first and its `outputTexture` becomes the network input.
+    ///   - expect: Dimensions wrapped in a `Dim` and attached to the input texture.
+    ///   - completionHandle: Called from the command buffer's completed handler with the
+    ///     output dimensions, the output values and the elapsed time.
+    ///   - preProcessKernle: Optional kernel applied to `input` before the operators run.
+    /// - Throws: `PaddleMobileError.predictError` when no command buffer can be created, or
+    ///   any error thrown by the pre-process kernel or by an operator's `run`.
+    /// - Note: A missing output variable, or one that is not a `Texture<P>`, ends in
+    ///   `fatalError` inside the completed handler.
+    public func predict(input: MTLTexture, expect: [Int], completionHandle: @escaping (ResultHolder<P>) -> Void, preProcessKernle: CusomKernel? = nil) throws {
+        guard let buffer = queue.makeCommandBuffer() else {
+            throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
+        }
+        
+        // Optional pre-processing is issued first so that its output feeds the network.
+        let resInput: MTLTexture
+        if let inPre = preProcessKernle {
+            try inPre.compute(inputTexuture: input, commandBuffer: buffer)
+            resInput = inPre.outputTexture
+        } else {
+            resInput = input
+        }
+        
+        // The clock starts after the pre-process step has been issued and before the operators run.
+        let beforeDate = Date.init()
+        let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: expect))
+        program.scope.setInput(input: inputTexture)
+        
+        // Run the operators in order; a throwing operator aborts before the commit below.
+        for op in ops {
+            try op.run(device: device, buffer: buffer)
+        }
+        
+        buffer.addCompletedHandler { (commandbuffer) in
+            // The end time is sampled as soon as the handler fires.
+            let afterDate = Date.init()
+            
+            guard let outputVar = self.program.scope.output() else {
+                fatalError("output nil")
+            }
+            guard let output = outputVar as? Texture<P> else {
+                fatalError("output var type error")
+            }
+            
+            // Read the texture back as `[P]`, passing every element through unchanged.
+            let dims = output.dim.dims
+            let values: [P] = output.metalTexture.floatArray(res: { (p:P) -> P in
+                return p
+            })
+            let elapsed = afterDate.timeIntervalSince(beforeDate)
+            completionHandle(ResultHolder<P>.init(inDim: dims, inResult: values, inElapsedTime: elapsed))
+        }
+        
+        buffer.commit()
+    }
+    
+    /// Clears the program's scope.
+    public func clear() {
+        program.scope.clear()
+    }
+    
+}
+
+//public let paddle_executor: Executor = Executor.init()
diff --git a/metal/paddle-mobile/paddle-mobile/Info.plist b/metal/paddle-mobile/paddle-mobile/Info.plist
new file mode 100644
index 0000000000000000000000000000000000000000..1007fd9dd7d0af3071eced72a45c88fea7665976
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Info.plist
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+	<key>CFBundleDevelopmentRegion</key>
+	<string>$(DEVELOPMENT_LANGUAGE)</string>
+	<key>CFBundleExecutable</key>
+	<string>$(EXECUTABLE_NAME)</string>
+	<key>CFBundleIdentifier</key>
+	<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
+	<key>CFBundleInfoDictionaryVersion</key>
+	<string>6.0</string>
+	<key>CFBundleName</key>
+	<string>$(PRODUCT_NAME)</string>
+	<key>CFBundlePackageType</key>
+	<string>FMWK</string>
+	<key>CFBundleShortVersionString</key>
+	<string>1.0</string>
+	<key>CFBundleVersion</key>
+	<string>$(CURRENT_PROJECT_VERSION)</string>
+	<key>NSPrincipalClass</key>
+	<string></string>
+</dict>
+</plist>
diff --git a/metal/paddle-mobile/paddle-mobile/Loader.swift b/metal/paddle-mobile/paddle-mobile/Loader.swift
new file mode 100644
index 0000000000000000000000000000000000000000..c68b68e1caffcadc2adb2b4ddf245c89b2c5a223
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Loader.swift
@@ -0,0 +1,187 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+import SwiftProtobuf
+
+/// Loads a serialized program description and its parameter file into a `Program`.
+public class Loader<P: PrecisionType> {
+    /// Sequential reader over the parameter file; each `read` consumes one tensor record.
+    class ParaLoader {
+        let file: UnsafeMutablePointer<FILE>
+        let fileSize: Int
+        var nowIndex: Int
+        /// Opens `paramPath` for binary reading and records its size.
+        ///
+        /// - Throws: `PaddleMobileError.loaderError` if the file cannot be opened or is empty.
+        init(paramPath: String) throws {
+            guard let tmpFile = fopen(paramPath, "rb") else {
+                throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
+            }
+            fseek(tmpFile, 0, SEEK_END)
+            let size = ftell(tmpFile)
+            guard size > 0 else {
+                // `deinit` is not run when an initializer throws before every stored property
+                // is set, so the stream must be closed here or it leaks.
+                fclose(tmpFile)
+                throw PaddleMobileError.loaderError(message: "param file size is too small")
+            }
+            rewind(tmpFile)
+            file = tmpFile
+            fileSize = size
+            nowIndex = 0
+        }
+        
+        /// Reads the next tensor record into `tensor`.
+        ///
+        /// The record's header fields are consumed and discarded, then exactly
+        /// `tensor.data.size` bytes are copied to `tensor.data.pointer`.
+        ///
+        /// - Throws: `PaddleMobileError.loaderError` if the reader is already at the end of
+        ///   the file, or if a header field or the payload cannot be read in full.
+        func read(tensor: Tensor<P>) throws {
+            guard nowIndex < fileSize else {
+                throw PaddleMobileError.loaderError(message: "out of the file range")
+            }
+            
+            // Reads one fixed-size value of type `T`; a short read throws instead of
+            // returning the contents of uninitialized memory.
+            func pointerReader<T>(type: T.Type) throws -> T {
+                let byteCount = MemoryLayout<T>.size
+                // `capacity` counts instances of `T`, not bytes, so one element suffices.
+                let ptr = UnsafeMutablePointer<T>.allocate(capacity: 1)
+                defer { ptr.deallocate() }
+                guard fread(ptr, 1, byteCount, file) == byteCount else {
+                    throw PaddleMobileError.loaderError(message: "param read size error")
+                }
+                nowIndex += byteCount
+                return ptr.pointee
+            }
+            
+            // Leading 32-bit field: read and ignored.
+            let _ = try pointerReader(type: UInt32.self)
+            // `lodLevel` entries follow, each a byte size and then that many bytes of
+            // `size_t` values; all of them are skipped.
+            let lodLevel = try pointerReader(type: UInt64.self)
+            for _ in 0..<lodLevel {
+                let size = try pointerReader(type: UInt64.self)
+                for _ in 0..<Int(size/UInt64(MemoryLayout<size_t>.size)){
+                    _ = try pointerReader(type: size_t.self)
+                }
+            }
+            
+            // Second 32-bit field: read and ignored.
+            let _ = try pointerReader(type: UInt32.self)
+            // The serialized tensor description is skipped without being parsed.
+            let tensorDescSize = try pointerReader(type: Int32.self)
+            fseek(file, Int(tensorDescSize), SEEK_CUR)
+            nowIndex += Int(tensorDescSize)
+            
+            // The stored data type is not inspected; the precision is fixed by the outer
+            // generic parameter `P`. Models currently carry Float parameters, so this
+            // should eventually be driven by the model. TODO: use script to convert
+            let bytesRead = fread(tensor.data.pointer, 1, tensor.data.size, file)
+            guard bytesRead == tensor.data.size else {
+                throw PaddleMobileError.loaderError(message: "param read size error")
+            }
+            nowIndex += bytesRead
+        }
+        
+        deinit {
+            fclose(file)
+        }
+    }
+    public init(){}
+    /// Builds a `Program` from the model at `modelPath` and the parameters at `paraPath`.
+    ///
+    /// - Parameter device: Metal device used to create textures for non-persistable variables.
+    /// - Throws: `PaddleMobileError.loaderError` naming the step that failed.
+    public func load(device: MTLDevice, modelPath: String, paraPath: String) throws -> Program{
+        guard let modelData = try? Data.init(contentsOf: URL.init(fileURLWithPath: modelPath)) else {
+            throw PaddleMobileError.loaderError(message: "load " + modelPath + " failed !")
+        }
+        
+        // Only a failed decode is reported as a protobuf error; a catch around the whole
+        // method would relabel every later, more specific loader error.
+        let protoProgram: PaddleMobile_Framework_Proto_ProgramDesc
+        do {
+            protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
+                serializedData: modelData)
+        } catch _ {
+            throw PaddleMobileError.loaderError(message: "protobuf decoder error")
+        }
+        
+        let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
+        let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
+        print(programDesc)
+        
+        let paraLoader = try ParaLoader.init(paramPath: paraPath)
+        
+        guard programDesc.blocks.count > 0 else {
+            throw PaddleMobileError.loaderError(message: "count of blocks must greater than 0")
+        }
+        
+        // to get feed key and fetch key
+        let block = programDesc.blocks[0]
+        guard let firstOp = block.ops.first, let lastOp = block.ops.last else {
+            throw PaddleMobileError.loaderError(message: "at least two operator")
+        }
+        guard firstOp.type == gFeedType, lastOp.type == gFetchType else {
+            throw PaddleMobileError.loaderError(message: "the first op is not feed or the last op is not fetch")
+        }
+        
+        guard let inputKey = opInfos[gFeedType]?.inputs.first, let outKey = opInfos[gFetchType]?.outputs.first else {
+            throw PaddleMobileError.loaderError(message: "the feed input key or fetch output key not found")
+        }
+        guard let feedKey = firstOp.inputs[inputKey]?.first, let fetchKey = lastOp.outputs[outKey]?.first else {
+            throw PaddleMobileError.loaderError(message: "feed key or fetch key not found")
+        }
+        
+        let scope = Scope.init(inFeedKey: feedKey, inFetchKey: fetchKey)
+        
+        // to load memory
+        for block in programDesc.blocks {
+            for varDesc in block.vars {
+                if (varDesc.type == .LodTensor) {
+                    guard let tensorDesc = varDesc.tensorDesc else {
+                        throw PaddleMobileError.loaderError(message: "get tensor desc failed")
+                    }
+                    
+                    if (varDesc.persistable
+                        && varDesc.type != .FeedMiniBatch
+                        && varDesc.type != .FetchList) {
+                        // Persistable variables are read from the parameter file.
+                        let dimArr = tensorDesc.dims
+                        guard dimArr.count > 0 else {
+                            throw PaddleMobileError.loaderError(message: "tensor desc dim size error")
+                        }
+                        let dim = Dim.init(inDim: dimArr)
+                        let tensor = Tensor<P>.init(inDim: dim, inLayout: tensorDesc.dataLayout)
+                        try paraLoader.read(tensor: tensor)
+                        tensor.convert(to: .NHWC)
+                        scope[varDesc.name] = tensor
+                    } else {
+                        // Every other LodTensor gets a texture sized from its NHWC dimensions.
+                        let dim = Dim.init(inDim: tensorDesc.NHWCDim)
+                        scope[varDesc.name] = Texture<P>.init(device: device, inDim: dim)
+                    }
+                } else if varDesc.name == fetchKey {
+                    scope[varDesc.name] = ResultHolder<P>.init(inDim: [], inResult: [], inElapsedTime: 0.0)
+                }
+            }
+        }
+        
+        return Program.init(inProgramDesc: programDesc, inParamPath: paraPath, inScope: scope)
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift
new file mode 100644
index 0000000000000000000000000000000000000000..0ba02af1c51ba218982cc116e2cf8500cfa14db0
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpCreator.swift
@@ -0,0 +1,56 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+// Lazily created `OpCreator` instances, keyed by the textual name of the precision type.
+fileprivate var singletons : [String : Any] = [:]
+/// Maps op type names to operator factories for precision `P`.
+class OpCreator<P: PrecisionType> {
+    /// Shared instance for `P`, created on first access and cached in `singletons`.
+    static var shared : OpCreator<P> {
+        let key = String(describing: P.self)
+        guard let cached = singletons[key] else {
+            let created = OpCreator<P>()
+            singletons[key] = created
+            return created
+        }
+        return cached as! OpCreator<P>
+    }
+    
+    /// Instantiates the operator registered for `opDesc.type`.
+    /// - Throws: `PaddleMobileError.opError` when the type has no registered factory;
+    ///   otherwise any error thrown by the factory itself, unchanged.
+    func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope) throws -> Runable & InferShaperable {
+        guard let opCreator = opCreators[opDesc.type] else {
+            throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet")
+        }
+        return try opCreator(device, opDesc, scope)
+    }
+    
+    /// Operator factories keyed by op type name.
+    let opCreators: [String : (MTLDevice, OpDesc, Scope) throws -> Runable & InferShaperable] =
+        [gConvType: ConvOp<P>.creat,
+         gBatchNormType: BatchNormOp<P>.creat,
+         gReluType: ReluOp<P>.creat,
+         gElementwiseAdd: ElementwiseAddOp<P>.creat,
+         gFeedType: FeedOp<P>.creat,
+         gFetchType: FetchOp<P>.creat,
+         gConvAddBatchNormReluType: ConvAddBatchNormReluOp<P>.creat,
+         gPooType: PoolOp<P>.creat,
+         gSoftmaxType: SoftmaxOp<P>.creat,
+         gReshapeType: ReshapeOp<P>.creat,
+         gConvAddType: ConvAddOp<P>.creat]
+    private init(){}
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift
new file mode 100644
index 0000000000000000000000000000000000000000..43f095d7008ad14ac71d610728e19ac6f6817800
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/OpParam.swift
@@ -0,0 +1,168 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/*
+ let opInputsOutputsKey  = [gConvType         : (inputs: ["Input"], outputs: ["Output"]),
+ gBatchNormType    : (inputs: ["X"], outputs: ["Y"]),
+ gReluType         : (inputs: ["X"], outputs: ["Out"]),
+ gElementwiseAdd   : (inputs: ["X", "Y"], outputs: ["Out"])]
+ */
+
+protocol OpParam { // Parameter bundle built from an op description and a scope.
+    associatedtype OutputType: Variant
+    var output: OutputType { get set }
+    func outputDesc() -> String // The extension below returns `output.debugDescription`.
+    
+    associatedtype ParamPrecisionType: PrecisionType
+    init(opDesc: OpDesc, inScope: Scope) throws
+    static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType // Generic lookup used by the slot accessors below.
+    static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType // Slot "X"
+    static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType // Slot "Bias"
+    static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType // Slot "Mean"
+    static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType // Slot "Scale"
+    static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType // Slot "Variance"
+    static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType // Slot "Filter"
+    static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType // Slot "Input"
+    static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType // Slot "Output"
+    static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType // Slot "Y" (outputs)
+    static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType // Slot "Y" (inputs)
+    static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType // Slot "Out"
+    static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T // Typed attribute lookup.
+}
+
+extension OpParam {
+    /// Debug description of the `output` variable.
+    func outputDesc() -> String {
+        return output.debugDescription
+    }
+    
+    /// Fetches from `from` the variable named first under `key` in `map`, cast to `VarType`.
+    ///
+    /// - Throws: `PaddleMobileError.paramError` if `key` has no names, or the named variable
+    ///   is absent from the scope or is not a `VarType`.
+    static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType {
+        guard let firstName = map[key]?.first else {
+            throw PaddleMobileError.paramError(message: key + " not found in \(map) or maped values is empty")
+        }
+        guard let v = from[firstName] as? VarType else {
+            throw PaddleMobileError.paramError(message: firstName + " not found in scope")
+        }
+        return v
+    }
+    
+    // Slot accessors: each resolves one named input/output slot through `getFirstTensor`.
+    
+    /// Variable bound to the "X" input slot.
+    ///
+    /// - Parameters: `inputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "X", map: inputs, from: from)
+    }
+    
+    /// Variable bound to the "Input" input slot.
+    ///
+    /// - Parameters: `inputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func input<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Input", map: inputs, from: from)
+    }
+    
+    /// Variable bound to the "Output" output slot.
+    ///
+    /// - Parameters: `outputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func output<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Output", map: outputs, from: from)
+    }
+    
+    /// Variable bound to the "Y" output slot.
+    ///
+    /// - Parameters: `outputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func outputY<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Y", map: outputs, from: from)
+    }
+    
+    /// Variable bound to the "Y" input slot.
+    ///
+    /// - Parameters: `inputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func inputY<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Y", map: inputs, from: from)
+    }
+    
+    /// Variable bound to the "Out" output slot.
+    ///
+    /// - Parameters: `outputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func outputOut<VarType: Variant>(outputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Out", map: outputs, from: from)
+    }
+    
+    /// Variable bound to the "Filter" slot.
+    ///
+    /// - Parameters: `paraInputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func inputFilter<VarType: Variant>(paraInputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Filter", map: paraInputs, from: from)
+    }
+    
+    /// Variable bound to the "Bias" input slot.
+    ///
+    /// - Parameters: `inputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Bias", map: inputs, from: from)
+    }
+    
+    /// Variable bound to the "Mean" input slot.
+    ///
+    /// - Parameters: `inputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func inputMean<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Mean", map: inputs, from: from)
+    }
+    
+    /// Variable bound to the "Scale" input slot.
+    ///
+    /// - Parameters: `inputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func inputScale<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Scale", map: inputs, from: from)
+    }
+    
+    /// Variable bound to the "Variance" input slot.
+    ///
+    /// - Parameters: `inputs` maps slot names to variable names; `from` is the scope searched.
+    /// - Throws: `PaddleMobileError.paramError`, propagated from `getFirstTensor`.
+    static func inputVariance<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType {
+        return try getFirstTensor(key: "Variance", map: inputs, from: from)
+    }
+    
+    /// Attribute stored under `key`, cast to `T`.
+    ///
+    /// - Throws: `PaddleMobileError.paramError` if the key is missing or the value is not a `T`.
+    static func getAttr<T>(key: String, attrs: [String : Attr]) throws -> T{
+        guard let attr = attrs[key] else {
+            throw PaddleMobileError.paramError(message: "attr \(key) can't found in: \(attrs)" )
+        }
+        if let typed = attr as? T {
+            return typed
+        }
+        throw PaddleMobileError.paramError(message: "key: \(key) attr: \(attr) type error" )
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift
new file mode 100644
index 0000000000000000000000000000000000000000..bc95f84d8ae98cb8e4e7151f0cf69a574699dc80
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Base/Operator.swift
@@ -0,0 +1,139 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Metal
+import Foundation
+
+/// Static description of a fused operator.
+/// NOTE(review): the consumer of these hooks is not visible in this file;
+/// presumably a graph-optimisation pass - confirm.
+protocol Fusion {
+    /// Returns the first `Node` of the operator chain this fusion stands for.
+    static func fusionNode() -> Node
+    /// Returns a map from a string key to `(from, to)` rename pairs.
+    /// NOTE(review): the meaning of the key is not shown here - confirm against the caller.
+    static func change() -> [String : [(from: String, to: String)]]
+    /// Returns the operator type string of the fused operator.
+    static func fusionType() -> String
+}
+
+/// An operator that can be executed against a Metal device and command buffer.
+protocol Runable {
+    /// Public entry point for running the operator; may throw.
+    func run(device: MTLDevice, buffer: MTLCommandBuffer) throws
+    /// Operator-specific work that conforming types supply; may throw.
+    func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
+    /// Debug hook for dumping the operator's output.
+    func delogOutput()
+}
+
+/// Default `Runable` behaviour for types that are also `OperatorProtocol`.
+extension Runable where Self: OperatorProtocol{
+    /// Runs the operator by forwarding to `runImpl`.
+    ///
+    /// - Parameters:
+    ///   - device: Metal device handed through to `runImpl`.
+    ///   - buffer: command buffer handed through to `runImpl`.
+    /// - Throws: any error thrown by `runImpl`, propagated unchanged.
+    func run(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        try runImpl(device: device, buffer: buffer)
+    }
+    
+    /// Fallback debug hook: prints that this operator type has no output dump.
+    func delogOutput() {
+        print(type + ": has no implementation" )
+    }
+}
+
+/// Factory protocol for operators.
+protocol Creator where Self: OperatorProtocol{
+    /// Concrete operator type that `creat` returns.
+    associatedtype OpType: OperatorProtocol & Runable & InferShaperable
+    /// Builds an `OpType` from an operator description and a scope; may throw.
+    static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType
+}
+
+/// Default factory implementation shared by every `Creator`.
+extension Creator where Self: OperatorProtocol {
+    /// Builds an `OpType` by delegating to `OpType.provide`.
+    ///
+    /// - Parameters:
+    ///   - device: Metal device forwarded to `provide`.
+    ///   - opDesc: operator description forwarded to `provide`.
+    ///   - inScope: scope forwarded to `provide`.
+    /// - Returns: the operator instance returned by `OpType.provide`.
+    /// - Throws: any error thrown by `OpType.provide`, propagated unchanged.
+    static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope)
+    }
+}
+
+/// An operator that can work out its own output shape.
+protocol InferShaperable {
+    /// Computes the output dimensions; the operators in this change write them to `para.output.dim`.
+    func inferShape()
+}
+
+/// Common surface of every operator: its description data, parameter object and kernel.
+protocol OperatorProtocol {
+    /// Type of the parameter object held in `para`.
+    associatedtype ParamType
+    /// Kernel type; its own `ParamType` must match this operator's `ParamType`.
+    associatedtype KerType:  Computable where Self.KerType.ParamType == ParamType
+    /// Operator type string.
+    var type: String { get }
+    /// Scope the operator was created in.
+    var scope: Scope { get }
+    /// Input key -> names map.
+    var inputs: [String : [String]] { get }
+    /// Second key -> names map; the operators in this change look up
+    /// Filter/Bias/Mean/Scale/Variance/Y through it.
+    var paraInputs: [String : [String]] { get set }
+    /// Output key -> names map (the spelling `outpus` is part of the interface).
+    var outpus: [String : [String]] { get }
+    /// Operator attributes keyed by name.
+    var attrs: [String : Attr] { get }
+    /// Parameter object.
+    var para: ParamType { get }
+    /// Kernel instance.
+    var kernel: KerType { get }
+    /// Builds the operator from a description and a scope; may throw.
+    init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws
+}
+
+/// Factory helper available on every `OperatorProtocol` type.
+extension OperatorProtocol {
+    /// Creates an instance of the conforming type through its required initializer.
+    ///
+    /// - Parameters:
+    ///   - device: Metal device forwarded to `init`.
+    ///   - opDesc: operator description forwarded to `init`.
+    ///   - inScope: scope forwarded to `init`.
+    /// - Returns: the newly constructed operator.
+    /// - Throws: any error thrown by the initializer, propagated unchanged.
+    static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        return try Self.init(device: device, opDesc: opDesc, inScope: inScope)
+    }
+}
+
+/// Generic base class for operators.
+///
+/// Copies the descriptive fields out of an `OpDesc`, builds the parameter
+/// object from the description and scope, then builds the kernel from the
+/// device and that parameter object.
+class Operator <KernelType:  Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
+    typealias ParamType = ParameterType
+    typealias KerType = KernelType
+    /// Operator type string, taken from `opDesc.type`.
+    let type: String
+    /// Taken from `opDesc.inputs`.
+    let inputs: [String : [String]]
+    /// Taken from `opDesc.paraInputs`.
+    var paraInputs: [String : [String]]
+    /// Taken from `opDesc.outputs`.
+    let outpus: [String : [String]]
+    /// Taken from `opDesc.attrs`.
+    let attrs: [String : Attr]
+    /// Parameter object built from the description and scope.
+    let para: ParamType
+    /// Scope passed to the initializer.
+    let scope: Scope
+    /// Kernel built from the device and `para`.
+    var kernel: KerType
+
+    /// Builds the operator.
+    ///
+    /// - Parameters:
+    ///   - device: Metal device handed to the kernel initializer.
+    ///   - opDesc: operator description supplying type, inputs, outputs and attributes.
+    ///   - inScope: scope stored on the operator and handed to the parameter initializer.
+    /// - Throws: any error thrown while constructing the parameter object.
+    required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
+        type = opDesc.type
+        scope = inScope
+        inputs = opDesc.inputs
+        outpus = opDesc.outputs
+        attrs =  opDesc.attrs
+        paraInputs = opDesc.paraInputs
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        para = try ParamType.init(opDesc:opDesc, inScope: inScope)
+        // The kernel is built last because it needs the finished parameter object.
+        kernel = KernelType.init(device: device, param: para)
+    }
+}
+
+// Operator type names used throughout the Metal backend.
+let gFetchType                  = "fetch"
+let gFeedType                   = "feed"
+let gConvType                   = "conv2d"
+let gBatchNormType              = "batch_norm"
+let gReluType                   = "relu"
+let gElementwiseAdd             = "elementwise_add"
+let gConvAddBatchNormReluType   = "conv_add_batchnorm_relu"
+let gPooType                    = "pool2d"
+let gSoftmaxType                = "softmax"
+let gReshapeType                = "reshape"
+let gConvAddType                = "conv_add"
+
+
+// For each operator type: the input keys and output keys listed for it.
+let opInfos: [String : (inputs: [String], outputs: [String])] = [
+    gConvType                    : (inputs: ["Input"], outputs: ["Output"]),
+    gBatchNormType               : (inputs: ["X"], outputs: ["Y"]),
+    gReluType                    : (inputs: ["X"], outputs: ["Out"]),
+    gElementwiseAdd              : (inputs: ["X"], outputs: ["Out"]),
+    gFeedType                    : (inputs: ["X"], outputs: ["Out"]),
+    gFetchType                   : (inputs: ["X"], outputs: ["Out"]),
+    gConvAddBatchNormReluType    : (inputs: ["Input"], outputs: ["Out"]),
+    gPooType                     : (inputs: ["X"], outputs: ["Out"]),
+    gSoftmaxType                 : (inputs: ["X"], outputs: ["Out"]),
+    gReshapeType                 : (inputs: ["X"], outputs: ["Out"]),
+    gConvAddType                 : (inputs: ["Input"], outputs: ["Out"])
+]
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..3761dad60f0f8b20e3f95168445317a3e627ada9
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/BatchNormOp.swift
@@ -0,0 +1,62 @@
+///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. */
+
+import Foundation
+
+/// Parameters of the batch-norm operator, resolved from an `OpDesc` and a `Scope`.
+class BatchNormParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Resolves every input, output and attribute of the operator.
+    ///
+    /// - Parameters:
+    ///   - opDesc: supplies `inputs`, `paraInputs`, `outputs` and `attrs`.
+    ///   - inScope: scope the variables are looked up in.
+    /// - Throws: the first error raised by any lookup, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        // Each `try` propagates on its own; no catch-and-rethrow is needed.
+        input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try BatchNormParam.outputY(outputs: opDesc.outputs, from: inScope)
+        inputBias = try BatchNormParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
+        inputMean = try BatchNormParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
+        inputScale = try BatchNormParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
+        inputVariance = try BatchNormParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
+        epsilon = try BatchNormParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
+        momentum = try BatchNormParam.getAttr(key: "momentum", attrs: opDesc.attrs)
+        is_test = try BatchNormParam.getAttr(key: "is_test", attrs: opDesc.attrs)
+    }
+
+    /// Looked up through `inputX` in `opDesc.inputs`.
+    let input: Texture<P>
+    /// Looked up through `outputY` in `opDesc.outputs`.
+    var output: Texture<P>
+    /// Bias, mean, scale and variance tensors looked up in `opDesc.paraInputs`.
+    let inputBias: Tensor<ParamPrecisionType>
+    let inputMean: Tensor<ParamPrecisionType>
+    let inputScale: Tensor<ParamPrecisionType>
+    let inputVariance: Tensor<ParamPrecisionType>
+    /// Attributes "epsilon", "momentum" and "is_test".
+    let epsilon: Float
+    let momentum: Float
+    let is_test: Bool
+}
+
+/// Batch-norm operator: pairs `BatchNormKernel` with `BatchNormParam`.
+class BatchNormOp<P: PrecisionType>: Operator<BatchNormKernel<P>, BatchNormParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = BatchNormOp<P>
+
+    /// The output takes the same dimensions as the input.
+    func inferShape() {
+        para.output.dim = para.input.dim
+    }
+
+    /// Encodes the kernel into `buffer`.
+    ///
+    /// - Throws: any error thrown by `kernel.compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+}
+
+
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..f24e25b054f02c7b8f12015697fd61e9a2005ef8
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddBatchNormReluOp.swift
@@ -0,0 +1,132 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the fused conv + add + batch-norm + relu operator.
+class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Resolves every input, output and attribute of the operator.
+    ///
+    /// - Parameters:
+    ///   - opDesc: supplies `inputs`, `paraInputs`, `outputs` and `attrs`.
+    ///   - inScope: scope the variables are looked up in.
+    /// - Throws: the first error raised by any lookup, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        // Each `try` propagates on its own; no catch-and-rethrow is needed.
+        filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
+        input = try ConvAddBatchNormReluParam.input(inputs: opDesc.inputs, from: inScope)
+        output = try ConvAddBatchNormReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        stride = try ConvAddBatchNormReluParam.getAttr(key: "strides", attrs: opDesc.attrs)
+        paddings = try ConvAddBatchNormReluParam.getAttr(key: "paddings", attrs: opDesc.attrs)
+        dilations = try ConvAddBatchNormReluParam.getAttr(key: "dilations", attrs: opDesc.attrs)
+        epsilon = try ConvAddBatchNormReluParam.getAttr(key: "epsilon", attrs: opDesc.attrs)
+
+        groups = try ConvAddBatchNormReluParam.getAttr(key: "groups", attrs: opDesc.attrs)
+        variance = try ConvAddBatchNormReluParam.inputVariance(inputs: opDesc.paraInputs, from: inScope)
+        bias = try ConvAddBatchNormReluParam.inputBiase(inputs: opDesc.paraInputs, from: inScope)
+        scale = try ConvAddBatchNormReluParam.inputScale(inputs: opDesc.paraInputs, from: inScope)
+        mean = try ConvAddBatchNormReluParam.inputMean(inputs: opDesc.paraInputs, from: inScope)
+        y = try ConvAddBatchNormReluParam.inputY(inputs: opDesc.paraInputs, from: inScope)
+    }
+
+    /// Looked up through `input(inputs:from:)` in `opDesc.inputs`.
+    let input: Texture<P>
+
+    /// Tensors looked up in `opDesc.paraInputs`.
+    let variance: Tensor<ParamPrecisionType>
+    let bias: Tensor<ParamPrecisionType>
+    let mean: Tensor<ParamPrecisionType>
+    let scale: Tensor<ParamPrecisionType>
+    let y: Tensor<ParamPrecisionType>
+    let filter: Tensor<ParamPrecisionType>
+    /// Attribute "epsilon".
+    let epsilon: Float32
+    /// Not set by this initializer; presumably filled in by the kernel - confirm.
+    var newScale: MTLBuffer?
+    var newBiase: MTLBuffer?
+
+    /// Looked up through `outputOut` in `opDesc.outputs`.
+    var output: Texture<P>
+    /// Attributes "strides", "paddings", "dilations" and "groups".
+    let stride: [Int32]
+    let paddings: [Int32]
+    let dilations: [Int32]
+    let groups: Int
+}
+
+/// Fused conv2d -> elementwise_add -> batch_norm -> relu operator.
+class ConvAddBatchNormReluOp<P: PrecisionType>: Operator<ConvAddBatchNormReluKernel<P>, ConvAddBatchNormReluParam<P>>, Runable, Creator, InferShaperable, Fusion{
+    typealias OpType = ConvAddBatchNormReluOp<P>
+
+    /// Derives the output dimensions from the input, the filter and the conv attributes.
+    ///
+    /// The first output dimension is copied from input dimension 0. For each
+    /// stride index `i` the size is
+    /// `(in[i+1] + 2*padding - (dilation*(filter[i+1]-1)+1)) / stride + 1`
+    /// using integer division. The last output dimension is `filter[0]`.
+    func inferShape() {
+        let inDims = para.input.dim
+        let filterDim = para.filter.dim
+        let strides = para.stride
+        let paddings = para.paddings
+        let dilations = para.dilations
+
+        var outDim = [inDims[0]]
+        for i in 0..<strides.count {
+            let dilation: Int = Int(dilations[i])
+            let filterSize: Int = filterDim[i + 1]
+            let inputSize: Int = inDims[i + 1]
+            let padding: Int = Int(paddings[i])
+            let stride: Int = Int(strides[i])
+            // Extent of the filter once dilation gaps are included.
+            let dKernel = dilation * (filterSize - 1) + 1
+            let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
+            outDim.append(outputSize)
+        }
+        outDim.append(filterDim[0])
+        para.output.dim = Dim.init(inDim: outDim)
+    }
+
+    /// Encodes the kernel into `buffer`.
+    ///
+    /// - Throws: any error thrown by `kernel.compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Operator chain this fusion stands for: conv2d, elementwise_add, batch_norm, relu.
+    static func fusionNode() -> Node {
+        let beginNode = Node.init(inType: gConvType)
+        _ = beginNode
+            --> Node.init(inType: gElementwiseAdd)
+            --> Node.init(inType: gBatchNormType)
+            --> Node.init(inType: gReluType)
+        return beginNode
+    }
+
+    /// This fusion declares no rename pairs.
+    static func change() -> [String : [(from: String, to: String)]] {
+        return [:]
+    }
+
+    /// Operator type string of the fused operator.
+    static func fusionType() -> String {
+        return gConvAddBatchNormReluType
+    }
+
+    /// Debug hook: reads the output texture back as an array of `P` and hands
+    /// it to `writeToLibrary` under a fixed file name, then prints a marker.
+    /// NOTE(review): the file name is hard-coded - looks like leftover debugging; confirm.
+    func delogOutput() {
+        let output = para.output.metalTexture.floatArray { (p: P) -> P in
+            return p
+        }
+        writeToLibrary(fileName: "output_112x112x32_2", array: output)
+        print(" write done")
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..40069f6550ea00e986926f40c5fc2a2d4bf22a83
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvAddOp.swift
@@ -0,0 +1,93 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the fused conv + add operator.
+class ConvAddParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Resolves every input, output and attribute of the operator.
+    ///
+    /// - Parameters:
+    ///   - opDesc: supplies `inputs`, `paraInputs`, `outputs` and `attrs`.
+    ///   - inScope: scope the variables are looked up in.
+    /// - Throws: the first error raised by any lookup, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        // Each `try` propagates on its own; no catch-and-rethrow is needed.
+        filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
+        input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
+        output = try ConvAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        stride = try ConvAddParam.getAttr(key: "strides", attrs: opDesc.attrs)
+        paddings = try ConvAddParam.getAttr(key: "paddings", attrs: opDesc.attrs)
+        dilations = try ConvAddParam.getAttr(key: "dilations", attrs: opDesc.attrs)
+        groups = try ConvAddParam.getAttr(key: "groups", attrs: opDesc.attrs)
+        y = try ConvAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
+    }
+
+    /// Looked up through `input(inputs:from:)` in `opDesc.inputs`.
+    let input: Texture<P>
+    /// Looked up through `inputY` in `opDesc.paraInputs`.
+    let y: Tensor<ParamPrecisionType>
+    /// Looked up through `inputFilter` in `opDesc.paraInputs`.
+    let filter: Tensor<ParamPrecisionType>
+
+    /// Looked up through `outputOut` in `opDesc.outputs`.
+    var output: Texture<P>
+    /// Attributes "strides", "paddings", "dilations" and "groups".
+    let stride: [Int32]
+    let paddings: [Int32]
+    let dilations: [Int32]
+    let groups: Int
+}
+
+/// Fused conv2d -> elementwise_add operator.
+class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, Runable, Creator, InferShaperable, Fusion{
+    typealias OpType = ConvAddOp<P>
+
+    /// Operator chain this fusion stands for: conv2d followed by elementwise_add.
+    static func fusionNode() -> Node {
+        let beginNode = Node.init(inType: gConvType)
+        _ = beginNode
+            --> Node.init(inType: gElementwiseAdd)
+        return beginNode
+    }
+
+    /// This fusion declares no rename pairs.
+    static func change() -> [String : [(from: String, to: String)]] {
+        return [:]
+    }
+
+    /// Operator type string of the fused operator.
+    static func fusionType() -> String {
+        return gConvAddType
+    }
+
+    /// Derives the output dimensions from the input, the filter and the conv attributes.
+    ///
+    /// The first output dimension is copied from input dimension 0. For each
+    /// stride index `i` the size is
+    /// `(in[i+1] + 2*padding - (dilation*(filter[i+1]-1)+1)) / stride + 1`
+    /// using integer division. The last output dimension is `filter[0]`.
+    func inferShape() {
+        let inDims = para.input.dim
+        let filterDim = para.filter.dim
+        let strides = para.stride
+        let paddings = para.paddings
+        let dilations = para.dilations
+
+        var outDim = [inDims[0]]
+        for i in 0..<strides.count {
+            let dilation: Int = Int(dilations[i])
+            let filterSize: Int = filterDim[i + 1]
+            let inputSize: Int = inDims[i + 1]
+            let padding: Int = Int(paddings[i])
+            let stride: Int = Int(strides[i])
+            // Extent of the filter once dilation gaps are included.
+            let dKernel = dilation * (filterSize - 1) + 1
+            let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
+            outDim.append(outputSize)
+        }
+        outDim.append(filterDim[0])
+        para.output.dim = Dim.init(inDim: outDim)
+    }
+
+    /// Encodes the kernel into `buffer`.
+    ///
+    /// - Throws: any error thrown by `kernel.compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..29b0c4246e728dbc3d3b865a189c7063ac1bbdcf
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ConvOp.swift
@@ -0,0 +1,88 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the plain convolution operator.
+class ConvParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Resolves every input, output and attribute of the operator.
+    ///
+    /// - Parameters:
+    ///   - opDesc: supplies `inputs`, `paraInputs`, `outputs` and `attrs`.
+    ///   - inScope: scope the variables are looked up in.
+    /// - Throws: the first error raised by any lookup, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        // Each `try` propagates on its own; no catch-and-rethrow is needed.
+        filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
+        input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
+        output = try ConvParam.output(outputs: opDesc.outputs, from: inScope)
+        stride = try ConvParam.getAttr(key: "strides", attrs: opDesc.attrs)
+        paddings = try ConvParam.getAttr(key: "paddings", attrs: opDesc.attrs)
+        dilations = try ConvParam.getAttr(key: "dilations", attrs: opDesc.attrs)
+        groups = try ConvParam.getAttr(key: "groups", attrs: opDesc.attrs)
+    }
+
+    /// Looked up through `input(inputs:from:)` in `opDesc.inputs`.
+    let input: Texture<P>
+    /// Looked up through `inputFilter` in `opDesc.paraInputs`.
+    let filter: Tensor<ParamPrecisionType>
+    /// Looked up through `output(outputs:from:)` in `opDesc.outputs`.
+    var output: Texture<P>
+    /// Attributes "strides", "paddings", "dilations" and "groups".
+    let stride: [Int32]
+    let paddings: [Int32]
+    let dilations: [Int32]
+    let groups: Int
+}
+
+/// Plain convolution operator: pairs `ConvKernel` with `ConvParam`.
+class ConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = ConvOp<P>
+
+    /// Forwards straight to the `Operator` initializer.
+    ///
+    /// - Throws: any error thrown by `super.init`, propagated unchanged.
+    required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        try super.init(device: device, opDesc: opDesc, inScope: inScope)
+    }
+
+    /// Derives the output dimensions from the input, the filter and the conv attributes.
+    ///
+    /// The first output dimension is copied from input dimension 0. For each
+    /// stride index `i` the size is
+    /// `(in[i+1] + 2*padding - (dilation*(filter[i+1]-1)+1)) / stride + 1`
+    /// using integer division. The last output dimension is `filter[0]`.
+    func inferShape() {
+        let inDims = para.input.dim
+        let filterDim = para.filter.dim
+        let strides = para.stride
+        let paddings = para.paddings
+        let dilations = para.dilations
+
+        var outDim = [inDims[0]]
+        for i in 0..<strides.count {
+            let dilation: Int = Int(dilations[i])
+            let filterSize: Int = filterDim[i + 1]
+            let inputSize: Int = inDims[i + 1]
+            let padding: Int = Int(paddings[i])
+            let stride: Int = Int(strides[i])
+            // Extent of the filter once dilation gaps are included.
+            let dKernel = dilation * (filterSize - 1) + 1
+            let outputSize = (inputSize + 2 * padding - dKernel) / stride + 1
+            outDim.append(outputSize)
+        }
+        outDim.append(filterDim[0])
+        para.output.dim = Dim.init(inDim: outDim)
+    }
+
+    /// Encodes the kernel into `buffer`.
+    ///
+    /// - Throws: any error thrown by `kernel.compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug hook: prints a header followed by the output Metal texture.
+    func delogOutput() {
+        print("conv output : ")
+        print(para.output.metalTexture)
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..5ed36f86d79ffd639dc2ba76da74d24a532b1bd1
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ElementwiseAddOp.swift
@@ -0,0 +1,51 @@
+///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. */
+
+import Foundation
+
+/// Parameters of the elementwise-add operator.
+class ElementwiseAddParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+
+    /// Resolves every input, output and attribute of the operator.
+    ///
+    /// - Parameters:
+    ///   - opDesc: supplies `inputs`, `paraInputs`, `outputs` and `attrs`.
+    ///   - inScope: scope the variables are looked up in.
+    /// - Throws: the first error raised by any lookup, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        // Each `try` propagates on its own; no catch-and-rethrow is needed.
+        input = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
+        inputY = try ElementwiseAddParam.inputY(inputs: opDesc.paraInputs, from: inScope)
+
+        output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        axis = try ElementwiseAddParam.getAttr(key: "axis", attrs: opDesc.attrs)
+    }
+
+    /// Looked up through `inputX` in `opDesc.inputs`.
+    let input: Texture<P>
+    /// Looked up through `inputY` in `opDesc.paraInputs`.
+    let inputY: Tensor<P>
+    /// Looked up through `outputOut` in `opDesc.outputs`.
+    var output: Texture<P>
+    /// Attribute "axis".
+    let axis: Int
+}
+
+/// Elementwise-add operator: pairs `ElementwiseAddKernel` with `ElementwiseAddParam`.
+class ElementwiseAddOp<P: PrecisionType>: Operator<ElementwiseAddKernel<P>, ElementwiseAddParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = ElementwiseAddOp<P>
+
+    /// Intentionally does nothing: no kernel work is encoded here.
+    /// NOTE(review): presumably this operator only ever runs as part of a fusion - confirm.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+    }
+
+    /// The output takes the same dimensions as the input.
+    func inferShape() {
+        para.output.dim = para.input.dim
+    }
+}
+
+
+
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..c81d9e786c91408d2412b30eaec089904df75751
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/FeedOp.swift
@@ -0,0 +1,68 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the feed operator.
+class FeedParam<P: PrecisionType>: OpParam{
+    typealias ParamPrecisionType = P
+
+    /// Looked up through `outputOut` in `opDesc.outputs`.
+    var output: Texture<P>
+    /// Current scope input, force-cast to `InputTexture`.
+    /// Traps at runtime if the scope input is of any other type.
+    var input: InputTexture {
+        return scope.input() as! InputTexture
+    }
+    /// Scope passed to the initializer; `input` reads from it on every access.
+    let scope: Scope
+
+    /// Stores the scope and resolves the output variable.
+    ///
+    /// - Parameters:
+    ///   - opDesc: supplies `outputs`.
+    ///   - inScope: scope stored on the parameter and used for the lookup.
+    /// - Throws: any error thrown by `outputOut`, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        scope = inScope
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Feed operator: pairs `Texture2DTo2DArrayKernel` with `FeedParam`.
+class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<P>>, Runable, Creator, InferShaperable {
+    typealias OpType = FeedOp<P>
+
+    /// Only prints the output dimensions; it does not change them.
+    func inferShape() {
+        print("feed output: \(para.output.dim)")
+    }
+
+    /// Encodes the kernel into `buffer`.
+    ///
+    /// - Throws: any error thrown by `kernel.compute`, propagated unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug hook; deliberately empty, so it replaces the default "has no implementation" print.
+    func delogOutput() {
+    }
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..2964b89e5ddabbbbd4f2df032efa5ef2db82ec96
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/FetchOp.swift
@@ -0,0 +1,54 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the fetch operator.
+class FetchParam<P: PrecisionType>: OpParam{
+    typealias ParamPrecisionType = P
+
+    /// Set to the same value as `input` by the initializer.
+    var output: Texture<P>
+    /// Looked up through `inputX` in `opDesc.inputs`.
+    let input: Texture<P>
+    /// Scope passed to the initializer.
+    let scope: Scope
+
+    /// Stores the scope, resolves the input and uses it as the output too.
+    ///
+    /// - Parameters:
+    ///   - opDesc: supplies `inputs`.
+    ///   - inScope: scope stored on the parameter and used for the lookup.
+    /// - Throws: any error thrown by `inputX`, propagated unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        scope = inScope
+        // `try` already propagates the error; no catch-and-rethrow is needed.
+        input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = input
+    }
+}
+
+/// Kernel for the fetch operator; `compute` encodes no work.
+class FetchKernel<P: PrecisionType>: Kernel, Computable {
+
+    /// Initialises the base `Kernel` with the "texture2d_to_2d_array" function name.
+    /// `param` is not used.
+    required init(device: MTLDevice, param: FetchParam<P>) {
+        super.init(device: device, inFunctionName: "texture2d_to_2d_array")
+    }
+
+    /// Intentionally empty: nothing is encoded into `commandBuffer`.
+    func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
+    }
+}
+
+/// Fetch operator: publishes its parameter's output to the scope.
+class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = FetchOp<P>
+
+    /// Hands `para.output` to the scope as its output; the kernel is not invoked.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        scope.setOutput(output: para.output)
+    }
+
+    /// Only prints the input dimensions; no shape is written.
+    func inferShape() {
+        print(para.input.dim)
+    }
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..bae452dec331957ceda5a6f503802352f63a6dbe
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/BatchNormKernel.swift
@@ -0,0 +1,67 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Batch-norm kernel.
+///
+/// The initializer folds scale, bias, mean and variance into two buffers:
+/// `newScale[i] = scale[i] / sqrt(variance[i] + epsilon)` and
+/// `newBias[i]  = bias[i] - mean[i] * newScale[i]`.
+/// `compute` binds those buffers together with the input and output textures.
+class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
+    /// Folded scale, same byte length as the scale tensor's buffer.
+    var newScale: MTLBuffer
+    /// Folded bias, same byte length as the bias tensor's buffer.
+    var newBias: MTLBuffer
+    
+    /// Allocates and fills the folded buffers.
+    ///
+    /// - Parameters:
+    ///   - device: Metal device used to allocate the buffers and build the base kernel.
+    ///   - param: source of the scale, bias, mean and variance tensors and of `epsilon`.
+    /// - Note: traps with `fatalError()` if either buffer cannot be allocated.
+    required init(device: MTLDevice, param: BatchNormParam<P>) {
+        guard let newScale = device.makeBuffer(length: param.inputScale.buffer.length) else {
+            fatalError()
+        }
+        guard let newBias = device.makeBuffer(length: param.inputBias.buffer.length) else {
+            fatalError()
+        }
+        self.newScale = newScale
+        self.newBias = newBias
+        
+        super.init(device: device, inFunctionName: "batchnorm")
+        
+        // `MTLBuffer.length` is in bytes; divide by the element stride to get counts.
+        let elementStride = MemoryLayout<P>.stride
+        let varianceBuffer : MTLBuffer = param.inputVariance.buffer
+        let varianceCount = varianceBuffer.length / elementStride
+        let scaleCount = newScale.length / elementStride
+        
+        // One slot per element rather than per byte. Sizing to the larger of the
+        // two counts keeps every index the second loop reads in range; slots
+        // beyond `varianceCount` stay 0.
+        var invStd: [Float32] = Array(repeating: 0, count: max(varianceCount, scaleCount))
+        let varianceContents = varianceBuffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<varianceCount {
+            invStd[i] = 1 / (Float32(varianceContents[i]) + param.epsilon).squareRoot()
+        }
+        
+        let newScaleContents = newScale.contents().assumingMemoryBound(to: P.self)
+        let newBiasContents = newBias.contents().assumingMemoryBound(to: P.self)
+        let scale : MTLBuffer = param.inputScale.buffer
+        let scaleContents = scale.contents().assumingMemoryBound(to: P.self)
+        let bias : MTLBuffer = param.inputBias.buffer
+        let biasContents = bias.contents().assumingMemoryBound(to: P.self)
+        let meanContents = param.inputMean.buffer.contents().assumingMemoryBound(to: P.self)
+        
+        // NOTE(review): assumes the bias and mean buffers hold at least `scaleCount` elements - confirm.
+        for i in 0..<scaleCount {
+            newScaleContents[i] = P(invStd[i] * Float32(scaleContents[i]))
+            newBiasContents[i] = P(Float32(biasContents[i]) - Float32(meanContents[i]) * invStd[i] * Float32(scaleContents[i]))
+        }
+    }
+    
+    /// Encodes the batch-norm pass into `commandBuffer`.
+    ///
+    /// Textures: input at index 0, output at index 1.
+    /// Buffers: `newScale` at index 0, `newBias` at index 1.
+    ///
+    /// - Throws: `PaddleMobileError.predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: BatchNormParam<P>) throws {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        print("BatchNorm compute")
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+        encoder.setBuffer(newScale, offset: 0, index: 0)
+        encoder.setBuffer(newBias, offset: 0, index: 1)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        encoder.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..0ffe90272fe36fa30d58c7c6bd1e287d49f0e92a
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddBatchNormReluKernel.swift
@@ -0,0 +1,138 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Inputs consumed by `ConvAddBatchNormReluKernel.test`: textures, shader parameters and buffers.
+struct ConvAddBatchNormReluTestParam: TestParam {
+    let inputTexture, outputTexture: MTLTexture
+    var metalParam: MetalConvParam
+    let filterBuffer, biaseBuffer, newScaleBuffer, newBiaseBuffer: MTLBuffer
+    /// Filter extent; read by the kernel's test initializer to choose the shader function.
+    let filterSize: (width: Int, height: Int, channel: Int)
+
+    /// Stores every argument unchanged in the property of the matching name.
+    init(inInputTexture: MTLTexture, inOutputTexture: MTLTexture, inMetalParam: MetalConvParam, inFilterBuffer: MTLBuffer, inBiaseBuffer: MTLBuffer, inNewScaleBuffer: MTLBuffer, inNewBiaseBuffer: MTLBuffer, inFilterSize: (width: Int, height: Int, channel: Int)) {
+        self.inputTexture = inInputTexture
+        self.outputTexture = inOutputTexture
+        self.metalParam = inMetalParam
+        self.filterBuffer = inFilterBuffer
+        self.biaseBuffer = inBiaseBuffer
+        self.newScaleBuffer = inNewScaleBuffer
+        self.newBiaseBuffer = inNewBiaseBuffer
+        self.filterSize = inFilterSize
+    }
+}
+
+/// Fused convolution + bias add + batch norm + ReLU kernel; the shader is chosen from the filter shape.
+class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
+    /// Test-only initializer: it selects the shader function and does nothing else.
+    required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) {
+        if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
+        } else if testParam.filterSize.channel == 1 {
+            super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
+        } else {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
+        }
+    }
+
+    /// Offsets and strides bound at buffer index 0 by `compute`; only the `param` initializer sets it.
+    var metalParam: MetalConvParam!
+
+    /// Selects the shader, creates the tensor buffers and folds the batch-norm statistics into `param.newScale` / `param.newBiase`.
+    required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
+        if param.filter.width == 1 && param.filter.height == 1 {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
+        } else if param.filter.channel == 1 {
+            super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
+        } else {
+            super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
+        }
+        param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+        param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+        param.variance.initBuffer(device: device)
+        param.mean.initBuffer(device: device)
+        param.scale.initBuffer(device: device)
+        param.bias.initBuffer(device: device)
+        let offsetX = param.filter.width/2 - Int(param.paddings[0])
+        let offsetY = param.filter.height/2 - Int(param.paddings[1])
+        print("offset x: \(offsetX)")
+        print("offset y: \(offsetY)")
+        let offsetZ = 0.0
+        metalParam = MetalConvParam.init(offsetX: Int16(offsetX), offsetY: Int16(offsetY), offsetZ: Int16(offsetZ), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(param.input.metalTexture.arrayLength * 4 - param.input.dim[3]))
+
+        // invs[i] = 1 / sqrt(variance[i] + epsilon)
+        var invs: [P] = []
+        let varianceContents = param.variance.buffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<param.variance.buffer.length/MemoryLayout<P>.stride {
+            let inv = 1.0/pow(Float32.init(varianceContents[i]) + param.epsilon, 0.5)
+            invs.append(P(inv))
+        }
+
+        // MTLBuffer.length is a byte count: size the scratch memory in bytes and derive the element count from it.
+        let scaleBytes = param.scale.buffer.length
+        let biasBytes = param.bias.buffer.length
+        let scratchBytes = max(scaleBytes, biasBytes)
+        let channelCount = scaleBytes/MemoryLayout<P>.stride
+        let newScale = UnsafeMutableRawPointer.allocate(byteCount: scratchBytes, alignment: MemoryLayout<P>.alignment).bindMemory(to: P.self, capacity: channelCount)
+        let newBiase = UnsafeMutableRawPointer.allocate(byteCount: scratchBytes, alignment: MemoryLayout<P>.alignment).bindMemory(to: P.self, capacity: channelCount)
+        defer {
+            newScale.deinitialize(count: channelCount).deallocate()
+            newBiase.deinitialize(count: channelCount).deallocate()
+        }
+        let scaleContents = param.scale.buffer.contents().assumingMemoryBound(to: P.self)
+        let biaseContents = param.bias.buffer.contents().assumingMemoryBound(to: P.self)
+        let meanContents = param.mean.buffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<channelCount {
+            (newScale + i).initialize(to: invs[i] * scaleContents[i])
+            (newBiase + i).initialize(to: biaseContents[i] - meanContents[i] * invs[i] * scaleContents[i])
+        }
+        param.newBiase = device.makeBuffer(bytes: newBiase, length: biasBytes)
+        param.newScale = device.makeBuffer(bytes: newScale, length: scaleBytes)
+    }
+
+    /// Encodes one dispatch of the fused shader; throws `PaddleMobileError.predictError` if a folded buffer or the encoder is missing.
+    func compute(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluParam<P>) throws {
+        // Checked before the encoder is created so a failure never leaves an encoder open.
+        guard let newScale = param.newScale, let newBiase = param.newBiase else {
+            throw PaddleMobileError.predictError(message: " new scale or new biase buffer is nil")
+        }
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else { throw PaddleMobileError.predictError(message: " encode is nil") }
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+        encoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        encoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
+        encoder.setBuffer(param.y.buffer, offset: 0, index: 2)
+        encoder.setBuffer(newScale, offset: 0, index: 3)
+        encoder.setBuffer(newBiase, offset: 0, index: 4)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        encoder.endEncoding()
+    }
+
+    /// Encodes the same dispatch from standalone test inputs; traps if no encoder can be created.
+    public func test(commandBuffer: MTLCommandBuffer, param: ConvAddBatchNormReluTestParam) {
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else { fatalError() }
+        encoder.setTexture(param.inputTexture, index: 0)
+        encoder.setTexture(param.outputTexture, index: 1)
+        var inMetalParam = param.metalParam
+        encoder.setBytes(&inMetalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        encoder.setBuffer(param.filterBuffer, offset: 0, index: 1)
+        encoder.setBuffer(param.biaseBuffer, offset: 0, index: 2)
+        encoder.setBuffer(param.newScaleBuffer, offset: 0, index: 3)
+        encoder.setBuffer(param.newBiaseBuffer, offset: 0, index: 4)
+        encoder.dispatch(computePipline: pipline, outTexture: param.outputTexture)
+        encoder.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..81f3aacba8dded3341237e05f9afbc1e04f70596
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvAddKernel.swift
@@ -0,0 +1,47 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Convolution followed by a bias add; always encoded with the `conv_add_1x1` shader function.
+class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
+    /// Offsets and strides bound at buffer index 0 by `compute`; assigned once in `init`.
+    var metalParam: MetalConvParam!
+
+    /// Creates the pipeline, the filter and `y` buffers, and the shader parameter block.
+    required init(device: MTLDevice, param: ConvAddParam<P>) {
+        super.init(device: device, inFunctionName: "conv_add_1x1")
+        let xShift = param.filter.width/2 - Int(param.paddings[0])
+        let yShift = param.filter.height/2 - Int(param.paddings[1])
+        param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+        param.y.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+        print("offset x: \(xShift)")
+        print("offset y: \(yShift)")
+        let zShift = 0.0
+        let paddedZ = param.input.metalTexture.arrayLength * 4 - param.input.dim[3]
+        metalParam = MetalConvParam.init(offsetX: Int16(xShift), offsetY: Int16(yShift), offsetZ: Int16(zShift), strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(paddedZ))
+    }
+
+    /// Encodes one dispatch of the shader; throws `predictError` when no compute encoder can be created.
+    func compute(commandBuffer: MTLCommandBuffer, param: ConvAddParam<P>) throws {
+        guard let computeEncoder = commandBuffer.makeComputeCommandEncoder() else { throw PaddleMobileError.predictError(message: " encode is nil") }
+        computeEncoder.setTexture(param.input.metalTexture, index: 0)
+        computeEncoder.setTexture(param.output.metalTexture, index: 1)
+        computeEncoder.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        computeEncoder.setBuffer(param.filter.buffer, offset: 0, index: 1)
+        computeEncoder.setBuffer(param.y.buffer, offset: 0, index: 2)
+        computeEncoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        computeEncoder.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
new file mode 100644
index 0000000000000000000000000000000000000000..9d0c6de35ed23b14a05a9c3e6398931556d535a0
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.metal
@@ -0,0 +1,400 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+struct MetalConvParam { // mirrors the Swift MetalConvParam bound at buffer(0): six 16-bit fields, same order
+    short offsetX;
+    short offsetY;
+    short offsetZ;
+    ushort strideX;
+    ushort strideY;
+    ushort paddedZ; // arrayLength * 4 - dim[3] on the Swift side; declared so both layouts have the same size
+};
+
+// 1x1 convolution + bias add + folded batch norm + ReLU on half-precision textures.
+// Each thread writes the four channels of output slice gid.z at pixel gid.xy.
+// weights: for every output channel, one half4 per input slice.
+// biase / new_scale / new_biase: one 4-vector per output slice.
+// The sampler uses address::clamp_to_zero, so reads outside the texture yield zero.
+kernel void conv_add_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device half4 *weights [[buffer(1)]],
+                                         const device half4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+    // Threads outside the output volume have nothing to write.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+    const uint slices = inTexture.get_array_size();
+    // Output channel (4 * gid.z + c) owns the weights starting at base + c * perChannel.
+    const uint perChannel = kernelHXW * slices;
+    const uint base = gid.z * perChannel * 4;
+
+    // Input pixel for this thread: gid.xy scaled by the stride and shifted by the offset.
+    const ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    const float2 samplePos = float2(origin.x, origin.y);
+
+    // One dot product per output channel, accumulated over every input slice.
+    half4 acc = half4(0.0);
+    for (uint s = 0; s < slices; ++s) {
+        const half4 texel = inTexture.sample(sample, samplePos, s);
+        acc.x += dot(texel, weights[base + 0 * perChannel + s]);
+        acc.y += dot(texel, weights[base + 1 * perChannel + s]);
+        acc.z += dot(texel, weights[base + 2 * perChannel + s]);
+        acc.w += dot(texel, weights[base + 3 * perChannel + s]);
+    }
+
+    // max((conv + biase) * new_scale + new_biase, 0), evaluated in float and narrowed to half.
+    acc = half4(fmax((float4(acc) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
+    outTexture.write(acc, gid.xy, gid.z);
+}
+
+// 3x3 convolution + bias add + folded batch norm + ReLU on half-precision textures.
+// Each thread writes the four channels of output slice gid.z at pixel gid.xy.
+kernel void conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device half4 *weights [[buffer(1)]],
+                                         const device half4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    // Signed on purpose: param.offsetX / offsetY may be negative. In unsigned 16-bit math a position
+    // of -1 wraps to 65535 and its "+ 1" neighbour becomes 65536 instead of texel 0.
+    const int2 stride = int2(param.strideX, param.strideY);
+    const int2 posInInput = int2(gid.xy) * stride + int2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    half4 output = half4(0.0);
+    half4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+            half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+            half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+            half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    // max((conv + biase) * new_scale + new_biase, 0), evaluated in float and narrowed to half.
+    output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// 1x1 convolution + bias add on half-precision textures.
+// Each thread writes the four channels of output slice gid.z at pixel gid.xy.
+// weights: one half4 per input slice for every output channel; biase: one half4 per output slice.
+kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                         texture2d_array<half, access::write> outTexture [[texture(1)]],
+                         constant MetalConvParam &param [[buffer(0)]],
+                         const device half4 *weights [[buffer(1)]],
+                         const device half4 *biase [[buffer(2)]],
+                         uint3 gid [[thread_position_in_grid]]) {
+    // Threads outside the output volume have nothing to write.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+    const uint slices = inTexture.get_array_size();
+    // Output channel (4 * gid.z + c) owns the weights starting at base + c * perChannel.
+    const uint perChannel = kernelHXW * slices;
+    const uint base = gid.z * perChannel * 4;
+
+    // Input pixel for this thread: gid.xy scaled by the stride and shifted by the offset.
+    const ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    const float2 samplePos = float2(origin.x, origin.y);
+
+    // One dot product per output channel, accumulated over every input slice.
+    half4 acc = half4(0.0);
+    for (uint s = 0; s < slices; ++s) {
+        const half4 texel = inTexture.sample(sample, samplePos, s);
+        acc.x += dot(texel, weights[base + 0 * perChannel + s]);
+        acc.y += dot(texel, weights[base + 1 * perChannel + s]);
+        acc.z += dot(texel, weights[base + 2 * perChannel + s]);
+        acc.w += dot(texel, weights[base + 3 * perChannel + s]);
+    }
+
+    // Add the bias of this output slice and store the texel.
+    acc += biase[gid.z];
+    outTexture.write(acc, gid.xy, gid.z);
+}
+
+// Depthwise 3x3 convolution + bias add + folded batch norm + ReLU (half): output slice gid.z reads only input slice gid.z.
+kernel void depthwise_conv_add_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                                                   texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                                   constant MetalConvParam &param [[buffer(0)]],
+                                                   const device half *weights [[buffer(1)]],
+                                                   const device half4 *biase [[buffer(2)]],
+                                                   const device float4 *new_scale [[buffer(3)]],
+                                                   const device float4 *new_biase [[buffer(4)]],
+                                                   uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    uint output_slice = gid.z;
+    // Signed on purpose: param.offsetX / offsetY may be negative. In unsigned 16-bit math a position
+    // of -1 wraps to 65535 and its "+ 1" neighbour becomes 65536 instead of texel 0.
+    const int2 stride = int2(param.strideX, param.strideY);
+    const int2 posInInput = int2(gid.xy) * stride + int2(param.offsetX, param.offsetY);
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint weithTo = gid.z * kernelHXW * 4;
+    half4 output = half4(0.0);
+    half4 inputs[9];
+    inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y - 1), output_slice);
+    inputs[1] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y - 1), output_slice);
+    inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y - 1), output_slice);
+    inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y), output_slice);
+    inputs[4] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y), output_slice);
+    inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y), output_slice);
+    inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y + 1), output_slice);
+    inputs[7] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y + 1), output_slice);
+    inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y + 1), output_slice);
+    for (int j = 0; j < 9; ++j) {
+        half4 input = inputs[j];
+        output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
+        output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
+        output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
+        output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
+    }
+    output = half4(fmax((float4(output) + float4(biase[gid.z])) * new_scale[gid.z] + new_biase[gid.z], 0.0));
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+
+/*---------------------------------------------*/
+
+
+
+// 1x1 convolution + bias add + folded batch norm + ReLU on float textures.
+// Each thread writes the four channels of output slice gid.z at pixel gid.xy.
+// weights: one float4 per input slice for every output channel; other buffers: one float4 per output slice.
+kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+    // Threads outside the output volume have nothing to write.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+    const uint slices = inTexture.get_array_size();
+    // Output channel (4 * gid.z + c) owns the weights starting at base + c * perChannel.
+    const uint perChannel = kernelHXW * slices;
+    const uint base = gid.z * perChannel * 4;
+
+    // Input pixel for this thread: gid.xy scaled by the stride and shifted by the offset.
+    const ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    const float2 samplePos = float2(origin.x, origin.y);
+
+    // One dot product per output channel, accumulated over every input slice.
+    float4 acc = float4(0.0);
+    for (uint s = 0; s < slices; ++s) {
+        const float4 texel = inTexture.sample(sample, samplePos, s);
+        acc.x += dot(texel, weights[base + 0 * perChannel + s]);
+        acc.y += dot(texel, weights[base + 1 * perChannel + s]);
+        acc.z += dot(texel, weights[base + 2 * perChannel + s]);
+        acc.w += dot(texel, weights[base + 3 * perChannel + s]);
+    }
+
+    // max((conv + biase) * new_scale + new_biase, 0)
+    acc = fmax((acc + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(acc, gid.xy, gid.z);
+}
+
+// 3x3 convolution + bias add + folded batch norm + ReLU on float textures.
+// Each thread writes the four channels of output slice gid.z at pixel gid.xy.
+kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float4 *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    // Signed on purpose: param.offsetX / offsetY may be negative. In unsigned 16-bit math a position
+    // of -1 wraps to 65535 and its "+ 1" neighbour becomes 65536 instead of texel 0.
+    const int2 stride = int2(param.strideX, param.strideY);
+    const int2 posInInput = int2(gid.xy) * stride + int2(param.offsetX, param.offsetY);
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint input_arr_size = inTexture.get_array_size();
+    uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
+
+    float4 output = float4(0.0);
+    float4 input[9];
+    for (uint i = 0; i < input_arr_size; ++i) {
+        input[0] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y - 1), i);
+        input[1] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y - 1), i);
+        input[2] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y - 1), i);
+        input[3] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y), i);
+        input[4] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y), i);
+        input[5] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y), i);
+        input[6] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y + 1), i);
+        input[7] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y + 1), i);
+        input[8] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y + 1), i);
+        for (int j = 0; j < 9; ++j) {
+            float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.x += dot(input[j], weight_x);
+            float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.y += dot(input[j], weight_y);
+            float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.z += dot(input[j], weight_z);
+            float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
+            output.w += dot(input[j], weight_w);
+        }
+    }
+    // max((conv + biase) * new_scale + new_biase, 0)
+    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(output, gid.xy, gid.z);
+}
+
+// 1x1 convolution + bias add on float textures.
+// Each thread writes the four channels of output slice gid.z at pixel gid.xy.
+// weights: one float4 per input slice for every output channel; biase: one float4 per output slice.
+kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                         constant MetalConvParam &param [[buffer(0)]],
+                         const device float4 *weights [[buffer(1)]],
+                         const device float4 *biase [[buffer(2)]],
+                         uint3 gid [[thread_position_in_grid]]) {
+    // Threads outside the output volume have nothing to write.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 1;
+    const uint slices = inTexture.get_array_size();
+    // Output channel (4 * gid.z + c) owns the weights starting at base + c * perChannel.
+    const uint perChannel = kernelHXW * slices;
+    const uint base = gid.z * perChannel * 4;
+
+    // Input pixel for this thread: gid.xy scaled by the stride and shifted by the offset.
+    const ushort2 stride = ushort2(param.strideX, param.strideY);
+    const ushort2 origin = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
+    const float2 samplePos = float2(origin.x, origin.y);
+
+    // One dot product per output channel, accumulated over every input slice.
+    float4 acc = float4(0.0);
+    for (uint s = 0; s < slices; ++s) {
+        const float4 texel = inTexture.sample(sample, samplePos, s);
+        acc.x += dot(texel, weights[base + 0 * perChannel + s]);
+        acc.y += dot(texel, weights[base + 1 * perChannel + s]);
+        acc.z += dot(texel, weights[base + 2 * perChannel + s]);
+        acc.w += dot(texel, weights[base + 3 * perChannel + s]);
+    }
+
+    // Add the bias of this output slice and store the texel.
+    acc += biase[gid.z];
+    outTexture.write(acc, gid.xy, gid.z);
+}
+
+// Depthwise 3x3 convolution + bias add + folded batch norm + ReLU (float): output slice gid.z reads only input slice gid.z.
+kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
+                                         texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                         constant MetalConvParam &param [[buffer(0)]],
+                                         const device float *weights [[buffer(1)]],
+                                         const device float4 *biase [[buffer(2)]],
+                                         const device float4 *new_scale [[buffer(3)]],
+                                         const device float4 *new_biase [[buffer(4)]],
+                                         uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) {
+        return;
+    }
+    uint output_slice = gid.z;
+    // Signed on purpose: param.offsetX / offsetY may be negative. In unsigned 16-bit math a position
+    // of -1 wraps to 65535 and its "+ 1" neighbour becomes 65536 instead of texel 0.
+    const int2 stride = int2(param.strideX, param.strideY);
+    const int2 posInInput = int2(gid.xy) * stride + int2(param.offsetX, param.offsetY);
+    constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
+    const uint kernelHXW = 9;
+    uint weithTo = gid.z * kernelHXW * 4;
+    float4 output = float4(0.0);
+    float4 inputs[9];
+    inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y - 1), output_slice);
+    inputs[1] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y - 1), output_slice);
+    inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y - 1), output_slice);
+    inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y), output_slice);
+    inputs[4] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y), output_slice);
+    inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y), output_slice);
+    inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1,    posInInput.y + 1), output_slice);
+    inputs[7] = inTexture.sample(sample, float2(posInInput.x,        posInInput.y + 1), output_slice);
+    inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1,    posInInput.y + 1), output_slice);
+    for (int j = 0; j < 9; ++j) {
+        float4 input = inputs[j];
+        output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
+        output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
+        output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
+        output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
+    }
+    output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
+    outTexture.write(output, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..92c43fe3218aa0c3ecfabd9a8d85c8107ecad273
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ConvKernel.swift
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+
+public struct MetalConvParam {   // Parameter block that ConvKernel uploads with setBytes at buffer index 0.
+    let offsetX: Int16    // filter.dim[2] / 2 - paddings[0], as computed in ConvKernel.init
+    let offsetY: Int16    // filter.dim[1] / 2 - paddings[1], as computed in ConvKernel.init
+    let offsetZ: Int16    // always 0 when built by ConvKernel.init
+    let strideX: UInt16   // param.stride[0]
+    let strideY: UInt16   // param.stride[1]
+    let paddedZ: UInt16   // input.metalTexture.arrayLength * 4 - input.dim[3]
+}
+
+class ConvKernel<P: PrecisionType>: Kernel, Computable {
+    var metalParam: MetalConvParam!   // filled once in init, re-sent to buffer 0 on every compute call
+    /// Builds the "conv_add_1x1" pipeline, creates the Float32 filter buffer and fills `metalParam` from `param`.
+    required init(device: MTLDevice, param: ConvParam<P>) {
+        super.init(device: device, inFunctionName: "conv_add_1x1")
+        let shiftX = param.filter.dim[2]/2 - Int(param.paddings[0])
+        let shiftY = param.filter.dim[1]/2 - Int(param.paddings[1])
+        param.filter.initBuffer(device: device, precision: Tensor.BufferPrecision.Float32)
+        let zPadding = param.input.metalTexture.arrayLength * 4 - param.input.dim[3]
+        metalParam = MetalConvParam(offsetX: Int16(shiftX), offsetY: Int16(shiftY), offsetZ: 0, strideX: UInt16(param.stride[0]), strideY: UInt16(param.stride[1]), paddedZ: UInt16(zPadding))
+    }
+    
+    /// Encodes one compute pass: input → texture 0, output → texture 1, `metalParam` → buffer 0, filter → buffer 1.
+    func compute(commandBuffer: MTLCommandBuffer, param: ConvParam<P>) throws {
+        guard let pass = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+        pass.setTexture(param.input.metalTexture, index: 0)
+        pass.setTexture(param.output.metalTexture, index: 1)
+        pass.setBytes(&metalParam, length: MemoryLayout<MetalConvParam>.size, index: 0)
+        pass.setBuffer(param.filter.buffer, offset: 0, index: 1)
+        pass.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        pass.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..361e77950841f2fa2b54884a2fbf394714f10902
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ElementwiseAddKernel.swift
@@ -0,0 +1,26 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+
+class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {   // Kernel wrapper bound to the "elementwise_add" function.
+    required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
+        super.init(device: device, inFunctionName: "elementwise_add")
+    }
+    /// Currently a no-op: no encoder is created and nothing is added to `commandBuffer`.
+    func compute(commandBuffer: MTLCommandBuffer, param: ElementwiseAddParam<P>) throws {
+        // NOTE(review): body is empty — presumably a placeholder; confirm before relying on this op.
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..8f97d61e83fc71efca8a4d41705b3eb56d7dbdb3
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernel.swift
@@ -0,0 +1,86 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Metal
+import Foundation
+
+/// Marker protocol with no requirements; `Testable.TestParamType` is constrained to it.
+public protocol TestParam {}
+
+public protocol Testable {   // A type constructible from a TestParam that exposes test(commandBuffer:param:).
+    associatedtype TestParamType: TestParam
+    init(device: MTLDevice, testParam: TestParamType)
+    func test(commandBuffer: MTLCommandBuffer, param: TestParamType)   // declared non-throwing
+}
+
+
+protocol Computable {   // A kernel constructible from an OpParam that exposes a throwing compute(commandBuffer:param:).
+    associatedtype ParamType: OpParam
+    init(device: MTLDevice, param: ParamType)
+    func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws
+}
+
+/// Read-write function-name and pipeline-state requirements; no type in Kernel.swift declares conformance.
+protocol KernelProtocol {
+    var functionName: String { get set }
+    var pipline: MTLComputePipelineState { get set }
+}
+
+open class Kernel {   // Base class pairing a Metal function name with its compute pipeline state.
+    let functionName: String
+    let pipline: MTLComputePipelineState
+    public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) {
+        self.functionName = inFunctionName
+        self.pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib)   // resolved once, at construction
+    }
+}
+
+open class CusomKernel: Kernel {
+    public struct Shape {   // output extent: width x height x channel
+        public let width: Int
+        public let height: Int
+        public let channel: Int
+        public init(inWidth: Int, inHeight: Int, inChannel: Int){
+            self.width = inWidth
+            self.height = inHeight
+            self.channel = inChannel
+        }
+    }
+    let outputTexture: MTLTexture
+    /// Allocates a shared, shader read/write rgba32Float texture sized from `outputDim`, then builds the pipeline.
+    public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) {
+        let desc = MTLTextureDescriptor()
+        desc.textureType = .type2D
+        desc.pixelFormat = .rgba32Float
+        desc.width = outputDim.width
+        desc.height = outputDim.height
+        desc.depth = (outputDim.channel + 3) / 4   // ceil(channel / 4); NOTE(review): depth > 1 on a .type2D descriptor looks suspect — confirm
+        desc.storageMode = .shared
+        desc.usage = [.shaderRead, .shaderWrite]
+        outputTexture = device.makeTexture(descriptor: desc) ?! " make texture error "
+        super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib)
+    }
+    
+    /// Encodes one pass of this kernel: `inputTexuture` → texture 0, `outputTexture` → texture 1.
+    func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
+        guard let pass = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+        pass.setTexture(inputTexuture, index: 0)
+        pass.setTexture(outputTexture, index: 1)
+        pass.dispatch(computePipline: pipline, outTexture: outputTexture)
+        pass.endEncoding()
+    }
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
new file mode 100644
index 0000000000000000000000000000000000000000..92ee1184520d7b1df2577c1fc52cc3257de7be79
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Kernels.metal
@@ -0,0 +1,252 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#include <metal_stdlib>
+using namespace metal;
+
+struct OutputDim {   // constant-buffer argument of the resize kernel (buffer 0)
+    ushort width;     // not read by the resize kernel
+    ushort height;    // not read by the resize kernel
+    ushort strideX;   // multiplies gid.x to obtain the source column
+    ushort strideY;   // multiplies gid.y to obtain the source row
+};
+
+// Strided copy: each output texel (x, y, slice) takes the input texel at (x * strideX, y * strideY).
+// The same 2D source texel is written to every slice of the output array; params.width/height are unused.
+kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
+                   texture2d_array<half, access::write> outTexture [[texture(1)]],
+                   constant OutputDim &params [[buffer(0)]],
+                   uint3 gid [[thread_position_in_grid]]) {
+    // Threads outside the output extent do nothing.
+    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return;
+    
+    const uint2 srcPos = gid.xy * uint2(params.strideX, params.strideY);
+    // NOTE(review): srcPos is not range-checked against inTexture — confirm callers size the grid accordingly.
+    outTexture.write(inTexture.read(srcPos), gid.xy, gid.z);
+}
+
+// ReLU: writes max(x, 0) for every component of every texel; the max is taken in float, then narrowed to half.
+kernel void relu(texture2d_array<half, access::sample> inTexture [[texture(0)]],
+                 texture2d_array<half, access::write> outTexture [[texture(1)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+    // Threads outside the output extent do nothing.
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    const float4 texel = float4(inTexture.read(gid.xy, gid.z));
+    outTexture.write(half4(fmax(texel, 0.0)), gid.xy, gid.z);
+}
+
+// Currently a pass-through: copies each input texel to the output unchanged. `biasTerms` is bound but never read.
+// NOTE(review): no addition happens here yet — presumably a placeholder; confirm the intended semantics.
+kernel void elementwise_add(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                            texture2d_array<half, access::write> outTexture [[texture(1)]],
+                            const device half4 *biasTerms [[buffer(0)]],
+                            uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() ||
+        gid.y >= outTexture.get_height() ||
+        gid.z >= outTexture.get_array_size()) return;
+    outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
+}
+
+// Per-slice affine transform: out = in * newScale[slice] + newBias[slice], with one half4 of scale/bias per slice.
+kernel void batchnorm(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                      texture2d_array<half, access::write> outTexture [[texture(1)]],
+                      const device half4 * newScale [[buffer(0)]],
+                      const device half4 * newBias [[buffer(1)]],
+                      uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size();
+    if (outside) return;
+    const uint slice = gid.z;
+    const half4 result = inTexture.read(gid.xy, slice) * newScale[slice] + newBias[slice];
+    outTexture.write(result, gid.xy, slice);
+}
+
+//kernel void texture2d_to_2d_array(texture2d<half, access::read> inTexture [[texture(0)]],
+//                               texture2d_array<half, access::write> outTexture [[texture(1)]],
+//                               uint3 gid [[thread_position_in_grid]]) {
+//    if (gid.x >= inTexture.get_width() ||
+//        gid.y >= inTexture.get_height()){
+//        return;
+//    }
+//    const half4 input = inTexture.read(gid.xy);
+//    outTexture.write(input, gid.xy, 0);
+//}
+
+// Copies a 2D float texture into slice 0 of a 2D array texture, one texel per thread.
+// The bounds check uses the INPUT texture size; gid.z is ignored.
+kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
+                                  texture2d_array<float, access::write> outTexture [[texture(1)]],
+                                  uint3 gid [[thread_position_in_grid]]) {
+    const bool inside = gid.x < inTexture.get_width() && gid.y < inTexture.get_height();
+    if (inside) {
+        outTexture.write(inTexture.read(gid.xy), gid.xy, 0);
+    }
+}
+
+
+// Half-precision variant: copies a 2D half texture into slice 0 of a 2D array texture, one texel per thread.
+// The bounds check uses the INPUT texture size; gid.z is ignored.
+kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
+                                  texture2d_array<half, access::write> outTexture [[texture(1)]],
+                                  uint3 gid [[thread_position_in_grid]]) {
+    const bool inside = gid.x < inTexture.get_width() && gid.y < inTexture.get_height();
+    if (inside) {
+        outTexture.write(inTexture.read(gid.xy), gid.xy, 0);
+    }
+}
+
+struct PoolParam {   // constant-buffer argument (buffer 0) of the pool and pool_half kernels
+    int ksizeX;      // filled from ksize[0] by PoolKernel.swift
+    int ksizeY;      // filled from ksize[1]
+    int strideX;     // filled from stride[0]
+    int strideY;     // filled from stride[1]
+    int paddingX;    // filled from padding[0]
+    int paddingY;    // filled from padding[1]
+    int poolType;    // 0 selects the max branch, 1 the averaging branch
+};
+
+// Max (poolType 0) or average (poolType 1) pooling over a ksizeX x ksizeY window, applied per array slice.
+// The window is clipped to the input, but the average still divides by ksizeX * ksizeY; other poolType values write zeros.
+kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                 texture2d_array<float, access::write> outTexture [[texture(1)]],
+                 constant PoolParam &pm [[buffer(0)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return;
+    int xmin = gid.x * pm.strideX - pm.paddingX;
+    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
+    xmin = max(xmin, 0);
+    // Rows must use the Y stride/padding/kernel size so that non-square pooling is handled correctly.
+    int ymin = gid.y * pm.strideY - pm.paddingY;
+    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
+    ymin = max(ymin, 0);
+    float4 r = 0;
+    if (pm.poolType == 0) {
+        r = inTexture.read(uint2(xmin, ymin), gid.z);   // seed with the window's first texel rather than 0
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
+            }
+        }
+    } else if (pm.poolType == 1) {
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r += inTexture.read(uint2(x, y), gid.z);
+            }
+        }
+        r /= pm.ksizeX * pm.ksizeY;
+    }
+    outTexture.write(r, gid.xy, gid.z);
+}
+
+
+// Half-precision max (poolType 0) or average (poolType 1) pooling over a ksizeX x ksizeY window, per array slice.
+// The window is clipped to the input, but the average still divides by ksizeX * ksizeY; other poolType values write zeros.
+kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                 texture2d_array<half, access::write> outTexture [[texture(1)]],
+                 constant PoolParam &pm [[buffer(0)]],
+                 uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return;
+    int xmin = gid.x * pm.strideX - pm.paddingX;
+    int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
+    xmin = max(xmin, 0);
+    // Rows must use the Y stride/padding/kernel size so that non-square pooling is handled correctly.
+    int ymin = gid.y * pm.strideY - pm.paddingY;
+    int ymax = min(ymin + pm.ksizeY, int(inTexture.get_height()));
+    ymin = max(ymin, 0);
+    half4 r = 0;
+    if (pm.poolType == 0) {
+        r = inTexture.read(uint2(xmin, ymin), gid.z);   // seed with the window's first texel rather than 0
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r = fmax(r, inTexture.read(uint2(x, y), gid.z));
+            }
+        }
+    } else if (pm.poolType == 1) {
+        for (int x = xmin; x < xmax; x++) {
+            for (int y = ymin; y < ymax; y++) {
+                r += inTexture.read(uint2(x, y), gid.z);
+            }
+        }
+        r /= pm.ksizeX * pm.ksizeY;
+    }
+    outTexture.write(r, gid.xy, gid.z);
+}
+
+// Fills every texel of output slice z with the input texel at (0, 0) of the same slice.
+// NOTE(review): only input position (0, 0) is ever read — presumably the input is 1x1 per slice; confirm.
+kernel void reshape(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                    texture2d_array<float, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size();
+    if (outside) return;
+    const uint2 origin = uint2(0, 0);
+    outTexture.write(inTexture.read(origin, gid.z), gid.xy, gid.z);
+}
+
+// Half-precision variant: fills every texel of output slice z with the input texel at (0, 0) of the same slice.
+// NOTE(review): only input position (0, 0) is ever read — presumably the input is 1x1 per slice; confirm.
+kernel void reshape_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                    texture2d_array<half, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    const bool outside = gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size();
+    if (outside) return;
+    const uint2 origin = uint2(0, 0);
+    outTexture.write(inTexture.read(origin, gid.z), gid.xy, gid.z);
+}
+
+// Softmax across all four components of every slice at position (0, 0): the maximum is subtracted before
+// exponentiating, and this thread's own texel is then normalised by the accumulated sum.
+// NOTE(review): max and sum are taken at (0, 0) only — presumably the texture is 1x1 per slice; confirm.
+kernel void softmax(texture2d_array<float, access::read> inTexture [[texture(0)]],
+                    texture2d_array<float, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return;
+    const int sliceCount = inTexture.get_array_size();
+    const uint2 origin = uint2(0, 0);
+    float peak = inTexture.read(origin, 0)[0];
+    for (int z = 0; z < sliceCount; z++) {
+        const float4 v = inTexture.read(origin, z);
+        peak = max(peak, max(max(v[0], v[1]), max(v[2], v[3])));
+    }
+    float total = 0;
+    for (int z = 0; z < sliceCount; z++) {
+        const float4 v = inTexture.read(origin, z);
+        total += exp(v[0] - peak) + exp(v[1] - peak) + exp(v[2] - peak) + exp(v[3] - peak);
+    }
+    outTexture.write(exp(inTexture.read(gid.xy, gid.z) - peak) / total, gid.xy, gid.z);
+}
+
+
+// Half-precision softmax: max over all components of every slice at (0, 0), sum of exponentials kept in float.
+// NOTE(review): max and sum are taken at (0, 0) only — presumably the texture is 1x1 per slice; confirm.
+kernel void softmax_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
+                    texture2d_array<half, access::write> outTexture [[texture(1)]],
+                    uint3 gid [[thread_position_in_grid]]) {
+    if (gid.x >= outTexture.get_width() || gid.y >= outTexture.get_height() || gid.z >= outTexture.get_array_size()) return;
+    const int sliceCount = inTexture.get_array_size();
+    half peak = inTexture.read(uint2(0, 0), 0)[0];
+    for (int z = 0; z < sliceCount; z++) {
+        const half4 v = inTexture.read(uint2(0, 0), z);
+        peak = max(peak, max(max(v[0], v[1]), max(v[2], v[3])));
+    }
+    float total = 0;
+    for (int z = 0; z < sliceCount; z++) {
+        const half4 v = inTexture.read(uint2(0, 0), z);
+        total += exp(v[0] - peak) + exp(v[1] - peak) + exp(v[2] - peak) + exp(v[3] - peak);
+    }
+    half4 texel = inTexture.read(gid.xy, gid.z);
+    texel = exp(texel - peak) / total;
+    outTexture.write(texel, gid.xy, gid.z);
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..983a3acb9943f2e549b07d095c7dd4a23c1e96d9
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/PoolKernel.swift
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+struct PoolMetalParam {   // Seven Int32 fields in the same order as the Metal-side PoolParam struct.
+    let ksizeX: Int32     // param.ksize[0]
+    let ksizeY: Int32     // param.ksize[1]
+    let strideX: Int32    // param.stride[0]
+    let strideY: Int32    // param.stride[1]
+    let paddingX: Int32   // param.padding[0]
+    let paddingY: Int32   // param.padding[1]
+    let poolType: Int32   // 0 for "max", 1 for "avg" (see PoolKernel.compute)
+}
+
+class PoolKernel<P: PrecisionType>: Kernel, Computable{
+    /// Encodes a pooling pass: input → texture 0, output → texture 1, a `PoolMetalParam` → buffer 0.
+    /// - Throws: `PaddleMobileError.predictError` for a pool type other than "max"/"avg", or when no encoder can be made.
+    func compute(commandBuffer: MTLCommandBuffer, param: PoolParam<P>) throws {
+        // Resolve the pool type before opening an encoder, so the error path never leaves an encoder without endEncoding().
+        let poolType: Int32
+        switch param.poolType {
+        case "max": poolType = 0
+        case "avg": poolType = 1
+        default: throw PaddleMobileError.predictError(message: " unknown pooltype " + param.poolType)
+        }
+        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        encoder.setTexture(param.input.metalTexture, index: 0)
+        encoder.setTexture(param.output.metalTexture, index: 1)
+        var pmp = PoolMetalParam.init(
+            ksizeX: param.ksize[0],
+            ksizeY: param.ksize[1],
+            strideX: param.stride[0],
+            strideY: param.stride[1],
+            paddingX: param.padding[0],
+            paddingY: param.padding[1],
+            poolType: poolType
+        )
+        encoder.setBytes(&pmp, length: MemoryLayout<PoolMetalParam>.size, index: 0)
+        encoder.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        encoder.endEncoding()
+    }
+    
+    required init(device: MTLDevice, param: PoolParam<P>) {
+        super.init(device: device, inFunctionName: "pool")
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..3c669cf4d965f7842070c4d38427f6d1d7440db5
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReluKernel.swift
@@ -0,0 +1,31 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+class ReluKernel<P: PrecisionType>: Kernel, Computable{
+    required init(device: MTLDevice, param: ReluParam<P>) {
+        super.init(device: device, inFunctionName: "relu")
+    }
+    /// Encodes one "relu" pass: `param.input` → texture 0, `param.output` → texture 1, dispatched against the output texture.
+    func compute(commandBuffer: MTLCommandBuffer, param: ReluParam<P>) throws {
+        guard let pass = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+        pass.setTexture(param.input.metalTexture, index: 0)
+        pass.setTexture(param.output.metalTexture, index: 1)
+        pass.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        pass.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..438c89e59eb7e9a2ef315997b9d8d1f3a44a5462
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ReshapeKernel.swift
@@ -0,0 +1,31 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
+    /// Encodes one "reshape" pass: `param.input` → texture 0, `param.output` → texture 1, dispatched against the output texture.
+    func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws {
+        guard let pass = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        pass.setTexture(param.input.metalTexture, index: 0)
+        pass.setTexture(param.output.metalTexture, index: 1)
+        pass.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        pass.endEncoding()
+    }
+    required init(device: MTLDevice, param: ReshapeParam<P>) {
+        super.init(device: device, inFunctionName: "reshape")
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..d2795111ad1f43c759b95aa52ed34085a4ac147a
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/ResizeKernel.swift
@@ -0,0 +1,62 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+//
+//import Foundation
+//import MetalPerformanceShaders
+//
+//
+//struct ResizeParam: OpParam{
+//    typealias OutputType = <#type#>
+//    
+//    typealias ParamPrecisionType = <#type#>
+//    
+//    let input: MTLTexture
+//    let output: MTLTexture
+//    let expectDim: Dim
+//}
+//
+//struct OutputDim {
+//    let width: UInt16
+//    let height: UInt16
+//    let strideX: UInt16
+//    let strideY: UInt16
+//}
+//
+//class ResizeKernel<P: PrecisionType>: Kernel, Computable{
+//    var lanczos: MPSImageLanczosScale
+//    required init(device: MTLDevice, param: ResizeParam) {
+//        lanczos = MPSImageLanczosScale.init(device: device)
+//        super.init(device: device, inFunctionName: "resize")
+//    }
+//    func compute(commandBuffer: MTLCommandBuffer, param: ResizeParam) throws {
+////        guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
+////            throw PaddleMobileError.predictError(message: " encode is nil")
+////        }
+//        lanczos.encode(commandBuffer: commandBuffer, sourceTexture: param.input, destinationTexture: param.output)
+//        
+////        encoder.setTexture(param.input, index: 0)
+////        encoder.setTexture(param.output, index: 1)
+////        let strideX = param.input.width/param.expectDim[2]
+////        let strideY = param.input.height/param.expectDim[1]
+////        var outputDim = OutputDim.init(width: UInt16(param.expectDim[1]), height: UInt16(param.expectDim[2]), strideX: UInt16(strideX), strideY: UInt16(strideY))
+////        encoder.setBytes(&outputDim, length: MemoryLayout<OutputDim>.size, index: 0)
+////        encoder.dispatch(computePipline: pipline, outTexture: param.output)
+////        encoder.endEncoding()
+//    }
+//    
+//
+//    
+//    
+//}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..b94f0286f43ec482353ff278c6c104da77f47315
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/SoftmaxKernel.swift
@@ -0,0 +1,32 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
+    required init(device: MTLDevice, param: SoftmaxParam<P>) {
+        super.init(device: device, inFunctionName: "softmax")
+    }
+    
+    /// Encodes one "softmax" pass: `param.input` → texture 0, `param.output` → texture 1, dispatched against the output texture.
+    func compute(commandBuffer: MTLCommandBuffer, param: SoftmaxParam<P>) throws {
+        guard let pass = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encoder is nil")
+        }
+        pass.setTexture(param.input.metalTexture, index: 0)
+        pass.setTexture(param.output.metalTexture, index: 1)
+        pass.dispatch(computePipline: pipline, outTexture: param.output.metalTexture)
+        pass.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift
new file mode 100644
index 0000000000000000000000000000000000000000..b524c3ac80fac6fa98ac6c9d4e680fee1af4e46a
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/Kernels/Texture2DTo2DArrayKernel.swift
@@ -0,0 +1,37 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+struct Texture2DTo2DArrayParam {   // Not referenced by Texture2DTo2DArrayKernel in this file, which takes FeedParam instead.
+    let input: MTLTexture
+    let output: MTLTexture
+    let expectDim: Dim
+}
+
+class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{
+    required init(device: MTLDevice, param: FeedParam<P>) {
+        super.init(device: device, inFunctionName: "texture2d_to_2d_array")
+    }
+    /// Encodes the 2D → 2D-array copy: `param.input.mtlTexture` → texture 0, `param.output.metalTexture` → texture 1.
+    func compute(commandBuffer: MTLCommandBuffer, param: FeedParam<P>) throws {
+        guard let pass = commandBuffer.makeComputeCommandEncoder() else {
+            throw PaddleMobileError.predictError(message: " encode is nil")
+        }
+        pass.setTexture(param.input.mtlTexture, index: 0)
+        pass.setTexture(param.output.metalTexture, index: 1)
+        pass.dispatch(computePipline: pipline, outTexture: param.input.mtlTexture)   // dispatched against the INPUT texture
+        pass.endEncoding()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..07676defe71ec18560df4be630cd04008cd1aad6
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/PoolOp.swift
@@ -0,0 +1,68 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the pool op, read from the op description and the scope.
+class PoolParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+    let input: Texture<P>       // resolved through `inputX`
+    var output: Texture<P>      // resolved through `outputOut`
+    var poolType: String        // attr "pooling_type"
+    var ksize: [Int32]          // attr "ksize"
+    var stride: [Int32]         // attr "strides"
+    var padding: [Int32]        // attr "paddings"
+    var ceilMode: Bool          // attr "ceil_mode"
+    var globalPooling: Bool     // attr "global_pooling"
+
+    /// Resolves the two textures and then the six attributes listed above.
+    /// - Throws: whatever the first failing lookup throws; the error is passed on unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        let descAttrs = opDesc.attrs
+        input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
+        poolType = try PoolParam.getAttr(key: "pooling_type", attrs: descAttrs)
+        ksize = try PoolParam.getAttr(key: "ksize", attrs: descAttrs)
+        stride = try PoolParam.getAttr(key: "strides", attrs: descAttrs)
+        padding = try PoolParam.getAttr(key: "paddings", attrs: descAttrs)
+        ceilMode = try PoolParam.getAttr(key: "ceil_mode", attrs: descAttrs)
+        globalPooling = try PoolParam.getAttr(key: "global_pooling", attrs: descAttrs)
+    }
+}
+
+/// Pool operator: runs `PoolKernel` with a `PoolParam`.
+class PoolOp<P: PrecisionType>: Operator<PoolKernel<P>, PoolParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = PoolOp<P>
+
+    /// Currently does nothing: the dim propagation below is commented out.
+    func inferShape() {
+        // para.output.dim = para.input.dim
+    }
+
+    /// Encodes the kernel into `buffer`; `device` is not used here.
+    /// - Throws: whatever `kernel.compute` throws, unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug helper: logs the input texture, prints ksize, stride, padding and
+    /// pool type (one per line), then logs the output texture.
+    func delogOutput() {
+        print("pool2d delog")
+        let _: P? = para.input.metalTexture.logDesc(header: "pool2d input: ", stridable: true)
+        [para.ksize, para.stride, para.padding].forEach { print($0) }
+        print(para.poolType)
+        let _: P? = para.output.metalTexture.logDesc(header: "pool2d output: ", stridable: true)
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..f65e402cdd2b6356199a2104f99556cd4fdd3b6a
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReluOp.swift
@@ -0,0 +1,48 @@
+///* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+// http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License. */
+
+import Foundation
+
+/// Parameters of the relu op: one input texture and one output texture.
+class ReluParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+    let input: Texture<P>
+    var output: Texture<P>
+
+    /// Resolves both textures from `inScope` using the names listed in `opDesc`.
+    /// - Throws: whatever `inputX` or `outputOut` throws; the error is passed on unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Relu operator: runs `ReluKernel` with a `ReluParam`.
+class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = ReluOp<P>
+
+    /// Copies the input dim onto the output.
+    func inferShape() {
+        para.output.dim = para.input.dim
+    }
+
+    /// Encodes the kernel into `buffer`; `device` is not used here.
+    /// - Throws: whatever `kernel.compute` throws, unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+}
+
+
+
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..759ffd4b8b46673e5245f8bbc67dbcc0956666aa
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/ReshapeOp.swift
@@ -0,0 +1,50 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the reshape op: one input texture and one output texture.
+class ReshapeParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+    let input: Texture<P>
+    var output: Texture<P>
+
+    /// Resolves both textures from `inScope` using the names listed in `opDesc`.
+    /// - Throws: whatever `inputX` or `outputOut` throws; the error is passed on unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Reshape operator: runs `ReshapeKernel` with a `ReshapeParam`.
+class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = ReshapeOp<P>
+    /// Currently does nothing: the dim propagation below is commented out.
+    func inferShape() {
+        // para.output.dim = para.input.dim
+    }
+
+    /// Encodes the kernel into `buffer` (`device` is unused); kernel errors propagate unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug helper: prints a banner, then logs the input and the output texture.
+    func delogOutput() {
+        print("reshape delog")
+        let _: P? = para.input.metalTexture.logDesc(header: "reshape input: ", stridable: false)
+        let _: P? = para.output.metalTexture.logDesc(header: "reshape output: ", stridable: false)
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift
new file mode 100644
index 0000000000000000000000000000000000000000..d323b21cfa7729876a78702d0098c267132b4ab1
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Operators/SoftmaxOp.swift
@@ -0,0 +1,50 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Parameters of the softmax op: one input texture and one output texture.
+class SoftmaxParam<P: PrecisionType>: OpParam {
+    typealias ParamPrecisionType = P
+    let input: Texture<P>
+    var output: Texture<P>
+
+    /// Resolves both textures from `inScope` using the names listed in `opDesc`.
+    /// - Throws: whatever `inputX` or `outputOut` throws; the error is passed on unchanged.
+    required init(opDesc: OpDesc, inScope: Scope) throws {
+        input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope)
+        output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope)
+    }
+}
+
+/// Softmax operator: runs `SoftmaxKernel` with a `SoftmaxParam`.
+class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable{
+    typealias OpType = SoftmaxOp<P>
+    /// Currently does nothing: the dim propagation below is commented out.
+    func inferShape() {
+        // para.output.dim = para.input.dim
+    }
+
+    /// Encodes the kernel into `buffer` (`device` is unused); kernel errors propagate unchanged.
+    func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
+        try kernel.compute(commandBuffer: buffer, param: para)
+    }
+
+    /// Debug helper: prints a banner, then logs the input and the output texture.
+    func delogOutput() {
+        print("softmax delog")
+        let _: P? = para.input.metalTexture.logDesc(header: "softmax input: ", stridable: false)
+        let _: P? = para.output.metalTexture.logDesc(header: "softmax output: ", stridable: false)
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift b/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift
new file mode 100644
index 0000000000000000000000000000000000000000..c26fd2132e6134dbbd05af08835229a31c231b9d
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/Attribute.swift
@@ -0,0 +1,61 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Marker protocol for values that can be stored as an operator attribute.
+///
+/// It declares no requirements of its own; the conformances below only tag
+/// the value types that `attrWithProtoDesc` hands back, so that they can be
+/// kept together in one `[String : Attr]` dictionary.
+protocol Attr {}
+
+// MARK: - Conformances
+
+// Single-value attributes.
+extension Bool: Attr {}
+extension Int: Attr {}
+extension Int64: Attr {}
+extension Float: Attr {}
+extension String: Attr {}
+
+// List attributes. The conformance is unconditional, so an array with any
+// element type counts as an `Attr`, not only the list types produced by
+// `attrWithProtoDesc`.
+extension Array: Attr {}
+
+/// Converts a protobuf attribute into the Swift value stored in `OpDesc.attrs`.
+/// Single values come from the `b`, `i`, `s`, `l` and `f` fields (`i` is converted
+/// to `Int`); list attributes are returned as the arrays held by the message.
+/// - Parameter attrDesc: the attribute message to unpack.
+/// - Returns: the payload selected by `attrDesc.type`.
+/// - Note: any attribute type not listed below ends the process through `fatalError`.
+func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr {
+    let kind = attrDesc.type
+    switch kind {
+    // single values
+    case .boolean:  return attrDesc.b
+    case .int:      return Int(attrDesc.i)
+    case .string:   return attrDesc.s
+    case .long:     return attrDesc.l
+    case .float:    return attrDesc.f
+    // lists
+    case .booleans: return attrDesc.bools
+    case .floats:   return attrDesc.floats
+    case .ints:     return attrDesc.ints
+    case .strings:  return attrDesc.strings
+    default:
+        fatalError(" not support this attr type: \(kind)")
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift
new file mode 100644
index 0000000000000000000000000000000000000000..8e1915a4975d5e444c2a5c0d0ee9e19d3cbe7577
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/BlockDesc.swift
@@ -0,0 +1,67 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Immutable description of one program block: its variables and its ops.
+struct BlockDesc {
+    let index: Int
+    let parentIndex: Int
+    let vars: [VarDesc]
+    let ops: [OpDesc]
+
+    /// Builds the description from its protobuf message.
+    ///
+    /// Variables are sorted by ascending name; ops keep the order of the message.
+    /// - Parameter block: the protobuf block to convert.
+    init(block: PaddleMobile_Framework_Proto_BlockDesc) {
+        index = Int(block.idx)
+        parentIndex = Int(block.parentIdx)
+        vars = block.vars
+            .map { VarDesc(protoVarDesc: $0) }
+            .sorted { $0.name < $1.name }
+        ops = block.ops.map { OpDesc(protoOpDesc: $0) }
+    }
+
+    /// Builds a block directly from already converted descriptions.
+    /// `index` and `parentIndex` are both set to 0; `inVars` is stored in the order given.
+    init(inVars: [VarDesc], inOps: [OpDesc]) {
+        index = 0
+        parentIndex = 0
+        vars = inVars
+        ops = inOps
+    }
+}
+
+/// Text rendering of a block, for printing and debugging.
+extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// The descriptions of all ops followed by the descriptions of all variables,
+    /// concatenated in array order with no separator added here.
+    var description: String {
+        // Ops first ...
+        let opText = ops
+            .map { $0.description }
+            .joined()
+        // ... then the variables.
+        let varText = vars
+            .map { $0.description }
+            .joined()
+        return opText + varText
+    }
+
+    /// Same text as `description`.
+    var debugDescription: String {
+        return description
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/OpDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/OpDesc.swift
new file mode 100644
index 0000000000000000000000000000000000000000..73f81152316ad6812f705979b9c2358ee03eb3c8
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/OpDesc.swift
@@ -0,0 +1,81 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// One operator of a block: its type, its variable bindings and its attributes.
+struct OpDesc {
+    let inputs: [String : [String]]          // input slots listed in `opInfos[type]?.inputs`
+    var paraInputs: [String : [String]]      // every other input slot
+    var outputs: [String : [String]]         // output slots listed in `opInfos[type]?.outputs`
+    let unusedOutputs: [String : [String]]   // every other output slot
+    var attrs: [String : Attr] = [:]
+    var type: String
+
+    /// Builds the description from its protobuf message.
+    ///
+    /// Each slot maps its parameter name to its argument names and is sorted into
+    /// one of the four tables above according to `opInfos`. When the op type has no
+    /// `opInfos` entry, everything lands in `paraInputs` / `unusedOutputs`.
+    /// Attributes of type `.block` are skipped.
+    init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) {
+        let opType = protoOpDesc.type
+        type = opType
+
+        // Keeps the slots whose parameter name passes `accepts`; a repeated
+        // parameter name overwrites the earlier entry.
+        let collect = { (slots: [PaddleMobile_Framework_Proto_OpDesc.Var], accepts: (String) -> Bool) -> [String : [String]] in
+            var table: [String : [String]] = [:]
+            for slot in slots where accepts(slot.parameter) {
+                table[slot.parameter] = slot.arguments
+            }
+            return table
+        }
+        let isKnownInput = { (name: String) -> Bool in opInfos[opType]?.inputs.contains(name) ?? false }
+        let isKnownOutput = { (name: String) -> Bool in opInfos[opType]?.outputs.contains(name) ?? false }
+
+        inputs = collect(protoOpDesc.inputs) { isKnownInput($0) }
+        paraInputs = collect(protoOpDesc.inputs) { !isKnownInput($0) }
+        outputs = collect(protoOpDesc.outputs) { isKnownOutput($0) }
+        unusedOutputs = collect(protoOpDesc.outputs) { !isKnownOutput($0) }
+
+        // Block-typed attributes are not converted.
+        for attr in protoOpDesc.attrs where attr.type != .block {
+            attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
+        }
+    }
+}
+
+/// Multi-line text rendering of an op, for printing and debugging.
+extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// The op type, then inputs, parameter inputs, outputs and attributes, each under its own heading.
+    var description: String {
+        var str = ""
+        str += "op type: \(type): \n"
+        str += "    op inputs: \n"
+        str += "        \(inputs) \n"
+        str += "    op para inputs: \n"
+        str += "        \(paraInputs) \n"
+        // Headed "op outputs": the value printed here is `outputs`, not a parameter table.
+        str += "    op outputs: \n"
+        str += "        \(outputs) \n"
+        str += "    op attrs: \n"
+        str += "        \(attrs) \n"
+        return str
+    }
+    /// Same text as `description`.
+    var debugDescription: String {
+        return description
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/Program.swift b/metal/paddle-mobile/paddle-mobile/Program/Program.swift
new file mode 100644
index 0000000000000000000000000000000000000000..1481677b198f802cd5f29a967513b2df2107bc47
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/Program.swift
@@ -0,0 +1,26 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+public struct Program { // a program description bundled with its parameter path and its scope
+    let programDesc: ProgramDesc
+    let paramPath: String   // stored as given; not inspected in this struct
+    let scope: Scope
+    init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) {
+        self.programDesc = inProgramDesc
+        self.paramPath = inParamPath
+        self.scope = inScope
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift
new file mode 100644
index 0000000000000000000000000000000000000000..ef094a8a20790b4e0cf47eaea04bb7d4f7a2d046
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramDesc.swift
@@ -0,0 +1,44 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Ordered list of the blocks that make up a program.
+public struct ProgramDesc {
+    var blocks: [BlockDesc] = []
+    /// Converts every block of the protobuf program, keeping their order.
+    init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) {
+        blocks = protoProgram.blocks.map { BlockDesc(block: $0) }
+    }
+
+    /// Creates a description without any block.
+    init() {}
+}
+
+/// Text rendering of a program description.
+extension ProgramDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// One "block - <position>: " header line per block, each followed by that
+    /// block's own description.
+    public var description: String {
+        return blocks.enumerated().reduce(into: "") { text, entry in
+            text += "block - \(entry.offset): \n"
+            text += entry.element.description
+        }
+    }
+
+    /// Same text as `description`.
+    public var debugDescription: String {
+        return description
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift
new file mode 100644
index 0000000000000000000000000000000000000000..d819cdad533e444c327e95baff7bf87e902d6bff
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/ProgramOptimize.swift
@@ -0,0 +1,218 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+precedencegroup ChainNode { // precedence group used by the `-->` operator declared below
+    associativity: left // `a --> b --> c` is grouped as `(a --> b) --> c`
+    higherThan: MultiplicationPrecedence // binds tighter than the multiplication operators
+}
+
+infix operator --> : ChainNode // implemented for `Node` in this file
+
+/// A vertex of the op graph that `ProgramOptimize` matches fusion patterns against.
+class Node {
+    var inputs: [Node] = []
+    var outputs: [Node] = []
+    var type: String
+    var opDesc: OpDesc?   // stays nil for nodes made with `init(inType:)`
+    /// Creates a node for a real op; `type` is copied from the description.
+    init(inOpDesc: OpDesc) {
+        type = inOpDesc.type
+        opDesc = inOpDesc
+    }
+
+    /// Creates a node that carries only a type name and no op description.
+    init(inType: String) {
+        type = inType
+    }
+
+    /// Links `lNode` to `rNode` in both directions and returns `rNode` so links can be chained.
+    static func -->(lNode: Node, rNode: Node) -> Node {
+        lNode.outputs.append(rNode)
+        rNode.inputs.append(lNode)
+        return rNode
+    }
+
+    /// Level of the deepest node reachable through `outputs`, where this node sits at
+    /// level `begin` (1 by default). The result is never below `max(begin, 1)`.
+    func depth(begin: UInt = 1) -> UInt {
+        var deepest = max(begin, 1)
+        for output in outputs {
+            // Take the maximum over ALL branches, so a deeper earlier branch is not lost.
+            deepest = max(deepest, output.depth(begin: begin + 1))
+        }
+        return deepest
+    }
+
+    /// Returns a type-only copy of this node and its outputs, `depth` levels deep (`depth` >= 1).
+    func to(depth: UInt) -> Node {
+        let beginNode = Node.init(inType: type)
+        to(depth: depth - 1, withNode: beginNode)
+        return beginNode
+    }
+
+    /// Folds the chain matched by `fusion` into this node, which takes the fused type.
+    /// Every absorbed node is appended to `removedNodes`.
+    func folderWith(fusion: Fusion.Type, removedNodes: inout [Node]) {
+        let fusionNode = fusion.fusionNode()
+        let change = fusion.change()
+        let inOutputs = outputs
+        outputs.removeAll()
+        opDesc?.outputs.removeAll()
+        for i in 0..<inOutputs.count {
+            inOutputs[i].folderWith(beginNode: self, matchNode: fusionNode.outputs[i], change: change, removedNodes: &removedNodes)
+        }
+        opDesc?.type = fusion.fusionType()
+        type = fusion.fusionType()
+    }
+
+    /// Recursive step of the fold: merges this node into `beginNode` and records it as removed.
+    private func folderWith(beginNode: Node, matchNode: Node, change: [String : [(from: String, to: String)]], removedNodes: inout [Node]) {
+        guard let inOpdesc = opDesc else { fatalError() }
+        // Attributes of the absorbed op overwrite same-named ones on the fused op.
+        for attr in inOpdesc.attrs {
+            beginNode.opDesc?.attrs[attr.key] = attr.value
+        }
+        // Move parameter inputs over, applying the key renames listed for this type.
+        // NOTE(review): with several renames, the `else` branch also stores a renamed key under its old name.
+        for paraInput in inOpdesc.paraInputs {
+            if let inChanges = change[type] {
+                for keyChange in inChanges {
+                    if keyChange.from == paraInput.key {
+                        beginNode.opDesc?.paraInputs[keyChange.to] = paraInput.value
+                    } else {
+                        beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
+                    }
+                }
+            } else {
+                beginNode.opDesc?.paraInputs[paraInput.key] = paraInput.value
+            }
+        }
+        // Last node of the pattern: the fused node inherits this node's outputs.
+        if matchNode.outputs.count == 0 {
+            beginNode.outputs.append(contentsOf: outputs)
+            beginNode.opDesc?.outputs = inOpdesc.outputs
+        }
+        removedNodes.append(self)
+        for i in 0..<matchNode.outputs.count {
+            outputs[i].folderWith(beginNode: beginNode, matchNode: matchNode.outputs[i], change: change, removedNodes: &removedNodes)
+        }
+    }
+
+    /// Recursive helper of `to(depth:)`: copies up to `depth` further levels below `withNode`.
+    private func to(depth: UInt, withNode: Node) {
+        if depth < 1 { return }
+        for output in outputs {
+            let node = Node.init(inType: output.type)
+            withNode.outputs.append(node)
+            output.to(depth: depth - 1, withNode: node)
+        }
+    }
+}
+
+/// Structural equality between graph nodes.
+///
+/// Two nodes are equal when they have the same number of outputs, the same
+/// `type`, and pairwise-equal outputs (compared recursively, in order).
+/// `inputs` and `opDesc` are not compared.
+extension Node: Equatable {
+    static func == (lhs: Node, rhs: Node) -> Bool {
+        // Cheap checks first: fan-out, then type name.
+        guard lhs.outputs.count == rhs.outputs.count else {
+            return false
+        }
+        guard lhs.type == rhs.type else {
+            return false
+        }
+        // Both arrays have the same length here, so zip pairs every output.
+        let mismatch = zip(lhs.outputs, rhs.outputs).contains { pair in pair.0 != pair.1 }
+        return !mismatch
+    }
+}
+
+/// Rewrites a program description by collapsing op chains that match a fusion pattern.
+class ProgramOptimize<P: PrecisionType> {
+    /// Fusion patterns, tried in this order.
+    let fusionOps: [Fusion.Type] = [ConvAddBatchNormReluOp<P>.self, ConvAddOp<P>.self]
+
+    /// Builds an op graph from the single block of `originProgramDesc`, fuses every
+    /// chain that matches one of `fusionOps`, and returns the result as a new program.
+    ///
+    /// The new program has one block holding the original variables and the ops
+    /// that remain after fusion. The call ends in `fatalError` when the program
+    /// does not have exactly one block or when the input / output keys of an op
+    /// type cannot be looked up in `opInfos`.
+    func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
+        guard originProgramDesc.blocks.count == 1 else {
+            fatalError(" not support yet")
+        }
+        let block = originProgramDesc.blocks[0]
+
+        var producerOf: [String : Node] = [:]      // variable name -> node that writes it
+        var nodes: [Node] = []                     // one node per op, in program order
+        var nodesByType: [String : [Node]] = [:]   // original op type -> its nodes
+
+        // Pass 1: turn the op list into a graph.
+        for opDesc in block.ops {
+            guard let inputKeys = opInfos[opDesc.type]?.inputs, let outputKeys = opInfos[opDesc.type]?.outputs else {
+                fatalError()
+            }
+            let node = Node(inOpDesc: opDesc)
+
+            // Link this op behind the producer of each of its inputs, when one is known.
+            for key in inputKeys {
+                for name in opDesc.inputs[key] ?? [] {
+                    if let producer = producerOf[name] {
+                        _ = producer --> node
+                    }
+                }
+            }
+
+            // From here on, ops reading these names are linked to this node.
+            for key in outputKeys {
+                for name in opDesc.outputs[key] ?? [] {
+                    producerOf[name] = node
+                }
+            }
+
+            nodes.append(node)
+            nodesByType[opDesc.type, default: []].append(node)
+        }
+
+        // Pass 2: try each pattern on every node whose original type starts it.
+        for fusion in fusionOps {
+            let pattern = fusion.fusionNode()
+            let patternDepth = pattern.depth()
+            for candidate in nodesByType[pattern.type] ?? [] {
+                // Compare a type-only copy of the candidate, cut at the pattern's depth.
+                guard candidate.to(depth: patternDepth) == pattern else {
+                    continue
+                }
+                var absorbed: [Node] = []
+                candidate.folderWith(fusion: fusion, removedNodes: &absorbed)
+                for node in absorbed {
+                    nodes.remove(element: node) // NOTE(review): if this helper compares with ==, Node's structural equality may match a different node — confirm
+                }
+            }
+        }
+
+        // Every node was created with `init(inOpDesc:)`, so `opDesc` is set.
+        let ops = nodes.map { $0.opDesc! }
+
+        var newProgramDesc = ProgramDesc()
+        newProgramDesc.blocks.append(BlockDesc(inVars: block.vars, inOps: ops))
+        return newProgramDesc
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/Scope.swift b/metal/paddle-mobile/paddle-mobile/Program/Scope.swift
new file mode 100644
index 0000000000000000000000000000000000000000..77e32908b30ad7a843a583b47c2a11b76d19f3b9
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/Scope.swift
@@ -0,0 +1,55 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Name-to-variable table with dedicated accessors for the feed and fetch entries.
+class Scope {
+    let feedKey: String
+    let fetchKey: String
+    var vars: [String : Variant] = [:]
+
+    /// Remembers the two keys; the table itself starts out empty.
+    init(inFeedKey: String, inFetchKey: String) {
+        feedKey = inFeedKey
+        fetchKey = inFetchKey
+    }
+
+    /// Reads or writes the entry for `key`; assigning nil removes the entry.
+    subscript(key: String) -> Variant? {
+        get { return vars[key] }
+        set { vars[key] = newValue }
+    }
+
+    /// Stores `input` under `feedKey`, replacing any earlier value.
+    func setInput(input: Variant) {
+        vars[feedKey] = input
+    }
+
+    /// Stores `output` under `fetchKey`, replacing any earlier value.
+    func setOutput(output: Variant) {
+        vars[fetchKey] = output
+    }
+
+    /// The entry under `feedKey`, or nil when there is none.
+    func input() -> Variant? { return vars[feedKey] }
+
+    /// The entry under `fetchKey`, or nil when there is none.
+    func output() -> Variant? { return vars[fetchKey] }
+
+    /// Empties the table, feed and fetch entries included.
+    func clear() {
+        vars.removeAll()
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift
new file mode 100644
index 0000000000000000000000000000000000000000..e564821ab6a68fc96f00aeb10f3b2fba26d9600e
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/TensorDesc.swift
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Shape and element type of a tensor, read from its protobuf description.
+struct TensorDesc {
+    let dims: [Int]
+    let dataType: VarTypeType
+    let dataLayout: DataLayout = .NCHW   // layout `dims` is expressed in; never reassigned in this struct
+    /// `dims` in N, C, H, W order. Shapes that are not 4-D are returned untouched.
+    var NCHWDim: [Int] {
+        if dims.count != 4 {
+            return dims
+        }
+        if dataLayout == .NCHW {
+            return dims
+        } else if dataLayout == .NHWC {
+            // [N, H, W, C] -> [N, C, H, W]. Swapping positions 1 and 3 alone would
+            // yield [N, C, W, H], i.e. height and width exchanged.
+            return [dims[0], dims[3], dims[1], dims[2]]
+        } else {
+            fatalError(" not support other layout")
+        }
+    }
+
+    /// `dims` in N, H, W, C order. Shapes that are not 4-D are returned untouched.
+    var NHWCDim: [Int] {
+        if dims.count != 4 {
+            return dims
+        }
+        if dataLayout == .NHWC {
+            return dims
+        } else if dataLayout == .NCHW {
+            // [N, C, H, W] -> [N, H, W, C]. Swapping positions 1 and 3 alone would
+            // yield [N, W, H, C], which is only right when height equals width.
+            return [dims[0], dims[2], dims[3], dims[1]]
+        } else {
+            fatalError(" not support other layout")
+        }
+    }
+
+    /// Copies the dims and data type out of the protobuf message. Dims that are
+    /// not positive are replaced by 1; an unknown data type becomes `.ErrorType`.
+    init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) {
+        dims = protoTensorDesc.dims.map { Int($0) > 0 ? Int($0) : 1 }
+        dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift b/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift
new file mode 100644
index 0000000000000000000000000000000000000000..58411828c0c94316da089fc1e2442c87bd154594
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/VarDesc.swift
@@ -0,0 +1,98 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Variable kinds and element types used by the model format. Raw values follow
+/// `PaddleMobile_Framework_Proto_VarType.TypeEnum`; `ErrorType` (-1) is the local
+/// fallback used when a proto raw value has no counterpart here.
+enum VarTypeType: Int {
+    case ErrorType = -1,
+    Bool = 0,
+    Int16 = 1,
+    Int32 = 2,
+    Int64 = 3,
+    FP16 = 4,
+    FP32 = 5,
+    FP64 = 6,
+    LodTensor = 7,
+    SelectedRows = 8,
+    FeedMiniBatch = 9,
+    FetchList = 10,
+    StepScopes = 11,
+    StepLodRankTable = 12,
+    StepLodTensorArray = 13,
+    StepPlaceList = 14,
+    Reader = 15,
+    Channel = 16,
+    Raw = 17,
+    Tuple = 18
+
+    /// Size in bytes of one element of this type.
+    ///
+    /// - Returns: 1 for `Bool`, 2 for `Int16`/`FP16`, 4 for `Int32`/`FP32`,
+    ///   8 for `Int64`/`FP64`.
+    /// - Throws: `PaddleMobileError.memoryError` for every other case.
+    func dataTypeSize() throws -> Int {
+        switch self {
+        case .Bool: return 1
+        case .Int16, .FP16: return 2
+        case .Int32, .FP32: return 4
+        case .Int64, .FP64: return 8
+        default:
+            throw PaddleMobileError.memoryError(message: "not support \(self) type to get size ")
+        }
+    }
+}
+
+/// One model variable: name, persistence flag, kind and optional tensor description.
+struct VarDesc {
+    let name: String
+    let persistable: Bool
+    let type: VarTypeType
+    let tensorDesc: TensorDesc?
+    /// Copies the proto fields; only the three tensor-backed kinds get a `tensorDesc`.
+    init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) {
+        name = protoVarDesc.name
+        persistable = protoVarDesc.persistable
+        type = VarTypeType(rawValue: protoVarDesc.type.type.rawValue) ?? .ErrorType
+        let protoTensor: PaddleMobile_Framework_Proto_VarType.TensorDesc?
+        switch type {
+        case .SelectedRows: protoTensor = protoVarDesc.type.selectedRows
+        case .LodTensor: protoTensor = protoVarDesc.type.lodTensor.tensor
+        case .StepLodTensorArray: protoTensor = protoVarDesc.type.tensorArray.tensor
+        default: protoTensor = nil
+        }
+        tensorDesc = protoTensor.map { TensorDesc(protoTensorDesc: $0) }
+    }
+}
+
+extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
+    /// The variable name followed by its dimension count and dimensions, or by a
+    /// short note when the variable has no tensor description.
+    var description: String {
+        let header = "var name \(name): \n"
+        guard let inTensorDesc = tensorDesc else {
+            return header + " no dim info"
+        }
+        let countLine = " dim size: \(inTensorDesc.dims.count) \n"
+        let dimsLine = "    dim: \(inTensorDesc.dims) \n"
+        return header + countLine + dimsLine
+    }
+
+    /// Identical to `description`.
+    var debugDescription: String {
+        return description
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Program/framework.pb.swift b/metal/paddle-mobile/paddle-mobile/Program/framework.pb.swift
new file mode 100644
index 0000000000000000000000000000000000000000..df4af3bcc91853e507321d46d3edfd04045f29ab
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Program/framework.pb.swift
@@ -0,0 +1,1821 @@
+// DO NOT EDIT.
+//
+// Generated by the Swift generator plugin for the protocol buffer compiler.
+// Source: framework.proto
+//
+// For information on using the generated types, please see the documentation:
+//   https://github.com/apple/swift-protobuf/
+
+// Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+//
+//Licensed under the Apache License, Version 2.0 (the "License");
+//you may not use this file except in compliance with the License.
+//You may obtain a copy of the License at
+//
+//http://www.apache.org/licenses/LICENSE-2.0
+//
+//Unless required by applicable law or agreed to in writing, software
+//distributed under the License is distributed on an "AS IS" BASIS,
+//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+//See the License for the specific language governing permissions and
+//limitations under the License. 
+
+import Foundation
+import SwiftProtobuf
+
+// If the compiler emits an error on this type, it is because this file
+// was generated by a version of the `protoc` Swift plug-in that is
+// incompatible with the version of SwiftProtobuf to which you are linking.
+// Please ensure that you are building against the same version of the API
+// that was used to generate this file.
+fileprivate struct _GeneratedWithProtocGenSwiftVersion: SwiftProtobuf.ProtobufAPIVersionCheck {
+  struct _2: SwiftProtobuf.ProtobufAPIVersion_2 {}
+  typealias Version = _2
+}
+
+/// Kinds of value an operator attribute can carry (`AttrType` in framework.proto).
+enum PaddleMobile_Framework_Proto_AttrType: SwiftProtobuf.Enum {
+  typealias RawValue = Int
+  case int // = 0
+  case float // = 1
+  case string // = 2
+  case ints // = 3
+  case floats // = 4
+  case strings // = 5
+  case boolean // = 6
+  case booleans // = 7
+  case block // = 8
+  case long // = 9
+
+  init() {
+    self = .int
+  }
+
+  init?(rawValue: Int) {
+    switch rawValue {
+    case 0: self = .int
+    case 1: self = .float
+    case 2: self = .string
+    case 3: self = .ints
+    case 4: self = .floats
+    case 5: self = .strings
+    case 6: self = .boolean
+    case 7: self = .booleans
+    case 8: self = .block
+    case 9: self = .long
+    default: return nil
+    }
+  }
+
+  var rawValue: Int {
+    switch self {
+    case .int: return 0
+    case .float: return 1
+    case .string: return 2
+    case .ints: return 3
+    case .floats: return 4
+    case .strings: return 5
+    case .boolean: return 6
+    case .booleans: return 7
+    case .block: return 8
+    case .long: return 9
+    }
+  }
+}
+
+/// OpDesc describes an instance of a C++ framework::OperatorBase
+/// derived class type.
+struct PaddleMobile_Framework_Proto_OpDesc {
+  // Carries the operator `type`, its `inputs`/`outputs` (each a `Var`), its `attrs`
+  // and the `isTarget` flag. SwiftProtobuf.Message conformance is added in an
+  // extension below; see `Message` and `Message+*Additions` in SwiftProtobuf.
+
+  var type: String {
+    get {return _type ?? String()}
+    set {_type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return self._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {self._type = nil}
+
+  var inputs: [PaddleMobile_Framework_Proto_OpDesc.Var] = []
+
+  var outputs: [PaddleMobile_Framework_Proto_OpDesc.Var] = []
+
+  var attrs: [PaddleMobile_Framework_Proto_OpDesc.Attr] = []
+
+  var isTarget: Bool {
+    get {return _isTarget ?? false}
+    set {_isTarget = newValue}
+  }
+  /// Returns true if `isTarget` has been explicitly set.
+  var hasIsTarget: Bool {return self._isTarget != nil}
+  /// Clears the value of `isTarget`. Subsequent reads from it will return its default value.
+  mutating func clearIsTarget() {self._isTarget = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  struct Attr {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var name: String {
+      get {return _name ?? String()}
+      set {_name = newValue}
+    }
+    /// Returns true if `name` has been explicitly set.
+    var hasName: Bool {return self._name != nil}
+    /// Clears the value of `name`. Subsequent reads from it will return its default value.
+    mutating func clearName() {self._name = nil}
+
+    var type: PaddleMobile_Framework_Proto_AttrType {
+      get {return _type ?? .int}
+      set {_type = newValue}
+    }
+    /// Returns true if `type` has been explicitly set.
+    var hasType: Bool {return self._type != nil}
+    /// Clears the value of `type`. Subsequent reads from it will return its default value.
+    mutating func clearType() {self._type = nil}
+
+    var i: Int32 {
+      get {return _i ?? 0}
+      set {_i = newValue}
+    }
+    /// Returns true if `i` has been explicitly set.
+    var hasI: Bool {return self._i != nil}
+    /// Clears the value of `i`. Subsequent reads from it will return its default value.
+    mutating func clearI() {self._i = nil}
+
+    var f: Float {
+      get {return _f ?? 0}
+      set {_f = newValue}
+    }
+    /// Returns true if `f` has been explicitly set.
+    var hasF: Bool {return self._f != nil}
+    /// Clears the value of `f`. Subsequent reads from it will return its default value.
+    mutating func clearF() {self._f = nil}
+
+    var s: String {
+      get {return _s ?? String()}
+      set {_s = newValue}
+    }
+    /// Returns true if `s` has been explicitly set.
+    var hasS: Bool {return self._s != nil}
+    /// Clears the value of `s`. Subsequent reads from it will return its default value.
+    mutating func clearS() {self._s = nil}
+
+    var ints: [Int32] = []
+
+    var floats: [Float] = []
+
+    var strings: [String] = []
+
+    var b: Bool {
+      get {return _b ?? false}
+      set {_b = newValue}
+    }
+    /// Returns true if `b` has been explicitly set.
+    var hasB: Bool {return self._b != nil}
+    /// Clears the value of `b`. Subsequent reads from it will return its default value.
+    mutating func clearB() {self._b = nil}
+
+    var bools: [Bool] = []
+
+    var blockIdx: Int32 {
+      get {return _blockIdx ?? 0}
+      set {_blockIdx = newValue}
+    }
+    /// Returns true if `blockIdx` has been explicitly set.
+    var hasBlockIdx: Bool {return self._blockIdx != nil}
+    /// Clears the value of `blockIdx`. Subsequent reads from it will return its default value.
+    mutating func clearBlockIdx() {self._blockIdx = nil}
+
+    var l: Int64 {
+      get {return _l ?? 0}
+      set {_l = newValue}
+    }
+    /// Returns true if `l` has been explicitly set.
+    var hasL: Bool {return self._l != nil}
+    /// Clears the value of `l`. Subsequent reads from it will return its default value.
+    mutating func clearL() {self._l = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _name: String? = nil
+    fileprivate var _type: PaddleMobile_Framework_Proto_AttrType? = nil
+    fileprivate var _i: Int32? = nil
+    fileprivate var _f: Float? = nil
+    fileprivate var _s: String? = nil
+    fileprivate var _b: Bool? = nil
+    fileprivate var _blockIdx: Int32? = nil
+    fileprivate var _l: Int64? = nil
+  }
+
+  struct Var {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var parameter: String {
+      get {return _parameter ?? String()}
+      set {_parameter = newValue}
+    }
+    /// Returns true if `parameter` has been explicitly set.
+    var hasParameter: Bool {return self._parameter != nil}
+    /// Clears the value of `parameter`. Subsequent reads from it will return its default value.
+    mutating func clearParameter() {self._parameter = nil}
+
+    var arguments: [String] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _parameter: String? = nil
+  }
+
+  init() {}
+
+  fileprivate var _type: String? = nil
+  fileprivate var _isTarget: Bool? = nil
+}
+
+/// OpProto describes a C++ framework::OperatorBase derived class.
+struct PaddleMobile_Framework_Proto_OpProto {
+  // Carries the operator `type`, the declared `inputs`/`outputs` (`Var`), `attrs`
+  // and a `comment`. SwiftProtobuf.Message conformance is added in an extension
+  // below; see `Message` and `Message+*Additions` in SwiftProtobuf.
+
+  var type: String {
+    get {return _type ?? String()}
+    set {_type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return self._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {self._type = nil}
+
+  var inputs: [PaddleMobile_Framework_Proto_OpProto.Var] = []
+
+  var outputs: [PaddleMobile_Framework_Proto_OpProto.Var] = []
+
+  var attrs: [PaddleMobile_Framework_Proto_OpProto.Attr] = []
+
+  var comment: String {
+    get {return _comment ?? String()}
+    set {_comment = newValue}
+  }
+  /// Returns true if `comment` has been explicitly set.
+  var hasComment: Bool {return self._comment != nil}
+  /// Clears the value of `comment`. Subsequent reads from it will return its default value.
+  mutating func clearComment() {self._comment = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  /// VarProto describes the C++ type framework::Variable.
+  struct Var {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var name: String {
+      get {return _name ?? String()}
+      set {_name = newValue}
+    }
+    /// Returns true if `name` has been explicitly set.
+    var hasName: Bool {return self._name != nil}
+    /// Clears the value of `name`. Subsequent reads from it will return its default value.
+    mutating func clearName() {self._name = nil}
+
+    var comment: String {
+      get {return _comment ?? String()}
+      set {_comment = newValue}
+    }
+    /// Returns true if `comment` has been explicitly set.
+    var hasComment: Bool {return self._comment != nil}
+    /// Clears the value of `comment`. Subsequent reads from it will return its default value.
+    mutating func clearComment() {self._comment = nil}
+
+    var duplicable: Bool {
+      get {return _duplicable ?? false}
+      set {_duplicable = newValue}
+    }
+    /// Returns true if `duplicable` has been explicitly set.
+    var hasDuplicable: Bool {return self._duplicable != nil}
+    /// Clears the value of `duplicable`. Subsequent reads from it will return its default value.
+    mutating func clearDuplicable() {self._duplicable = nil}
+
+    var intermediate: Bool {
+      get {return _intermediate ?? false}
+      set {_intermediate = newValue}
+    }
+    /// Returns true if `intermediate` has been explicitly set.
+    var hasIntermediate: Bool {return self._intermediate != nil}
+    /// Clears the value of `intermediate`. Subsequent reads from it will return its default value.
+    mutating func clearIntermediate() {self._intermediate = nil}
+
+    var dispensable: Bool {
+      get {return _dispensable ?? false}
+      set {_dispensable = newValue}
+    }
+    /// Returns true if `dispensable` has been explicitly set.
+    var hasDispensable: Bool {return self._dispensable != nil}
+    /// Clears the value of `dispensable`. Subsequent reads from it will return its default value.
+    mutating func clearDispensable() {self._dispensable = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _name: String? = nil
+    fileprivate var _comment: String? = nil
+    fileprivate var _duplicable: Bool? = nil
+    fileprivate var _intermediate: Bool? = nil
+    fileprivate var _dispensable: Bool? = nil
+  }
+
+  /// AttrProto describes the C++ type Attribute.
+  struct Attr {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var name: String {
+      get {return _name ?? String()}
+      set {_name = newValue}
+    }
+    /// Returns true if `name` has been explicitly set.
+    var hasName: Bool {return self._name != nil}
+    /// Clears the value of `name`. Subsequent reads from it will return its default value.
+    mutating func clearName() {self._name = nil}
+
+    var type: PaddleMobile_Framework_Proto_AttrType {
+      get {return _type ?? .int}
+      set {_type = newValue}
+    }
+    /// Returns true if `type` has been explicitly set.
+    var hasType: Bool {return self._type != nil}
+    /// Clears the value of `type`. Subsequent reads from it will return its default value.
+    mutating func clearType() {self._type = nil}
+
+    var comment: String {
+      get {return _comment ?? String()}
+      set {_comment = newValue}
+    }
+    /// Returns true if `comment` has been explicitly set.
+    var hasComment: Bool {return self._comment != nil}
+    /// Clears the value of `comment`. Subsequent reads from it will return its default value.
+    mutating func clearComment() {self._comment = nil}
+
+    /// If that attribute is generated, it means the Paddle third
+    /// language binding has responsibility to fill that
+    /// attribute. End-User should not set that attribute.
+    var generated: Bool {
+      get {return _generated ?? false}
+      set {_generated = newValue}
+    }
+    /// Returns true if `generated` has been explicitly set.
+    var hasGenerated: Bool {return self._generated != nil}
+    /// Clears the value of `generated`. Subsequent reads from it will return its default value.
+    mutating func clearGenerated() {self._generated = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _name: String? = nil
+    fileprivate var _type: PaddleMobile_Framework_Proto_AttrType? = nil
+    fileprivate var _comment: String? = nil
+    fileprivate var _generated: Bool? = nil
+  }
+
+  init() {}
+
+  fileprivate var _type: String? = nil
+  fileprivate var _comment: String? = nil
+}
+
+struct PaddleMobile_Framework_Proto_VarType {
+  // SwiftProtobuf.Message conformance is added in an extension below. Fields are
+  // kept in the `_StorageClass` instance `_storage`, so every mutation, including
+  // `clear*`, goes through `_uniqueStorage()` exactly as the setters do.
+
+  var type: PaddleMobile_Framework_Proto_VarType.TypeEnum {
+    get {return _storage._type ?? .bool}
+    set {_uniqueStorage()._type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return _storage._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {_uniqueStorage()._type = nil}
+
+  var selectedRows: PaddleMobile_Framework_Proto_VarType.TensorDesc {
+    get {return _storage._selectedRows ?? PaddleMobile_Framework_Proto_VarType.TensorDesc()}
+    set {_uniqueStorage()._selectedRows = newValue}
+  }
+  /// Returns true if `selectedRows` has been explicitly set.
+  var hasSelectedRows: Bool {return _storage._selectedRows != nil}
+  /// Clears the value of `selectedRows`. Subsequent reads from it will return its default value.
+  mutating func clearSelectedRows() {_uniqueStorage()._selectedRows = nil}
+
+  var lodTensor: PaddleMobile_Framework_Proto_VarType.LoDTensorDesc {
+    get {return _storage._lodTensor ?? PaddleMobile_Framework_Proto_VarType.LoDTensorDesc()}
+    set {_uniqueStorage()._lodTensor = newValue}
+  }
+  /// Returns true if `lodTensor` has been explicitly set.
+  var hasLodTensor: Bool {return _storage._lodTensor != nil}
+  /// Clears the value of `lodTensor`. Subsequent reads from it will return its default value.
+  mutating func clearLodTensor() {_uniqueStorage()._lodTensor = nil}
+
+  var tensorArray: PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc {
+    get {return _storage._tensorArray ?? PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc()}
+    set {_uniqueStorage()._tensorArray = newValue}
+  }
+  /// Returns true if `tensorArray` has been explicitly set.
+  var hasTensorArray: Bool {return _storage._tensorArray != nil}
+  /// Clears the value of `tensorArray`. Subsequent reads from it will return its default value.
+  mutating func clearTensorArray() {_uniqueStorage()._tensorArray = nil}
+
+  var reader: PaddleMobile_Framework_Proto_VarType.ReaderDesc {
+    get {return _storage._reader ?? PaddleMobile_Framework_Proto_VarType.ReaderDesc()}
+    set {_uniqueStorage()._reader = newValue}
+  }
+  /// Returns true if `reader` has been explicitly set.
+  var hasReader: Bool {return _storage._reader != nil}
+  /// Clears the value of `reader`. Subsequent reads from it will return its default value.
+  mutating func clearReader() {_uniqueStorage()._reader = nil}
+
+  var channel: PaddleMobile_Framework_Proto_VarType.ChannelDesc {
+    get {return _storage._channel ?? PaddleMobile_Framework_Proto_VarType.ChannelDesc()}
+    set {_uniqueStorage()._channel = newValue}
+  }
+  /// Returns true if `channel` has been explicitly set.
+  var hasChannel: Bool {return _storage._channel != nil}
+  /// Clears the value of `channel`. Subsequent reads from it will return its default value.
+  mutating func clearChannel() {_uniqueStorage()._channel = nil}
+
+  var tuple: PaddleMobile_Framework_Proto_VarType.Tuple {
+    get {return _storage._tuple ?? PaddleMobile_Framework_Proto_VarType.Tuple()}
+    set {_uniqueStorage()._tuple = newValue}
+  }
+  /// Returns true if `tuple` has been explicitly set.
+  var hasTuple: Bool {return _storage._tuple != nil}
+  /// Clears the value of `tuple`. Subsequent reads from it will return its default value.
+  mutating func clearTuple() {_uniqueStorage()._tuple = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  enum TypeEnum: SwiftProtobuf.Enum {
+    typealias RawValue = Int
+
+    /// Pod Types
+    case bool // = 0
+    case int16 // = 1
+    case int32 // = 2
+    case int64 // = 3
+    case fp16 // = 4
+    case fp32 // = 5
+    case fp64 // = 6
+
+    /// Other types that may need additional descriptions
+    case lodTensor // = 7
+    case selectedRows // = 8
+    case feedMinibatch // = 9
+    case fetchList // = 10
+    case stepScopes // = 11
+    case lodRankTable // = 12
+    case lodTensorArray // = 13
+    case placeList // = 14
+    case reader // = 15
+    case channel // = 16
+
+    /// Any runtime decided variable type is raw
+    /// raw variables should manage their own allocations
+    /// in operators like nccl_op
+    case raw // = 17
+    case tuple // = 18
+
+    init() {
+      self = .bool
+    }
+
+    init?(rawValue: Int) {
+      switch rawValue {
+      case 0: self = .bool
+      case 1: self = .int16
+      case 2: self = .int32
+      case 3: self = .int64
+      case 4: self = .fp16
+      case 5: self = .fp32
+      case 6: self = .fp64
+      case 7: self = .lodTensor
+      case 8: self = .selectedRows
+      case 9: self = .feedMinibatch
+      case 10: self = .fetchList
+      case 11: self = .stepScopes
+      case 12: self = .lodRankTable
+      case 13: self = .lodTensorArray
+      case 14: self = .placeList
+      case 15: self = .reader
+      case 16: self = .channel
+      case 17: self = .raw
+      case 18: self = .tuple
+      default: return nil
+      }
+    }
+
+    var rawValue: Int {
+      switch self {
+      case .bool: return 0
+      case .int16: return 1
+      case .int32: return 2
+      case .int64: return 3
+      case .fp16: return 4
+      case .fp32: return 5
+      case .fp64: return 6
+      case .lodTensor: return 7
+      case .selectedRows: return 8
+      case .feedMinibatch: return 9
+      case .fetchList: return 10
+      case .stepScopes: return 11
+      case .lodRankTable: return 12
+      case .lodTensorArray: return 13
+      case .placeList: return 14
+      case .reader: return 15
+      case .channel: return 16
+      case .raw: return 17
+      case .tuple: return 18
+      }
+    }
+
+  }
+
+  struct TensorDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    /// Should only be PODType. Is enforced in C++
+    var dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum {
+      get {return _dataType ?? .bool}
+      set {_dataType = newValue}
+    }
+    /// Returns true if `dataType` has been explicitly set.
+    var hasDataType: Bool {return self._dataType != nil}
+    /// Clears the value of `dataType`. Subsequent reads from it will return its default value.
+    mutating func clearDataType() {self._dataType = nil}
+
+    /// [UNK, 640, 480] is saved as [-1, 640, 480]
+    var dims: [Int64] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum? = nil
+  }
+
+  struct LoDTensorDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc {
+      get {return _storage._tensor ?? PaddleMobile_Framework_Proto_VarType.TensorDesc()}
+      set {_uniqueStorage()._tensor = newValue}
+    }
+    /// Returns true if `tensor` has been explicitly set.
+    var hasTensor: Bool {return _storage._tensor != nil}
+    /// Clears the value of `tensor`. Subsequent reads from it will return its default value.
+    mutating func clearTensor() {_uniqueStorage()._tensor = nil}
+
+    var lodLevel: Int32 {
+      get {return _storage._lodLevel ?? 0}
+      set {_uniqueStorage()._lodLevel = newValue}
+    }
+    /// Returns true if `lodLevel` has been explicitly set.
+    var hasLodLevel: Bool {return _storage._lodLevel != nil}
+    /// Clears the value of `lodLevel`. Subsequent reads from it will return its default value.
+    mutating func clearLodLevel() {_uniqueStorage()._lodLevel = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _storage = _StorageClass.defaultInstance
+  }
+
+  struct LoDTensorArrayDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc {
+      get {return _storage._tensor ?? PaddleMobile_Framework_Proto_VarType.TensorDesc()}
+      set {_uniqueStorage()._tensor = newValue}
+    }
+    /// Returns true if `tensor` has been explicitly set.
+    var hasTensor: Bool {return _storage._tensor != nil}
+    /// Clears the value of `tensor`. Subsequent reads from it will return its default value.
+    mutating func clearTensor() {_uniqueStorage()._tensor = nil}
+
+    var lodLevel: Int32 {
+      get {return _storage._lodLevel ?? 0}
+      set {_uniqueStorage()._lodLevel = newValue}
+    }
+    /// Returns true if `lodLevel` has been explicitly set.
+    var hasLodLevel: Bool {return _storage._lodLevel != nil}
+    /// Clears the value of `lodLevel`. Subsequent reads from it will return its default value.
+    mutating func clearLodLevel() {_uniqueStorage()._lodLevel = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _storage = _StorageClass.defaultInstance
+  }
+
+  struct ReaderDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var lodTensor: [PaddleMobile_Framework_Proto_VarType.LoDTensorDesc] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+  }
+
+  struct ChannelDesc {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum {
+      get {return _dataType ?? .bool}
+      set {_dataType = newValue}
+    }
+    /// Returns true if `dataType` has been explicitly set.
+    var hasDataType: Bool {return self._dataType != nil}
+    /// Clears the value of `dataType`. Subsequent reads from it will return its default value.
+    mutating func clearDataType() {self._dataType = nil}
+
+    var capacity: Int64 {
+      get {return _capacity ?? 0}
+      set {_capacity = newValue}
+    }
+    /// Returns true if `capacity` has been explicitly set.
+    var hasCapacity: Bool {return self._capacity != nil}
+    /// Clears the value of `capacity`. Subsequent reads from it will return its default value.
+    mutating func clearCapacity() {self._capacity = nil}
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+
+    fileprivate var _dataType: PaddleMobile_Framework_Proto_VarType.TypeEnum? = nil
+    fileprivate var _capacity: Int64? = nil
+  }
+
+  struct Tuple {
+    // SwiftProtobuf.Message conformance is added in an extension below. See the
+    // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+    // methods supported on all messages.
+
+    var elementType: [PaddleMobile_Framework_Proto_VarType.TypeEnum] = []
+
+    var unknownFields = SwiftProtobuf.UnknownStorage()
+
+    init() {}
+  }
+
+  init() {}
+
+  fileprivate var _storage = _StorageClass.defaultInstance
+}
+
+struct PaddleMobile_Framework_Proto_VarDesc {
+  // SwiftProtobuf.Message conformance is added in an extension below. Fields are
+  // kept in the `_StorageClass` instance `_storage`, so every mutation, including
+  // `clear*`, goes through `_uniqueStorage()` exactly as the setters do.
+
+  var name: String {
+    get {return _storage._name ?? String()}
+    set {_uniqueStorage()._name = newValue}
+  }
+  /// Returns true if `name` has been explicitly set.
+  var hasName: Bool {return _storage._name != nil}
+  /// Clears the value of `name`. Subsequent reads from it will return its default value.
+  mutating func clearName() {_uniqueStorage()._name = nil}
+
+  var type: PaddleMobile_Framework_Proto_VarType {
+    get {return _storage._type ?? PaddleMobile_Framework_Proto_VarType()}
+    set {_uniqueStorage()._type = newValue}
+  }
+  /// Returns true if `type` has been explicitly set.
+  var hasType: Bool {return _storage._type != nil}
+  /// Clears the value of `type`. Subsequent reads from it will return its default value.
+  mutating func clearType() {_uniqueStorage()._type = nil}
+
+  var persistable: Bool {
+    get {return _storage._persistable ?? false}
+    set {_uniqueStorage()._persistable = newValue}
+  }
+  /// Returns true if `persistable` has been explicitly set.
+  var hasPersistable: Bool {return _storage._persistable != nil}
+  /// Clears the value of `persistable`. Subsequent reads from it will return its default value.
+  mutating func clearPersistable() {_uniqueStorage()._persistable = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  init() {}
+
+  fileprivate var _storage = _StorageClass.defaultInstance
+}
+
+struct PaddleMobile_Framework_Proto_BlockDesc {
+  // One block of a program: its `idx`, `parentIdx`, `vars`, `ops` and
+  // `forwardBlockIdx` (reads as -1 when unset). SwiftProtobuf.Message conformance
+  // is added in an extension below; see `Message` in the SwiftProtobuf library.
+
+  var idx: Int32 {
+    get {return _idx ?? 0}
+    set {_idx = newValue}
+  }
+  /// Returns true if `idx` has been explicitly set.
+  var hasIdx: Bool {return self._idx != nil}
+  /// Clears the value of `idx`. Subsequent reads from it will return its default value.
+  mutating func clearIdx() {self._idx = nil}
+
+  var parentIdx: Int32 {
+    get {return _parentIdx ?? 0}
+    set {_parentIdx = newValue}
+  }
+  /// Returns true if `parentIdx` has been explicitly set.
+  var hasParentIdx: Bool {return self._parentIdx != nil}
+  /// Clears the value of `parentIdx`. Subsequent reads from it will return its default value.
+  mutating func clearParentIdx() {self._parentIdx = nil}
+
+  var vars: [PaddleMobile_Framework_Proto_VarDesc] = []
+
+  var ops: [PaddleMobile_Framework_Proto_OpDesc] = []
+
+  var forwardBlockIdx: Int32 {
+    get {return _forwardBlockIdx ?? -1}
+    set {_forwardBlockIdx = newValue}
+  }
+  /// Returns true if `forwardBlockIdx` has been explicitly set.
+  var hasForwardBlockIdx: Bool {return self._forwardBlockIdx != nil}
+  /// Clears the value of `forwardBlockIdx`. Subsequent reads from it will return its default value.
+  mutating func clearForwardBlockIdx() {self._forwardBlockIdx = nil}
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  init() {}
+
+  fileprivate var _idx: Int32? = nil
+  fileprivate var _parentIdx: Int32? = nil
+  fileprivate var _forwardBlockIdx: Int32? = nil
+}
+
+/// Swift value type for the `ProgramDesc` message, which holds a repeated
+/// `blocks` field of `BlockDesc` values.
+///
+/// Please refer to
+/// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
+/// for more details.
+/// TODO(panyx0718): A model can have multiple programs. Need a
+/// way to distinguish them. Maybe ID or name?
+struct PaddleMobile_Framework_Proto_ProgramDesc {
+  // SwiftProtobuf.Message conformance is added in an extension below. See the
+  // `Message` and `Message+*Additions` files in the SwiftProtobuf library for
+  // methods supported on all messages.
+
+  var blocks = [PaddleMobile_Framework_Proto_BlockDesc]()
+
+  var unknownFields = SwiftProtobuf.UnknownStorage()
+
+  init() {}
+}
+
+// MARK: - Code below here is support for the SwiftProtobuf runtime.
+
+/// Proto package name. Top-level message extensions below prepend it to their
+/// own name when building `protoMessageName` (for example `+ ".OpDesc"`).
+fileprivate let _protobuf_package = "paddle_mobile.framework.proto"
+
+/// Name table for `AttrType`: associates each numeric key with the proto name
+/// given by `.same(proto:)`.
+/// NOTE(review): the keys are presumably the enum's proto numbers; the enum
+/// declaration is outside this part of the file, so confirm there.
+extension PaddleMobile_Framework_Proto_AttrType: SwiftProtobuf._ProtoNameProviding {
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    0: .same(proto: "INT"),
+    1: .same(proto: "FLOAT"),
+    2: .same(proto: "STRING"),
+    3: .same(proto: "INTS"),
+    4: .same(proto: "FLOATS"),
+    5: .same(proto: "STRINGS"),
+    6: .same(proto: "BOOLEAN"),
+    7: .same(proto: "BOOLEANS"),
+    8: .same(proto: "BLOCK"),
+    9: .same(proto: "LONG"),
+  ]
+}
+
+/// SwiftProtobuf runtime support for `OpDesc`: field-name table, required-field
+/// check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".OpDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    3: .same(proto: "type"),
+    1: .same(proto: "inputs"),
+    2: .same(proto: "outputs"),
+    4: .same(proto: "attrs"),
+    5: .standard(proto: "is_target"),
+  ]
+
+  /// False when `type` is unset or when `areAllInitialized` rejects any of the
+  /// `inputs`, `outputs` or `attrs` arrays; true otherwise.
+  public var isInitialized: Bool {
+    guard _type != nil else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(inputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(outputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(attrs) else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes numbers 1-5 into the matching property. Any other number reaches
+  /// `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeRepeatedMessageField(value: &inputs)
+      case 2:
+        try decoder.decodeRepeatedMessageField(value: &outputs)
+      case 3:
+        try decoder.decodeSingularStringField(value: &_type)
+      case 4:
+        try decoder.decodeRepeatedMessageField(value: &attrs)
+      case 5:
+        try decoder.decodeSingularBoolField(value: &_isTarget)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every non-empty repeated field and every set optional field to
+  /// `visitor` in ascending field-number order, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if !inputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: inputs, fieldNumber: 1)
+    }
+    if !outputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: outputs, fieldNumber: 2)
+    }
+    if let value = _type {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 3)
+    }
+    if !attrs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: attrs, fieldNumber: 4)
+    }
+    if let value = _isTarget {
+      try visitor.visitSingularBoolField(value: value, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpDesc) -> Bool {
+    guard _type == other._type else { return false }
+    guard inputs == other.inputs else { return false }
+    guard outputs == other.outputs else { return false }
+    guard attrs == other.attrs else { return false }
+    guard _isTarget == other._isTarget else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpDesc.Attr`: field-name table,
+/// required-field check, decoding, field traversal and equality.
+/// Field number 9 has no entry in the name map or in the decoder switch.
+extension PaddleMobile_Framework_Proto_OpDesc.Attr: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpDesc.protoMessageName + ".Attr"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "type"),
+    3: .same(proto: "i"),
+    4: .same(proto: "f"),
+    5: .same(proto: "s"),
+    6: .same(proto: "ints"),
+    7: .same(proto: "floats"),
+    8: .same(proto: "strings"),
+    10: .same(proto: "b"),
+    11: .same(proto: "bools"),
+    12: .standard(proto: "block_idx"),
+    13: .same(proto: "l"),
+  ]
+
+  /// True only when both `name` and `type` have been set.
+  public var isInitialized: Bool {
+    guard _name != nil else { return false }
+    guard _type != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes each number listed below into the matching property. Any other
+  /// number reaches `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularStringField(value: &_name)
+      case 2:
+        try decoder.decodeSingularEnumField(value: &_type)
+      case 3:
+        try decoder.decodeSingularInt32Field(value: &_i)
+      case 4:
+        try decoder.decodeSingularFloatField(value: &_f)
+      case 5:
+        try decoder.decodeSingularStringField(value: &_s)
+      case 6:
+        try decoder.decodeRepeatedInt32Field(value: &ints)
+      case 7:
+        try decoder.decodeRepeatedFloatField(value: &floats)
+      case 8:
+        try decoder.decodeRepeatedStringField(value: &strings)
+      case 10:
+        try decoder.decodeSingularBoolField(value: &_b)
+      case 11:
+        try decoder.decodeRepeatedBoolField(value: &bools)
+      case 12:
+        try decoder.decodeSingularInt32Field(value: &_blockIdx)
+      case 13:
+        try decoder.decodeSingularInt64Field(value: &_l)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every set optional field and every non-empty repeated field to
+  /// `visitor` in ascending field-number order, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let value = _name {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 1)
+    }
+    if let value = _type {
+      try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
+    }
+    if let value = _i {
+      try visitor.visitSingularInt32Field(value: value, fieldNumber: 3)
+    }
+    if let value = _f {
+      try visitor.visitSingularFloatField(value: value, fieldNumber: 4)
+    }
+    if let value = _s {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 5)
+    }
+    if !ints.isEmpty {
+      try visitor.visitRepeatedInt32Field(value: ints, fieldNumber: 6)
+    }
+    if !floats.isEmpty {
+      try visitor.visitRepeatedFloatField(value: floats, fieldNumber: 7)
+    }
+    if !strings.isEmpty {
+      try visitor.visitRepeatedStringField(value: strings, fieldNumber: 8)
+    }
+    if let value = _b {
+      try visitor.visitSingularBoolField(value: value, fieldNumber: 10)
+    }
+    if !bools.isEmpty {
+      try visitor.visitRepeatedBoolField(value: bools, fieldNumber: 11)
+    }
+    if let value = _blockIdx {
+      try visitor.visitSingularInt32Field(value: value, fieldNumber: 12)
+    }
+    if let value = _l {
+      try visitor.visitSingularInt64Field(value: value, fieldNumber: 13)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Bool {
+    guard _name == other._name else { return false }
+    guard _type == other._type else { return false }
+    guard _i == other._i else { return false }
+    guard _f == other._f else { return false }
+    guard _s == other._s else { return false }
+    guard ints == other.ints else { return false }
+    guard floats == other.floats else { return false }
+    guard strings == other.strings else { return false }
+    guard _b == other._b else { return false }
+    guard bools == other.bools else { return false }
+    guard _blockIdx == other._blockIdx else { return false }
+    guard _l == other._l else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpDesc.Var`: field-name table,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpDesc.Var: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpDesc.protoMessageName + ".Var"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "parameter"),
+    2: .same(proto: "arguments"),
+  ]
+
+  /// True only when `parameter` has been set.
+  public var isInitialized: Bool {
+    return _parameter != nil
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes numbers 1 and 2 into the matching property. Any other number
+  /// reaches `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularStringField(value: &_parameter)
+      case 2:
+        try decoder.decodeRepeatedStringField(value: &arguments)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `parameter` (when set) and `arguments` (when non-empty) to
+  /// `visitor`, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let value = _parameter {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 1)
+    }
+    if !arguments.isEmpty {
+      try visitor.visitRepeatedStringField(value: arguments, fieldNumber: 2)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpDesc.Var) -> Bool {
+    guard _parameter == other._parameter else { return false }
+    guard arguments == other.arguments else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpProto`: field-name table,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpProto: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".OpProto"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "type"),
+    2: .same(proto: "inputs"),
+    3: .same(proto: "outputs"),
+    4: .same(proto: "attrs"),
+    5: .same(proto: "comment"),
+  ]
+
+  /// False when `type` or `comment` is unset, or when `areAllInitialized`
+  /// rejects any of the `inputs`, `outputs` or `attrs` arrays; true otherwise.
+  public var isInitialized: Bool {
+    guard _type != nil else { return false }
+    guard _comment != nil else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(inputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(outputs) else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(attrs) else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes numbers 1-5 into the matching property. Any other number reaches
+  /// `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularStringField(value: &_type)
+      case 2:
+        try decoder.decodeRepeatedMessageField(value: &inputs)
+      case 3:
+        try decoder.decodeRepeatedMessageField(value: &outputs)
+      case 4:
+        try decoder.decodeRepeatedMessageField(value: &attrs)
+      case 5:
+        try decoder.decodeSingularStringField(value: &_comment)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every set optional field and every non-empty repeated field to
+  /// `visitor` in ascending field-number order, then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let value = _type {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 1)
+    }
+    if !inputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: inputs, fieldNumber: 2)
+    }
+    if !outputs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: outputs, fieldNumber: 3)
+    }
+    if !attrs.isEmpty {
+      try visitor.visitRepeatedMessageField(value: attrs, fieldNumber: 4)
+    }
+    if let value = _comment {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpProto) -> Bool {
+    guard _type == other._type else { return false }
+    guard inputs == other.inputs else { return false }
+    guard outputs == other.outputs else { return false }
+    guard attrs == other.attrs else { return false }
+    guard _comment == other._comment else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpProto.Var`: field-name table,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpProto.Var: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpProto.protoMessageName + ".Var"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "comment"),
+    3: .same(proto: "duplicable"),
+    4: .same(proto: "intermediate"),
+    5: .same(proto: "dispensable"),
+  ]
+
+  /// True only when both `name` and `comment` have been set.
+  public var isInitialized: Bool {
+    guard _name != nil else { return false }
+    guard _comment != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes numbers 1-5 into the matching property. Any other number reaches
+  /// `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularStringField(value: &_name)
+      case 2:
+        try decoder.decodeSingularStringField(value: &_comment)
+      case 3:
+        try decoder.decodeSingularBoolField(value: &_duplicable)
+      case 4:
+        try decoder.decodeSingularBoolField(value: &_intermediate)
+      case 5:
+        try decoder.decodeSingularBoolField(value: &_dispensable)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every set field to `visitor` in ascending field-number order, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let value = _name {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 1)
+    }
+    if let value = _comment {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 2)
+    }
+    if let value = _duplicable {
+      try visitor.visitSingularBoolField(value: value, fieldNumber: 3)
+    }
+    if let value = _intermediate {
+      try visitor.visitSingularBoolField(value: value, fieldNumber: 4)
+    }
+    if let value = _dispensable {
+      try visitor.visitSingularBoolField(value: value, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpProto.Var) -> Bool {
+    guard _name == other._name else { return false }
+    guard _comment == other._comment else { return false }
+    guard _duplicable == other._duplicable else { return false }
+    guard _intermediate == other._intermediate else { return false }
+    guard _dispensable == other._dispensable else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `OpProto.Attr`: field-name table,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_OpProto.Attr: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_OpProto.protoMessageName + ".Attr"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "type"),
+    3: .same(proto: "comment"),
+    4: .same(proto: "generated"),
+  ]
+
+  /// True only when `name`, `type` and `comment` have all been set.
+  public var isInitialized: Bool {
+    guard _name != nil else { return false }
+    guard _type != nil else { return false }
+    guard _comment != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes numbers 1-4 into the matching property. Any other number reaches
+  /// `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularStringField(value: &_name)
+      case 2:
+        try decoder.decodeSingularEnumField(value: &_type)
+      case 3:
+        try decoder.decodeSingularStringField(value: &_comment)
+      case 4:
+        try decoder.decodeSingularBoolField(value: &_generated)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every set field to `visitor` in ascending field-number order, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let value = _name {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 1)
+    }
+    if let value = _type {
+      try visitor.visitSingularEnumField(value: value, fieldNumber: 2)
+    }
+    if let value = _comment {
+      try visitor.visitSingularStringField(value: value, fieldNumber: 3)
+    }
+    if let value = _generated {
+      try visitor.visitSingularBoolField(value: value, fieldNumber: 4)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_OpProto.Attr) -> Bool {
+    guard _name == other._name else { return false }
+    guard _type == other._type else { return false }
+    guard _comment == other._comment else { return false }
+    guard _generated == other._generated else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType`: field-name table, the
+/// reference-typed storage object that holds the fields, required-field check,
+/// decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".VarType"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "type"),
+    2: .standard(proto: "selected_rows"),
+    3: .standard(proto: "lod_tensor"),
+    4: .standard(proto: "tensor_array"),
+    5: .same(proto: "reader"),
+    6: .same(proto: "channel"),
+    7: .same(proto: "tuple"),
+  ]
+
+  /// Class that holds all of the message's fields; `nil` means "not set".
+  fileprivate class _StorageClass {
+    var _type: PaddleMobile_Framework_Proto_VarType.TypeEnum?
+    var _selectedRows: PaddleMobile_Framework_Proto_VarType.TensorDesc?
+    var _lodTensor: PaddleMobile_Framework_Proto_VarType.LoDTensorDesc?
+    var _tensorArray: PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc?
+    var _reader: PaddleMobile_Framework_Proto_VarType.ReaderDesc?
+    var _channel: PaddleMobile_Framework_Proto_VarType.ChannelDesc?
+    var _tuple: PaddleMobile_Framework_Proto_VarType.Tuple?
+
+    /// Single shared instance with every field unset.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Builds a new storage object carrying the same field values as `original`.
+    init(copying original: _StorageClass) {
+      _type = original._type
+      _selectedRows = original._selectedRows
+      _lodTensor = original._lodTensor
+      _tensorArray = original._tensorArray
+      _reader = original._reader
+      _channel = original._channel
+      _tuple = original._tuple
+    }
+  }
+
+  /// Returns the storage object, first swapping in a private copy when the
+  /// current one is not known to be uniquely referenced.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    let isShared = !isKnownUniquelyReferenced(&_storage)
+    if isShared {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// False when `type` is unset or when a set `selected_rows`, `lod_tensor`,
+  /// `tensor_array`, `reader` or `channel` sub-message reports itself
+  /// uninitialized; true otherwise. `tuple` is not inspected.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      guard storage._type != nil else { return false }
+      if let rows = storage._selectedRows, !rows.isInitialized { return false }
+      if let lod = storage._lodTensor, !lod.isInitialized { return false }
+      if let array = storage._tensorArray, !array.isInitialized { return false }
+      if let reader = storage._reader, !reader.isInitialized { return false }
+      if let channel = storage._channel, !channel.isInitialized { return false }
+      return true
+    }
+  }
+
+  /// Calls `_uniqueStorage()` first, then pulls field numbers from `decoder`
+  /// until none are left and decodes numbers 1-7 into the storage object. Any
+  /// other number reaches `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      while let number = try decoder.nextFieldNumber() {
+        switch number {
+        case 1:
+          try decoder.decodeSingularEnumField(value: &storage._type)
+        case 2:
+          try decoder.decodeSingularMessageField(value: &storage._selectedRows)
+        case 3:
+          try decoder.decodeSingularMessageField(value: &storage._lodTensor)
+        case 4:
+          try decoder.decodeSingularMessageField(value: &storage._tensorArray)
+        case 5:
+          try decoder.decodeSingularMessageField(value: &storage._reader)
+        case 6:
+          try decoder.decodeSingularMessageField(value: &storage._channel)
+        case 7:
+          try decoder.decodeSingularMessageField(value: &storage._tuple)
+        default:
+          break
+        }
+      }
+    }
+  }
+
+  /// Hands every set field to `visitor` in ascending field-number order, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      if let value = storage._type {
+        try visitor.visitSingularEnumField(value: value, fieldNumber: 1)
+      }
+      if let value = storage._selectedRows {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 2)
+      }
+      if let value = storage._lodTensor {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 3)
+      }
+      if let value = storage._tensorArray {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 4)
+      }
+      if let value = storage._reader {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 5)
+      }
+      if let value = storage._channel {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 6)
+      }
+      if let value = storage._tuple {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 7)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when the stored fields and the unknown fields of both values
+  /// match. The field-by-field comparison is skipped when both values point at
+  /// the same storage object.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType) -> Bool {
+    if _storage !== other._storage {
+      let sameContents: Bool = withExtendedLifetime((_storage, other._storage)) { (pair: (_StorageClass, _StorageClass)) in
+        let lhs = pair.0
+        let rhs = pair.1
+        guard lhs._type == rhs._type else { return false }
+        guard lhs._selectedRows == rhs._selectedRows else { return false }
+        guard lhs._lodTensor == rhs._lodTensor else { return false }
+        guard lhs._tensorArray == rhs._tensorArray else { return false }
+        guard lhs._reader == rhs._reader else { return false }
+        guard lhs._channel == rhs._channel else { return false }
+        guard lhs._tuple == rhs._tuple else { return false }
+        return true
+      }
+      guard sameContents else { return false }
+    }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// Name table for `VarType.TypeEnum`: associates each numeric key with the
+/// proto name given by `.same(proto:)`.
+/// NOTE(review): the keys are presumably the enum's proto numbers; the enum
+/// declaration is outside this part of the file, so confirm there.
+extension PaddleMobile_Framework_Proto_VarType.TypeEnum: SwiftProtobuf._ProtoNameProviding {
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    0: .same(proto: "BOOL"),
+    1: .same(proto: "INT16"),
+    2: .same(proto: "INT32"),
+    3: .same(proto: "INT64"),
+    4: .same(proto: "FP16"),
+    5: .same(proto: "FP32"),
+    6: .same(proto: "FP64"),
+    7: .same(proto: "LOD_TENSOR"),
+    8: .same(proto: "SELECTED_ROWS"),
+    9: .same(proto: "FEED_MINIBATCH"),
+    10: .same(proto: "FETCH_LIST"),
+    11: .same(proto: "STEP_SCOPES"),
+    12: .same(proto: "LOD_RANK_TABLE"),
+    13: .same(proto: "LOD_TENSOR_ARRAY"),
+    14: .same(proto: "PLACE_LIST"),
+    15: .same(proto: "READER"),
+    16: .same(proto: "CHANNEL"),
+    17: .same(proto: "RAW"),
+    18: .same(proto: "TUPLE"),
+  ]
+}
+
+/// SwiftProtobuf runtime support for `VarType.TensorDesc`: field-name table,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.TensorDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".TensorDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .standard(proto: "data_type"),
+    2: .same(proto: "dims"),
+  ]
+
+  /// True only when `data_type` has been set.
+  public var isInitialized: Bool {
+    return _dataType != nil
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes numbers 1 and 2 into the matching property. Any other number
+  /// reaches `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularEnumField(value: &_dataType)
+      case 2:
+        try decoder.decodeRepeatedInt64Field(value: &dims)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `data_type` (when set) and `dims` (when non-empty) to `visitor`,
+  /// then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let value = _dataType {
+      try visitor.visitSingularEnumField(value: value, fieldNumber: 1)
+    }
+    if !dims.isEmpty {
+      try visitor.visitRepeatedInt64Field(value: dims, fieldNumber: 2)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.TensorDesc) -> Bool {
+    guard _dataType == other._dataType else { return false }
+    guard dims == other.dims else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType.LoDTensorDesc`: field-name
+/// table, the reference-typed storage object that holds the fields,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.LoDTensorDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".LoDTensorDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "tensor"),
+    2: .standard(proto: "lod_level"),
+  ]
+
+  /// Class that holds the message's fields; `nil` means "not set".
+  fileprivate class _StorageClass {
+    var _tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc?
+    var _lodLevel: Int32?
+
+    /// Single shared instance with every field unset.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Builds a new storage object carrying the same field values as `original`.
+    init(copying original: _StorageClass) {
+      _tensor = original._tensor
+      _lodLevel = original._lodLevel
+    }
+  }
+
+  /// Returns the storage object, first swapping in a private copy when the
+  /// current one is not known to be uniquely referenced.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    let isShared = !isKnownUniquelyReferenced(&_storage)
+    if isShared {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// True only when `tensor` is set and reports itself initialized.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      guard let tensor = storage._tensor else { return false }
+      return tensor.isInitialized
+    }
+  }
+
+  /// Calls `_uniqueStorage()` first, then pulls field numbers from `decoder`
+  /// until none are left and decodes numbers 1 and 2 into the storage object.
+  /// Any other number reaches `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      while let number = try decoder.nextFieldNumber() {
+        switch number {
+        case 1:
+          try decoder.decodeSingularMessageField(value: &storage._tensor)
+        case 2:
+          try decoder.decodeSingularInt32Field(value: &storage._lodLevel)
+        default:
+          break
+        }
+      }
+    }
+  }
+
+  /// Hands every set field to `visitor` in ascending field-number order, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      if let value = storage._tensor {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 1)
+      }
+      if let value = storage._lodLevel {
+        try visitor.visitSingularInt32Field(value: value, fieldNumber: 2)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when the stored fields and the unknown fields of both values
+  /// match. The field-by-field comparison is skipped when both values point at
+  /// the same storage object.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.LoDTensorDesc) -> Bool {
+    if _storage !== other._storage {
+      let sameContents: Bool = withExtendedLifetime((_storage, other._storage)) { (pair: (_StorageClass, _StorageClass)) in
+        let lhs = pair.0
+        let rhs = pair.1
+        guard lhs._tensor == rhs._tensor else { return false }
+        guard lhs._lodLevel == rhs._lodLevel else { return false }
+        return true
+      }
+      guard sameContents else { return false }
+    }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType.LoDTensorArrayDesc`: field-name
+/// table, the reference-typed storage object that holds the fields,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".LoDTensorArrayDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "tensor"),
+    2: .standard(proto: "lod_level"),
+  ]
+
+  /// Class that holds the message's fields; `nil` means "not set".
+  fileprivate class _StorageClass {
+    var _tensor: PaddleMobile_Framework_Proto_VarType.TensorDesc?
+    var _lodLevel: Int32?
+
+    /// Single shared instance with every field unset.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Builds a new storage object carrying the same field values as `original`.
+    init(copying original: _StorageClass) {
+      _tensor = original._tensor
+      _lodLevel = original._lodLevel
+    }
+  }
+
+  /// Returns the storage object, first swapping in a private copy when the
+  /// current one is not known to be uniquely referenced.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    let isShared = !isKnownUniquelyReferenced(&_storage)
+    if isShared {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// True only when `tensor` is set and reports itself initialized.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      guard let tensor = storage._tensor else { return false }
+      return tensor.isInitialized
+    }
+  }
+
+  /// Calls `_uniqueStorage()` first, then pulls field numbers from `decoder`
+  /// until none are left and decodes numbers 1 and 2 into the storage object.
+  /// Any other number reaches `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      while let number = try decoder.nextFieldNumber() {
+        switch number {
+        case 1:
+          try decoder.decodeSingularMessageField(value: &storage._tensor)
+        case 2:
+          try decoder.decodeSingularInt32Field(value: &storage._lodLevel)
+        default:
+          break
+        }
+      }
+    }
+  }
+
+  /// Hands every set field to `visitor` in ascending field-number order, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (storage: _StorageClass) in
+      if let value = storage._tensor {
+        try visitor.visitSingularMessageField(value: value, fieldNumber: 1)
+      }
+      if let value = storage._lodLevel {
+        try visitor.visitSingularInt32Field(value: value, fieldNumber: 2)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when the stored fields and the unknown fields of both values
+  /// match. The field-by-field comparison is skipped when both values point at
+  /// the same storage object.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.LoDTensorArrayDesc) -> Bool {
+    if _storage !== other._storage {
+      let sameContents: Bool = withExtendedLifetime((_storage, other._storage)) { (pair: (_StorageClass, _StorageClass)) in
+        let lhs = pair.0
+        let rhs = pair.1
+        guard lhs._tensor == rhs._tensor else { return false }
+        guard lhs._lodLevel == rhs._lodLevel else { return false }
+        return true
+      }
+      guard sameContents else { return false }
+    }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType.ReaderDesc`: field-name table,
+/// initialization check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.ReaderDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".ReaderDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .standard(proto: "lod_tensor"),
+  ]
+
+  /// Result of `areAllInitialized` applied to the `lodTensor` array.
+  public var isInitialized: Bool {
+    return SwiftProtobuf.Internal.areAllInitialized(lodTensor)
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes number 1 into `lodTensor`. Any other number reaches `default`,
+  /// which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeRepeatedMessageField(value: &lodTensor)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands `lodTensor` to `visitor` when it is non-empty, then the unknown
+  /// fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if !lodTensor.isEmpty {
+      try visitor.visitRepeatedMessageField(value: lodTensor, fieldNumber: 1)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when `lodTensor` and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.ReaderDesc) -> Bool {
+    guard lodTensor == other.lodTensor else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf runtime support for `VarType.ChannelDesc`: field-name table,
+/// required-field check, decoding, field traversal and equality.
+extension PaddleMobile_Framework_Proto_VarType.ChannelDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".ChannelDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .standard(proto: "data_type"),
+    2: .same(proto: "capacity"),
+  ]
+
+  /// True only when both `data_type` and `capacity` have been set.
+  public var isInitialized: Bool {
+    guard _dataType != nil else { return false }
+    guard _capacity != nil else { return false }
+    return true
+  }
+
+  /// Pulls field numbers from `decoder` until it reports none are left and
+  /// decodes numbers 1 and 2 into the matching property. Any other number
+  /// reaches `default`, which does nothing here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularEnumField(value: &_dataType)
+      case 2:
+        try decoder.decodeSingularInt64Field(value: &_capacity)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Hands every set field to `visitor` in ascending field-number order, then
+  /// the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let value = _dataType {
+      try visitor.visitSingularEnumField(value: value, fieldNumber: 1)
+    }
+    if let value = _capacity {
+      try visitor.visitSingularInt64Field(value: value, fieldNumber: 2)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// True only when every field and the unknown fields of both values match.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.ChannelDesc) -> Bool {
+    guard _dataType == other._dataType else { return false }
+    guard _capacity == other._capacity else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf conformance for `VarType.Tuple`, whose only field is the
+/// repeated enum `element_type` (field 1).
+extension PaddleMobile_Framework_Proto_VarType.Tuple: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = PaddleMobile_Framework_Proto_VarType.protoMessageName + ".Tuple"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [1: .standard(proto: "element_type")]
+
+  /// Decodes field 1 into `elementType`; other field numbers take no action
+  /// here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      if number == 1 {
+        try decoder.decodeRepeatedEnumField(value: &self.elementType)
+      }
+    }
+  }
+
+  /// Visits `elementType` (only when non-empty) and then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    let elements = self.elementType
+    if elements.isEmpty == false {
+      try visitor.visitRepeatedEnumField(value: elements, fieldNumber: 1)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarType.Tuple) -> Bool {
+    return self.elementType == other.elementType && unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf conformance for `VarDesc` (fields `name` = 1, `type` = 2,
+/// `persistable` = 3). The field values live in a reference-typed
+/// `_StorageClass` instance held in `_storage`.
+extension PaddleMobile_Framework_Proto_VarDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".VarDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "name"),
+    2: .same(proto: "type"),
+    3: .same(proto: "persistable"),
+  ]
+
+  /// Heap box for the three optional field values.
+  fileprivate class _StorageClass {
+    var _name: String? = nil
+    var _type: PaddleMobile_Framework_Proto_VarType? = nil
+    var _persistable: Bool? = nil
+
+    /// Shared instance with every field unset.
+    static let defaultInstance = _StorageClass()
+
+    private init() {}
+
+    /// Creates a new box holding the same field values as `source`.
+    init(copying source: _StorageClass) {
+      _name = source._name
+      _type = source._type
+      _persistable = source._persistable
+    }
+  }
+
+  /// Returns `_storage`, first replacing it with a private copy when the
+  /// current box is referenced from more than one place.
+  fileprivate mutating func _uniqueStorage() -> _StorageClass {
+    if !isKnownUniquelyReferenced(&_storage) {
+      _storage = _StorageClass(copying: _storage)
+    }
+    return _storage
+  }
+
+  /// True when `name` and `type` are both set and the `type` message is
+  /// itself initialized; `persistable` is not checked.
+  public var isInitialized: Bool {
+    return withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if _storage._name == nil {return false}
+      if _storage._type == nil {return false}
+      if let v = _storage._type, !v.isInitialized {return false}
+      return true
+    }
+  }
+
+  /// Decodes fields 1-3 into the storage box; other field numbers take no
+  /// action here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    // Make sure the box is not shared before it is written to below.
+    _ = _uniqueStorage()
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      while let fieldNumber = try decoder.nextFieldNumber() {
+        switch fieldNumber {
+        case 1: try decoder.decodeSingularStringField(value: &_storage._name)
+        case 2: try decoder.decodeSingularMessageField(value: &_storage._type)
+        case 3: try decoder.decodeSingularBoolField(value: &_storage._persistable)
+        default: break
+        }
+      }
+    }
+  }
+
+  /// Visits each field that has a value, in field-number order, then the
+  /// unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    try withExtendedLifetime(_storage) { (_storage: _StorageClass) in
+      if let v = _storage._name {
+        try visitor.visitSingularStringField(value: v, fieldNumber: 1)
+      }
+      if let v = _storage._type {
+        try visitor.visitSingularMessageField(value: v, fieldNumber: 2)
+      }
+      if let v = _storage._persistable {
+        try visitor.visitSingularBoolField(value: v, fieldNumber: 3)
+      }
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_VarDesc) -> Bool {
+    // Identical boxes hold identical fields, so the comparison is skipped.
+    if _storage !== other._storage {
+      let storagesAreEqual: Bool = withExtendedLifetime((_storage, other._storage)) { (_args: (_StorageClass, _StorageClass)) in
+        let _storage = _args.0
+        let other_storage = _args.1
+        if _storage._name != other_storage._name {return false}
+        if _storage._type != other_storage._type {return false}
+        if _storage._persistable != other_storage._persistable {return false}
+        return true
+      }
+      if !storagesAreEqual {return false}
+    }
+    if unknownFields != other.unknownFields {return false}
+    return true
+  }
+}
+
+/// SwiftProtobuf conformance for `BlockDesc` (fields `idx` = 1,
+/// `parent_idx` = 2, `vars` = 3, `ops` = 4, `forward_block_idx` = 5).
+extension PaddleMobile_Framework_Proto_BlockDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".BlockDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [
+    1: .same(proto: "idx"),
+    2: .standard(proto: "parent_idx"),
+    3: .same(proto: "vars"),
+    4: .same(proto: "ops"),
+    5: .standard(proto: "forward_block_idx"),
+  ]
+
+  /// True when `idx` and `parentIdx` are set and every element of `vars`
+  /// and `ops` is initialized; `forwardBlockIdx` is not checked.
+  public var isInitialized: Bool {
+    guard self._idx != nil else { return false }
+    guard self._parentIdx != nil else { return false }
+    guard SwiftProtobuf.Internal.areAllInitialized(self.vars) else { return false }
+    return SwiftProtobuf.Internal.areAllInitialized(self.ops)
+  }
+
+  /// Decodes fields 1-5 into their backing properties; other field numbers
+  /// take no action here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      switch number {
+      case 1:
+        try decoder.decodeSingularInt32Field(value: &self._idx)
+      case 2:
+        try decoder.decodeSingularInt32Field(value: &self._parentIdx)
+      case 3:
+        try decoder.decodeRepeatedMessageField(value: &self.vars)
+      case 4:
+        try decoder.decodeRepeatedMessageField(value: &self.ops)
+      case 5:
+        try decoder.decodeSingularInt32Field(value: &self._forwardBlockIdx)
+      default:
+        break
+      }
+    }
+  }
+
+  /// Visits each populated field in field-number order, then the unknown
+  /// fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    if let idx = self._idx {
+      try visitor.visitSingularInt32Field(value: idx, fieldNumber: 1)
+    }
+    if let parentIdx = self._parentIdx {
+      try visitor.visitSingularInt32Field(value: parentIdx, fieldNumber: 2)
+    }
+    if self.vars.isEmpty == false {
+      try visitor.visitRepeatedMessageField(value: self.vars, fieldNumber: 3)
+    }
+    if self.ops.isEmpty == false {
+      try visitor.visitRepeatedMessageField(value: self.ops, fieldNumber: 4)
+    }
+    if let forwardIdx = self._forwardBlockIdx {
+      try visitor.visitSingularInt32Field(value: forwardIdx, fieldNumber: 5)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_BlockDesc) -> Bool {
+    guard self._idx == other._idx else { return false }
+    guard self._parentIdx == other._parentIdx else { return false }
+    guard self.vars == other.vars else { return false }
+    guard self.ops == other.ops else { return false }
+    guard self._forwardBlockIdx == other._forwardBlockIdx else { return false }
+    return unknownFields == other.unknownFields
+  }
+}
+
+/// SwiftProtobuf conformance for `ProgramDesc`, whose only field is the
+/// repeated message `blocks` (field 1).
+extension PaddleMobile_Framework_Proto_ProgramDesc: SwiftProtobuf.Message, SwiftProtobuf._MessageImplementationBase, SwiftProtobuf._ProtoNameProviding {
+  static let protoMessageName: String = _protobuf_package + ".ProgramDesc"
+  static let _protobuf_nameMap: SwiftProtobuf._NameMap = [1: .same(proto: "blocks")]
+
+  /// True when every element of `blocks` reports itself initialized.
+  public var isInitialized: Bool {
+    return SwiftProtobuf.Internal.areAllInitialized(self.blocks)
+  }
+
+  /// Decodes field 1 into `blocks`; other field numbers take no action here.
+  mutating func decodeMessage<D: SwiftProtobuf.Decoder>(decoder: inout D) throws {
+    while let number = try decoder.nextFieldNumber() {
+      if number == 1 {
+        try decoder.decodeRepeatedMessageField(value: &self.blocks)
+      }
+    }
+  }
+
+  /// Visits `blocks` (only when non-empty) and then the unknown fields.
+  func traverse<V: SwiftProtobuf.Visitor>(visitor: inout V) throws {
+    let allBlocks = self.blocks
+    if allBlocks.isEmpty == false {
+      try visitor.visitRepeatedMessageField(value: allBlocks, fieldNumber: 1)
+    }
+    try unknownFields.traverse(visitor: &visitor)
+  }
+
+  /// Field-by-field equality, unknown fields included.
+  func _protobuf_generated_isEqualTo(other: PaddleMobile_Framework_Proto_ProgramDesc) -> Bool {
+    return self.blocks == other.blocks && unknownFields == other.unknownFields
+  }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/framework/Dim.swift b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift
new file mode 100644
index 0000000000000000000000000000000000000000..672484cd9d055bbe65a61d41017199dd79d6cdb2
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/framework/Dim.swift
@@ -0,0 +1,53 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+/// Ordered list of dimension sizes.
+public struct Dim {
+    /// The sizes, one entry per dimension; writable only from inside `Dim`.
+    private(set) var dims: [Int]
+
+    /// Wraps `inDim` as the dimension sizes.
+    public init(inDim: [Int]) {
+        self.dims = inDim
+    }
+
+    /// Not usable: traps immediately.
+    private init() {
+        fatalError()
+    }
+
+    /// Exchanges the sizes stored at positions `index1` and `index2`.
+    mutating func swapeDimAt(index1: Int, index2: Int) {
+        dims.swapAt(index1, index2)
+    }
+
+    /// Number of dimensions.
+    func cout() -> Int {
+        return dims.count
+    }
+
+    /// Product of all sizes; 1 when there are no dimensions.
+    func numel() -> Int {
+        return dims.reduce(1, *)
+    }
+
+    /// Two `Dim`s are equal when their size lists are equal.
+    static func ==(left: Dim, right: Dim) -> Bool {
+        return left.dims == right.dims
+    }
+
+    /// Size of the dimension at `index`.
+    subscript(index: Int) -> Int {
+        return dims[index]
+    }
+}
+
+/// Textual form of a `Dim`: the description of its size array.
+extension Dim: CustomStringConvertible {
+    public var description: String {
+        return dims.description
+    }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
new file mode 100644
index 0000000000000000000000000000000000000000..7ffcd97f4418f17cd7085c5d03e8b58b45c623fd
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/framework/Tensor.swift
@@ -0,0 +1,262 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Accelerate
+import Foundation
+
+/// Common surface of the shaped containers in this framework: a mutable
+/// `Dim`, an element count and a read-only data layout. Conformers must
+/// also be printable and debug-printable.
+protocol Tensorial: CustomStringConvertible, CustomDebugStringConvertible{
+    /// Dimension sizes of the container.
+    var dim: Dim { get set }
+    /// Total number of elements.
+    func numel() -> Int
+    /// Memory layout tag of the stored data.
+    var layout: DataLayout { get }
+}
+
+extension Tensorial {
+    /// Default element count: forwards to `dim.numel()`.
+    func numel() -> Int {
+        return dim.numel()
+    }
+}
+
+/// Tensor backed by manually managed host memory, with an optional Metal
+/// buffer that `initBuffer` fills from that memory.
+///
+/// The host allocation is never freed automatically: `convert(to:)` frees
+/// the old storage when it swaps in the converted copy, and `initBuffer`
+/// frees the storage once the contents have been uploaded.
+class Tensor<P: PrecisionType>: Tensorial {
+    /// Element format written into `buffer` by `initBuffer`.
+    enum BufferPrecision {
+        case Float32, Float16
+    }
+    
+    var data: Data
+    var dim: Dim
+    /// Metal copy of the data; nil until `initBuffer` creates it.
+    var buffer: MTLBuffer!
+    private(set) var layout: DataLayout
+    
+    /// Raw element storage plus the capacity it was allocated with.
+    class Data {
+        init(inSize: Int, inPointer: UnsafeMutablePointer<P>) {
+            size = inSize
+            pointer = inPointer
+        }
+        /// Capacity of the allocation behind `pointer`, in elements of `P`.
+        let size: Int
+        var pointer: UnsafeMutablePointer<P>
+        /// Unchecked element access through `pointer`.
+        subscript(index: Int) -> P{
+            get {
+                return pointer[index]
+            }
+            set {
+                pointer[index] = newValue
+            }
+        }
+        /// Deinitializes and frees the allocation. `pointer` is left dangling,
+        /// so it must be replaced before any further access.
+        func release() {
+            pointer.deinitialize(count: size)
+            pointer.deallocate()
+        }
+        deinit {
+            // Storage is not released here; owners call release() explicitly.
+        }
+    }
+ 
+    /// Allocates uninitialized storage for a tensor of shape `inDim`.
+    required init(inDim: Dim, inLayout: DataLayout = .NCHW) {
+        dim = inDim
+        // NOTE(review): this is passed to allocate(capacity:), which counts
+        // elements, so the allocation is MemoryLayout<P>.size times larger than
+        // numel(). Kept as is because other code may read `data.size`.
+        let size = inDim.numel() * MemoryLayout<P>.size
+        let pointer = UnsafeMutablePointer<P>.allocate(capacity: size)
+        data = Data.init(inSize: size, inPointer: pointer)
+        layout = inLayout
+    }
+    
+    /// Re-orders the data from NCHW to NHWC in freshly allocated storage.
+    ///
+    /// Does nothing when `to` equals the current layout, when the tensor is
+    /// not 4-dimensional, or for any conversion other than NCHW -> NHWC.
+    func convert(to: DataLayout) {
+        guard to != layout else {
+            return
+        }
+        
+        guard dim.cout() == 4 else {
+            return
+        }
+        
+        guard layout == .NCHW && to == .NHWC else {
+            // other not support
+            return
+        }
+        
+        let newPointer = UnsafeMutablePointer<P>.allocate(capacity: data.size)
+        NCHW2NHWC(newPtr: newPointer)
+        
+        // Free the old storage only after its contents were copied out.
+        data.release()
+        data.pointer = newPointer
+        layout = to
+    }
+    
+    /// Converts `count` Float32 values at `input` to Float16 at `output`
+    /// using vImage; traps if vImage reports an error.
+    func float32ToFloat16(input: UnsafeMutablePointer<Float32>, output: UnsafeMutableRawPointer, count: Int) {
+        var float32Buffer = vImage_Buffer(data: input,  height: 1, width: UInt(count), rowBytes: count * 4)
+        var float16buffer = vImage_Buffer(data: output, height: 1, width: UInt(count), rowBytes: count * 2)
+        guard vImageConvert_PlanarFtoPlanar16F(&float32Buffer, &float16buffer, 0) == kvImageNoError else {
+            fatalError(" float 32 to float 16 error ! ")
+        }
+    }
+    
+    /// Writes `count` Float32 values starting at `source` into `buffer`,
+    /// either verbatim or converted to Float16 depending on `precision`.
+    private func fillBuffer(from source: UnsafeMutablePointer<Float32>, count: Int, precision: BufferPrecision) {
+        switch precision {
+        case .Float32:
+            buffer?.contents().copyMemory(from: source, byteCount: count * MemoryLayout<P>.stride)
+        case .Float16:
+            float32ToFloat16(input: source, output: buffer.contents(), count: count)
+        }
+    }
+    
+    /// Creates `buffer` on `device`, uploads the host data into it and then
+    /// releases the host storage.
+    ///
+    /// Only Float32 tensors are accepted (anything else traps). Supported
+    /// shapes are 1-D tensors and 4-D tensors in NHWC layout; for the latter
+    /// the last dimension is padded to a multiple of four unless it is 1.
+    /// Other dimension counts trap.
+    /// NOTE(review): a 4-D tensor that is not NHWC gets no buffer, yet its
+    /// host storage is still released below.
+    func initBuffer(device: MTLDevice, precision: BufferPrecision = .Float32) {
+        guard let floatPointer = data.pointer as? UnsafeMutablePointer<Float32> else {
+            fatalError(" not support yet ")
+        }
+        
+        // Bytes per element inside the Metal buffer.
+        let precisionSize: Int
+        switch precision {
+        case .Float32:
+            precisionSize = 4
+        case .Float16:
+            precisionSize = 2
+        }
+        
+        if dim.cout() == 4 {
+            if layout == .NHWC {
+                let C = dim[3]
+                let cSlices = (C + 3) / 4
+                let paddedC = cSlices * 4
+                let count = paddedC * dim[0] * dim[1] * dim[2]
+                if C == paddedC {
+                    // Already a multiple of four: upload as is.
+                    buffer = device.makeBuffer(length: count * precisionSize)
+                    fillBuffer(from: floatPointer, count: count, precision: precision)
+                } else if C == 1 {
+                    // Single channel: uploaded without padding.
+                    buffer = device.makeBuffer(length: numel() * precisionSize)
+                    fillBuffer(from: floatPointer, count: numel(), precision: precision)
+                } else {
+                    buffer = device.makeBuffer(length: count * precisionSize)
+                    let convertedPointer = UnsafeMutablePointer<Float32>.allocate(capacity: count)
+                    // Zero the whole staging area first so the padding lanes
+                    // (indices C..<paddedC of every group) hold 0 instead of
+                    // whatever the allocator handed back.
+                    convertedPointer.initialize(repeating: 0, count: count)
+                    var tmpPointer = floatPointer
+                    var dstPtr = convertedPointer
+                    for _ in 0..<dim[0] * dim[1] * dim[2] {
+                        for j in 0..<C {
+                            dstPtr[j] = tmpPointer[j]
+                        }
+                        tmpPointer += C
+                        dstPtr += paddedC
+                    }
+                    
+                    fillBuffer(from: convertedPointer, count: count, precision: precision)
+                    
+                    convertedPointer.deinitialize(count: count)
+                    convertedPointer.deallocate()
+                }
+            }
+        } else if dim.cout() == 1 {
+            buffer = device.makeBuffer(length: numel() * precisionSize)
+            fillBuffer(from: floatPointer, count: numel(), precision: precision)
+        } else {
+            fatalError(" not support !")
+        }
+        //TODO: release
+        data.release()
+    }
+    
+    /// `dim[1]` of a 4-D tensor; traps for any other dimension count.
+    var width: Int {
+        guard dim.cout() == 4 else {
+            fatalError()
+        }
+        return dim[1]
+    }
+    
+    /// `dim[2]` of a 4-D tensor; traps for any other dimension count.
+    var height: Int {
+        guard dim.cout() == 4 else {
+            fatalError()
+        }
+        return dim[2]
+    }
+    
+    /// `dim[3]` of a 4-D tensor; traps for any other dimension count.
+    var channel: Int {
+        guard dim.cout() == 4 else {
+            fatalError()
+        }
+        return dim[3]
+    }
+
+    
+    /// Copies the data into `newPtr` in n, h, w, c order, reading the source
+    /// as [N, C, H, W], then swaps entries 1 and 3 of `dim` (so [N, C, H, W]
+    /// becomes [N, W, H, C]).
+    func NCHW2NHWC(newPtr: UnsafeMutablePointer<P>) {
+        let N = dim[0]
+        let C = dim[1]
+        let H = dim[2]
+        let W = dim[3]
+        let HXW = H * W
+        let CXHXW = C * H * W
+        
+        var index: Int = 0
+        for n in 0..<N {
+            for h in 0..<H{
+                for w in 0..<W{
+                    for c in 0..<C{
+                        newPtr[index] = data.pointer[n * CXHXW + c * HXW + h * W + w]
+                        index += 1
+                    }
+                }
+            }
+        }
+        dim.swapeDimAt(index1: 1, index2: 3)
+    }
+}
+
+
+/// Printing helpers for `Tensor`.
+extension Tensor {
+    
+    /// The dimensions, the buffer object, and every element of `buffer`
+    /// read as `P`. When `buffer` has not been created yet, only the first
+    /// two lines are produced.
+    /// NOTE(review): elements are read as `P` even when the buffer was
+    /// filled with `.Float16` precision; confirm that is intended.
+    var debugDescription: String {
+        var str = "dim: \(dim) \n"
+        str += "MTLBuffer: \(self.buffer) \n"
+        // `buffer` is implicitly unwrapped and stays nil until initBuffer
+        // creates it; stop here instead of trapping on a nil buffer.
+        guard let mtlBuffer = buffer else {
+            return str
+        }
+        let values = mtlBuffer.contents().assumingMemoryBound(to: P.self)
+        for i in 0..<mtlBuffer.length/MemoryLayout<P>.size {
+            str += " \(values[i])"
+        }
+        return str
+    }
+    
+    /// Prints `header`, then the storage capacity, the dimensions and the
+    /// first `numel()` elements read through `data.pointer`.
+    func logDataPointer(header: String = "") {
+        print(header)
+        var str = ""
+        str += "data size: \(data.size) \n"
+        str += "dim: \(dim) \n"
+        for i in 0..<numel() {
+            str += " \(data.pointer[i])"
+        }
+        print(str)
+    }
+    
+    /// Same text as `debugDescription`.
+    var description: String {
+        return debugDescription
+    }
+    
+}
diff --git a/metal/paddle-mobile/paddle-mobile/framework/Texture.swift b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
new file mode 100644
index 0000000000000000000000000000000000000000..81894664c5dc4acb1a5edd4485543bb20a285ea4
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/framework/Texture.swift
@@ -0,0 +1,142 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Metal
+import Foundation
+
+/// Pairs a Metal texture with the dimensions expected for it.
+class InputTexture {
+    /// The wrapped texture.
+    let mtlTexture: MTLTexture
+    /// Dimensions supplied alongside the texture at construction.
+    let expectDim: Dim
+
+    init(inMTLTexture: MTLTexture, inExpectDim: Dim) {
+        self.mtlTexture = inMTLTexture
+        self.expectDim = inExpectDim
+    }
+}
+
+/// Printing helpers that forward to the wrapped texture.
+extension InputTexture {
+    /// Description of the wrapped Metal texture.
+    var description: String {
+        return mtlTexture.description
+    }
+    
+    /// Debug description of the wrapped texture, or " MetalTexture " when
+    /// the texture does not provide one.
+    var debugDescription: String {
+        if let text = mtlTexture.debugDescription {
+            return text
+        }
+        return " MetalTexture "
+    }
+}
+
+/// A Metal texture created for a given `Dim`, together with the descriptor
+/// it was built from.
+public class Texture<P: PrecisionType>: Tensorial {
+    var dim: Dim
+    private(set) var layout: DataLayout
+    let textureDesc: MTLTextureDescriptor
+    var metalTexture: MTLTexture
+    
+    /// Builds the descriptor from `inDim` and creates the texture on `device`.
+    ///
+    /// - 1 dimension: a 1-D texture of width `inDim[0]`.
+    /// - 4 dimensions: a 2-D array texture, height `inDim[1]`, width
+    ///   `inDim[2]`, with `(inDim[3] * inDim[0] + 3) / 4` slices.
+    /// - 2 dimensions: a 1x1 2-D array texture with
+    ///   `(inDim[0] * inDim[1] + 3) / 4` slices.
+    /// - anything else traps.
+    ///
+    /// The pixel format follows the byte size of `P` (1, 2 or 4); for other
+    /// sizes the descriptor's format is left untouched.
+    init(device: MTLDevice, inDim: Dim, inLayout: DataLayout = .NHWC) {
+        dim = inDim
+        layout = inLayout
+        
+        let descriptor = MTLTextureDescriptor.init()
+        switch inDim.cout() {
+        case 1:
+            descriptor.width = inDim[0]
+            descriptor.textureType = .type1D
+        case 4:
+            descriptor.height = inDim[1]
+            descriptor.width = inDim[2]
+            descriptor.depth = 1
+            descriptor.arrayLength = (inDim[3] * inDim[0] + 3)/4
+            descriptor.textureType = .type2DArray
+        case 2:
+            descriptor.height = 1
+            descriptor.width = 1
+            descriptor.depth = 1
+            descriptor.arrayLength = (inDim[0] * inDim[1] + 3)/4
+            descriptor.textureType = .type2DArray
+        default:
+            fatalError(" not suuprt ")
+        }
+        
+        switch MemoryLayout<P>.size {
+        case 1:
+            descriptor.pixelFormat = .rgba8Unorm
+        case 2:
+            descriptor.pixelFormat = .rgba16Float
+        case 4:
+            descriptor.pixelFormat = .rgba32Float
+        default:
+            break
+        }
+        
+        descriptor.usage = [.shaderRead, .shaderWrite]
+        descriptor.storageMode = .shared
+        textureDesc = descriptor
+        metalTexture = device.makeTexture(descriptor: descriptor) ?! " texture nil "
+    }
+}
+
+/// Printing helpers for `Texture`.
+extension Texture {
+    /// Same text as `debugDescription`.
+    public var description: String {
+        return debugDescription
+    }
+    
+    /// The dimensions followed by the Metal texture's own description.
+    public var debugDescription: String{
+        return "Dim: \(dim) \n value:[ " + "\(metalTexture)" + " ]"
+    }
+    
+}
diff --git a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h
new file mode 100644
index 0000000000000000000000000000000000000000..ffa44be38a4c3a1f3109c51b3d15506591f2de2e
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+#pragma once
+
+#import <UIKit/UIKit.h>
+
+//! Project version number for paddle_mobile.
+FOUNDATION_EXPORT double paddle_mobileVersionNumber;
+
+//! Project version string for paddle_mobile.
+FOUNDATION_EXPORT const unsigned char paddle_mobileVersionString[];
+
+
diff --git a/src/common/types.cpp b/src/common/types.cpp
index b6387503856f438acd74b8d147da13a2b009f2a1..e06e9965c4108988ed9e6675f7a012631e81049f 100644
--- a/src/common/types.cpp
+++ b/src/common/types.cpp
@@ -50,6 +50,9 @@ const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
     "fusion_elementwise_add_relu";
 const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
 const char *G_OP_TYPE_REGION = "region";
+const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
+const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
+const char *G_OP_TYPE_PRELU = "prelu";
 
 std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
@@ -57,6 +60,7 @@ std::unordered_map<
         {G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
         {G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
+        {G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
         {G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
         {G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
         {G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
@@ -85,6 +89,8 @@ std::unordered_map<
         {G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Y"}}},
         {G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
         {G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
-        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}}};
+        {G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
+        {G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}},
+        {G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}}};
 
 }  // namespace paddle_mobile
diff --git a/src/common/types.h b/src/common/types.h
index 6066879305d5ea7d1b6dcb0bb618c234338cc171..bab169977135ce4f572bf4242837ed39588cc97b 100644
--- a/src/common/types.h
+++ b/src/common/types.h
@@ -113,6 +113,9 @@ extern const char *G_OP_TYPE_FUSION_POOL_BN;
 extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
 extern const char *G_OP_TYPE_FUSION_FC_RELU;
 extern const char *G_OP_TYPE_REGION;
+extern const char *G_OP_TYPE_FUSION_CONV_BN;
+extern const char *G_OP_TYPE_CONV_TRANSPOSE;
+extern const char *G_OP_TYPE_PRELU;
 
 extern std::unordered_map<
     std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
diff --git a/src/fpga/api.cpp b/src/fpga/api.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..01c610ce5b445bc603da3c0dc43ad21c35d95ae6
--- /dev/null
+++ b/src/fpga/api.cpp
@@ -0,0 +1,168 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <algorithm>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <map>
+
+#include "api.h"
+
+#define FPGA_TEST_MODE
+#ifdef FPGA_TEST_MODE
+#include "common/log.h"
+#endif
+
+namespace paddle_mobile {
+namespace fpga {
+
+// Descriptor of the opened FPGA device; -1 until open_device() succeeds.
+static int fd = -1;
+// Device node that open_device() opens.
+static const char *device_path = "/dev/fpgadrv0";
+
+// Sends request `req` with argument block `arg` to the FPGA device and
+// returns ioctl()'s result. On builds without PADDLE_MOBILE_OS_LINUX there
+// is no device, so the call always reports failure (-1).
+static inline int do_ioctl(int req, const void *arg) {
+#ifdef PADDLE_MOBILE_OS_LINUX
+  // ioctl(2) takes the descriptor first, then the request, then the
+  // argument; the pointer is passed through unchanged.
+  return ioctl(fd, req, arg);
+#else
+  return -1;
+#endif
+}
+
+// Opens the FPGA device node read/write the first time it is called and
+// caches the descriptor; later calls return the cached value. A failed
+// open leaves the cache at -1, so the next call tries again.
+int open_device() {
+  if (fd != -1) {
+    return fd;
+  }
+  fd = open(device_path, O_RDWR);
+  return fd;
+}
+
+// memory management;
+#ifdef PADDLE_MOBILE_OS_LINUX
+// Length of every live mapping handed out by fpga_malloc(), keyed by its
+// address, so fpga_free() can give munmap() the real size. Not guarded by a
+// lock; callers on multiple threads must serialize themselves.
+static std::map<void *, size_t> fpga_mapped_sizes;
+#endif
+
+// Allocates `size` bytes: a shared mapping of the FPGA device on Linux
+// builds, plain malloc() otherwise. Returns NULL on failure in both cases.
+void *fpga_malloc(size_t size) {
+#ifdef PADDLE_MOBILE_OS_LINUX
+  void *ptr = mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+  // mmap reports failure as MAP_FAILED, not NULL; translate it so callers
+  // see the same failure value as on the malloc() path.
+  if (ptr == MAP_FAILED) {
+    return NULL;
+  }
+  fpga_mapped_sizes[ptr] = size;
+  return ptr;
+#else
+  return malloc(size);
+#endif
+}
+
+// Releases memory obtained from fpga_malloc(). On Linux builds, pointers
+// that were not returned by fpga_malloc() (including NULL) are ignored.
+void fpga_free(void *ptr) {
+#ifdef PADDLE_MOBILE_OS_LINUX
+  auto entry = fpga_mapped_sizes.find(ptr);
+  if (entry == fpga_mapped_sizes.end()) {
+    return;
+  }
+  // munmap() needs the mapping's length; a length of 0 is rejected.
+  munmap(ptr, entry->second);
+  fpga_mapped_sizes.erase(entry);
+#else
+  free(ptr);
+#endif
+}
+
+// Copies `num` bytes from `src` to `dest` with memcpy, so the two ranges
+// must not overlap.
+void fpga_copy(void *dest, const void *src, size_t num) {
+  std::memcpy(dest, src, num);
+}
+
+// Submits a convolution described by `args` to the FPGA through
+// IOCTL_CONFIG_CONV and returns do_ioctl()'s result. When FPGA_TEST_MODE is
+// defined, the argument fields are logged first.
+int ComputeFpgaConv(const struct ConvArgs &args) {
+#ifdef FPGA_TEST_MODE
+  DLOG << "   relu_enabled:" << args.relu_enabled
+       << "   sb_address:" << args.sb_address
+       << "   filter_address:" << args.filter_address
+       << "   filter_num:" << args.filter_num
+       << "   group_num:" << args.group_num;
+  DLOG << "   image_address:" << args.image.address
+       << "   image_scale_address:" << args.image.scale_address
+       << "   image_channels:" << args.image.channels
+       << "   image_height:" << args.image.height
+       << "   image_width:" << args.image.width
+       << "   pad_height:" << args.image.pad_height
+       << "   pad_width:" << args.image.pad_width;
+  DLOG << "   kernel_height:" << args.kernel.height
+       << "   kernel_width:" << args.kernel.width
+       << "   stride_h:" << args.kernel.stride_h
+       << "   stride_w:" << args.kernel.stride_w;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
+  return do_ioctl(IOCTL_CONFIG_CONV, &args);
+}
+
+// Submits a pooling operation described by `args` to the FPGA through
+// IOCTL_CONFIG_POOLING and returns do_ioctl()'s result. When FPGA_TEST_MODE
+// is defined, the argument fields are logged first.
+int ComputeFpgaPool(const struct PoolingArgs &args) {
+#ifdef FPGA_TEST_MODE
+  DLOG << "   image_address:" << args.image.address
+       << "   image_scale_address:" << args.image.scale_address
+       << "   image_channels:" << args.image.channels
+       << "   image_height:" << args.image.height
+       << "   image_width:" << args.image.width
+       << "   pad_height:" << args.image.pad_height
+       << "   pad_width:" << args.image.pad_width;
+  DLOG << "   kernel_height:" << args.kernel.height
+       << "   kernel_width:" << args.kernel.width
+       << "   stride_h:" << args.kernel.stride_h
+       << "   stride_w:" << args.kernel.stride_w;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
+  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
+}
+
+// Submits an element-wise add of two images described by `args` to the FPGA
+// through IOCTL_CONFIG_EW and returns do_ioctl()'s result. When
+// FPGA_TEST_MODE is defined, the argument fields are logged first; fields of
+// the first image are labelled *0 and those of the second image *1.
+int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
+#ifdef FPGA_TEST_MODE
+  DLOG << "   relu_enabled:" << args.relu_enabled << "   const0:" << args.const0
+       << "   const1:" << args.const1;
+  DLOG << "   image0_address:" << args.image0.address
+       << "   image0_scale_address:" << args.image0.scale_address
+       << "   image0_channels:" << args.image0.channels
+       << "   image0_height:" << args.image0.height
+       << "   image0_width:" << args.image0.width
+       << "   pad0_height:" << args.image0.pad_height
+       << "   pad0_width:" << args.image0.pad_width;
+  DLOG << "   image1_address:" << args.image1.address
+       << "   image1_scale_address:" << args.image1.scale_address
+       << "   image1_channels:" << args.image1.channels
+       << "   image1_height:" << args.image1.height
+       << "   image1_width:" << args.image1.width
+       << "   pad1_height:" << args.image1.pad_height
+       << "   pad1_width:" << args.image1.pad_width;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
+  return do_ioctl(IOCTL_CONFIG_EW, &args);
+}
+// Submits a bypass operation described by `args` to the FPGA through
+// IOCTL_CONFIG_BYPASS and returns do_ioctl()'s result. When FPGA_TEST_MODE
+// is defined, the argument fields are logged first.
+int PerformBypass(const struct BypassArgs &args) {
+#ifdef FPGA_TEST_MODE
+  DLOG << "   layout_type:" << args.layout_type
+       << "   convert_type:" << args.convert_type;
+  DLOG << "   image_address:" << args.image.address
+       << "   image_scale_address:" << args.image.scale_address
+       << "   image_channels:" << args.image.channels
+       << "   image_height:" << args.image.height
+       << "   image_width:" << args.image.width
+       << "   pad_height:" << args.image.pad_height
+       << "   pad_width:" << args.image.pad_width;
+  DLOG << "   out_address:" << args.output.address
+       << "   out_scale_address:" << args.output.scale_address;
+#endif
+
+  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
+}
+
+}  // namespace fpga
+}  // namespace paddle_mobile
diff --git a/src/fpga/api/fpga_api.h b/src/fpga/api.h
similarity index 100%
rename from src/fpga/api/fpga_api.h
rename to src/fpga/api.h
diff --git a/src/fpga/api/fpga_api.cpp b/src/fpga/api/fpga_api.cpp
deleted file mode 100644
index 779c846d1f3c465e5113f805b2b3856a1a7894c5..0000000000000000000000000000000000000000
--- a/src/fpga/api/fpga_api.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include <errno.h>
-#include <fcntl.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <algorithm>
-#include <cmath>
-#include <cstdio>
-#include <cstring>
-
-#include "fpga/api/fpga_api.h"
-
-namespace paddle_mobile {
-namespace fpga {
-
-static int fd = -1;
-static const char *device_path = "/dev/fpgadrv0";
-
-static inline int do_ioctl(int req, const void *arg) {
-  return ioctl(req, (unsigned int64_t)arg);
-}
-
-int open_device() {
-  if (fd == -1) {
-    fd = open(device_path, O_RDWR);
-  }
-  return fd;
-}
-
-// memory management;
-void *fpga_malloc(size_t size) {
-  return reinterpret_cast<void *>(
-      mmap64(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0));
-}
-
-void fpga_free(void *ptr) { munmap(ptr, 0); }
-
-void fpga_copy(void *dest, const void *src, size_t num) {
-  memcpy(dest, src, num);
-}
-
-int ComputeFpgaConv(const struct ConvArgs &args) {
-  return do_ioctl(IOCTL_CONFIG_CONV, &args);
-}
-int ComputeFpgaPool(const struct PoolingArgs &args) {
-  return do_ioctl(IOCTL_CONFIG_POOLING, &args);
-}
-int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
-  return do_ioctl(IOCTL_CONFIG_EW, &args);
-}
-int PerformBypass(const struct BypassArgs &args) {
-  return do_ioctl(IOCTL_CONFIG_BYPASS, &args);
-}
-
-}  // namespace fpga
-}  // namespace paddle_mobile
diff --git a/src/fpga/fpga_quantilization.cpp b/src/fpga/fpga_quantilization.cpp
deleted file mode 100644
index 34033a60a683183695a79bfafbaf14223e2eebf2..0000000000000000000000000000000000000000
--- a/src/fpga/fpga_quantilization.cpp
+++ /dev/null
@@ -1,95 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "fpga/fpga_quantilization.h"
-#include <algorithm>
-
-namespace paddle_mobile {
-namespace fpga {
-
-template <typename Dtype>
-static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
-                       int height, int width) {
-  int offset_height = 0;
-
-  for (int n = 0; n < num; n++) {
-    int amount_per_row = width * channel;
-    for (int c = 0; c < channel; c++) {
-      for (int h = 0; h < height; h++) {
-        int offset_height = h * amount_per_row;
-        for (int w = 0; w < width; w++) {
-          *(data_out + offset_height + w * channel + c) = *(data_in++);
-        }
-      }
-    }
-    data_out += num;
-  }
-}
-
-template <typename Dtype>
-static Dtype find_max(Dtype* data, int num) {
-  Dtype max = 0;
-  for (int i = 0; i < num; ++i) {
-    max = std::max(max, data[i]);
-  }
-  return max;
-}
-
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter) {
-  float scale = 0;
-  float fix_range = static_cast<float>((1 << (8 - 1)) - 1);
-
-  const int batch_size = filter->dims()[0];
-  const int channel = filter->dims()[1];
-  const int height = filter->dims()[2];
-  const int width = filter->dims()[3];
-
-  int8_t* int_data = nullptr;
-  int8_t* tmp_data = new int[filter->numel()];
-
-  // 32bit filter -> 8bit filter;
-  if (filter->type() == typeid(float)) {
-    float* float_data = filter->data<float>();
-    float max = find_max(float_data, filter->numel());
-
-    scale = (max / fix_range);
-
-    framework::Tensor* filter = filter;
-    framework::Tensor* quant_filter = new framework::Tensor();
-
-    int_data = quant_filter->mutable_data<int8_t>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      tmp_data[i] = (int8_t)float_data[i] * scale;
-    }
-    filter = quant_filter;
-  } else {
-    int8_t max = find_max(filter->data<int8_t>(), filter->numel());
-    scale = (max / fix_range);
-
-    int_data = filter->data<int8_t>();
-    for (int i = 0; i < filter->numel(); ++i) {
-      tmp_data[i] = int_data[i];
-    }
-    int_data = filter->mutable_data<int8_t>();
-  }
-  // NCHW -> NHWC;
-  chw_to_hwc<int8_t>(tmp_data, int_data, batch_size, channel, height, width);
-  delete tmp_data;
-  *(filter->fpga_args().scale_pointer()) = scale;
-  return filter;
-}
-
-}  // namespace fpga
-}  // namespace paddle_mobile
diff --git a/src/fpga/quantization.cpp b/src/fpga/quantization.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..44994d4c353490b533110d0965fb63b4fb5c7aa2
--- /dev/null
+++ b/src/fpga/quantization.cpp
@@ -0,0 +1,105 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "fpga/quantization.h"
+#include <algorithm>
+#include <cmath>
+#include <cstring>
+
+namespace paddle_mobile {
+namespace fpga {
+
+// Reorders `num` consecutive CHW blocks from data_in into HWC blocks in
+// data_out. Every block holds channel * height * width elements.
+template <typename Dtype>
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int64_t num,
+                       int64_t channel, int64_t height, int64_t width) {
+  const int64_t amount_per_row = width * channel;
+  const int64_t amount_per_block = height * amount_per_row;
+  for (int64_t n = 0; n < num; n++) {
+    for (int64_t c = 0; c < channel; c++) {
+      for (int64_t h = 0; h < height; h++) {
+        const int64_t offset_height = h * amount_per_row;
+        for (int64_t w = 0; w < width; w++) {
+          *(data_out + offset_height + w * channel + c) = *(data_in++);
+        }
+      }
+    }
+    // Step over the block just written. Stepping by `num` (as before) made
+    // consecutive blocks overwrite each other.
+    data_out += amount_per_block;
+  }
+}
+
+// Returns the largest absolute value among the first `num` elements of data,
+// or 0 when num <= 0.
+template <typename Dtype>
+static Dtype find_max(Dtype* data, int64_t num) {
+  Dtype max = 0;
+  for (int64_t i = 0; i < num; ++i) {
+    Dtype value = data[i];
+    Dtype abs = value > 0 ? value : -value;
+    max = std::max(max, abs);
+  }
+  return max;
+}
+
+// Quantizes `filter` to int8 in place and stores the scale on the tensor.
+// scale = fix_range / max(|value|), with fix_range = 2^(8-1) - 1. A 4-D
+// filter is also rearranged from NCHW to NHWC, a 2-D filter keeps its layout,
+// and any other rank only has its scale recorded.
+void quantize_filter(framework::Tensor* filter) {
+  DLOG << "quantilize_filter........" << filter->dims();
+
+  float scale = 0;
+  auto fix_range = static_cast<float>(std::pow(2, 8 - 1) - 1);
+
+  const auto numel = filter->numel();
+  auto* tmp_data = new int8_t[numel];
+
+  // 32bit filter -> 8bit filter;
+  if (filter->type() == typeid(float)) {
+    auto* float_data = filter->data<float>();
+    auto max = find_max<float>(float_data, numel);
+
+    scale = (fix_range / max);
+    DLOG << "scale:" << scale;
+
+    for (int64_t i = 0; i < numel; ++i) {
+      tmp_data[i] = (int8_t)(float_data[i] * scale);
+    }
+  } else {
+    auto max = find_max<int8_t>(filter->data<int8_t>(), numel);
+    scale = (fix_range / max);
+    std::memcpy(tmp_data, filter->data<int8_t>(), (size_t)numel);
+  }
+
+  if (filter->dims().size() == 4) {
+    const auto batch_size = filter->dims()[0];
+    const auto channel = filter->dims()[1];
+    const auto height = filter->dims()[2];
+    const auto width = filter->dims()[3];
+    chw_to_hwc<int8_t>(tmp_data, filter->mutable_data<int8_t>(), batch_size,
+                       channel, height, width);
+  } else if (filter->dims().size() == 2) {
+    std::memcpy(filter->mutable_data<int8_t>(), tmp_data, (size_t)numel);
+  }
+
+  // tmp_data came from new[], so it must be released with delete[].
+  delete[] tmp_data;
+  filter->SetFpgaScale(scale);
+}
+
+}  // namespace fpga
+}  // namespace paddle_mobile
diff --git a/src/fpga/fpga_quantilization.h b/src/fpga/quantization.h
similarity index 79%
rename from src/fpga/fpga_quantilization.h
rename to src/fpga/quantization.h
index 8dacd20abdc85da05a451ec763fd01f03f8f4516..0d6c2405fccd814f73d44eef20b6735dc0ad0eab 100644
--- a/src/fpga/fpga_quantilization.h
+++ b/src/fpga/quantization.h
@@ -21,10 +21,10 @@ namespace paddle_mobile {
 namespace fpga {
 
 template <typename Dtype>
-static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int num, int channel,
-                       int height, int width);
+static void chw_to_hwc(Dtype* data_in, Dtype* data_out, int64_t num,
+                       int64_t channel, int64_t height, int64_t width);
+
+void quantize_filter(framework::Tensor* filter);
 
-template <typename Dtype>
-framework::Tensor* quantilize_filter(framework::Tensor* filter);
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/framework/tensor.h b/src/framework/tensor.h
index 797fcf5bffbe5e738fe352d1ca84602f0e5d86a0..6fc16a01a2874f04ecea3edb89774f4deea93dd5 100644
--- a/src/framework/tensor.h
+++ b/src/framework/tensor.h
@@ -64,7 +64,8 @@ struct SizeOfTypeFunctor<HEAD, TAIL...> {
 };
 
 static inline size_t SizeOfType(std::type_index type) {
-  SizeOfTypeFunctor<int, half, float, double, int16_t, int64_t, bool, size_t>
+  SizeOfTypeFunctor<int8_t, int, half, float, double, int16_t, int64_t, bool,
+                    size_t>
       functor;
   size_t size = functor(type);
 
@@ -115,8 +116,8 @@ class Tensor {
     PADDLE_MOBILE_ENFORCE(
         (std::is_same<T, void>::value ||
          holder_->type().hash_code() == typeid(T).hash_code()),
-        "Tensor holds the wrong type, it holds %s",
-        this->holder_->type().name());
+        "Tensor holds the wrong type, it holds %s ,requested:%s",
+        this->holder_->type().name(), typeid(T).name());
 
     return reinterpret_cast<const T *>(
         reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
@@ -255,14 +256,26 @@ class Tensor {
 
 #ifdef PADDLE_MOBILE_FPGA
   struct FPGAArgs {
-    float scale;
+    friend class Tensor;
+
+    inline float *scale_pointer() { return scale_; }
+    inline float scale() { return *scale_; }
 
-    inline float *scale_pointer() { return &scale; }
+   private:
+    float *scale_;
   };
 
   struct FPGAArgs fpga_args() const {
-    return fpgaArgs_;
+    FPGAArgs args;
+    args.scale_ = scale.get();
+    return args;
   }
+
+  void SetFpgaScale(float s) { *(scale.get()) = s; }
+
+ private:
+  std::shared_ptr<float> scale = std::make_shared<float>(0);
+
 #endif
 
  private:
@@ -331,10 +344,6 @@ class Tensor {
    * begins.
    */
   size_t offset_;
-
-#ifdef PADDLE_MOBILE_FPGA
-  FPGAArgs fpgaArgs_;
-#endif
 };
 
 #ifdef PADDLE_MOBILE_DEBUG
@@ -342,9 +351,12 @@ inline Print &operator<<(Print &printer, const Tensor &tensor) {
   printer << " dims: " << tensor.dims() << "\n";
   int stride = tensor.numel() / 20;
   stride = stride > 0 ? stride : 1;
+#ifndef PADDLE_MOBILE_FPGA
   for (int i = 0; i < tensor.numel(); i += stride) {
     printer << tensor.data<float>()[i] << " ";
   }
+#endif
+
   return printer;
 }
 
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index d6434b64aa752fd62bc637a882298228d59880b8..91005287055b7af859d738ea20c40abbf5f7db96 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -89,7 +89,6 @@ Executor<Dtype, P>::Executor(const framework::Program<Dtype> p, int batch_size,
   } else {
     InitMemory();
   }
-
   std::shared_ptr<framework::BlockDesc> to_predict_block =
       to_predict_program_->Block(0);
   auto &ops = ops_of_block_[*to_predict_block.get()];
@@ -193,8 +192,14 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
     }
     *data += (memory_size * sizeof(uint8_t));
   } else {
-    for (int n = 0; n < memory_size * type_size; ++n) {
-      static_cast<char *>(memory)[n] = (*data)[n];
+    for (int n = 0; n < memory_size; n++) {
+      float value;
+      memcpy(&value, *data + n * type_size, type_size);
+      if (value < 1e-30 && value > -1e-30) {
+        static_cast<float *>(memory)[n] = 0.0;
+      } else {
+        static_cast<float *>(memory)[n] = value;
+      }
     }
     (*data) += (sizeof(char) * memory_size * type_size);
   }
diff --git a/src/jni/PML.java b/src/jni/PML.java
new file mode 100644
index 0000000000000000000000000000000000000000..717d9ebb972a2ba36aec33ff59868ff8f0530c5b
--- /dev/null
+++ b/src/jni/PML.java
@@ -0,0 +1,63 @@
+package com.baidu.paddle;
+
+public class PML {
+    /**
+     * Loads a model stored as separate files in a directory.
+     *
+     * @param modelDir model dir
+     * @return true if the model loaded successfully
+     */
+    public static native boolean load(String modelDir);
+
+    /**
+     * Loads a combined model (one model file plus one param file).
+     *
+     * @param modelPath model file path
+     * @param paramPath param file path
+     * @return true if the model loaded successfully
+     */
+    public static native boolean loadCombined(String modelPath, String paramPath);
+
+    /**
+     * Loads a model with qualified params from a directory.
+     *
+     * @param modelDir qualified model dir
+     * @return true if the model loaded successfully
+     */
+    public static native boolean loadQualified(String modelDir);
+
+    /**
+     * Loads a combined model with qualified params.
+     *
+     * @param modelPath model file path
+     * @param paramPath qualified param path
+     * @return true if the model loaded successfully
+     */
+    public static native boolean loadCombinedQualified(String modelPath, String paramPath);
+
+    /**
+     * Runs prediction on a preprocessed image.
+     *
+     * @param buf   preprocessed image data, laid out as your model expects
+     * @param ddims input dimensions; the native side expects 4 entries
+     * @return prediction output
+     */
+    public static native float[] predictImage(float[] buf, int[] ddims);
+
+    /** Runs prediction on an NV21 image buffer; ddims must have 4 entries, meanValues may be null. */
+    public static native float[] predictYuv(byte[] buf, int imgWidth, int imgHeight, int[] ddims, float[] meanValues);
+
+    /**
+     * Clears the loaded model data.
+     */
+    public static native void clear();
+
+    /**
+     * Sets the thread count used when OpenMP is enabled.
+     *
+     * @param threadCount number of threads
+     */
+    public static native void setThread(int threadCount);
+
+
+}
diff --git a/src/jni/paddle_mobile_jni.cpp b/src/jni/paddle_mobile_jni.cpp
index c8ed491672920d85adafa28316663ede64a6dcc9..1b909532e96d4337d620fb0b7cf562ee35a3dc72 100644
--- a/src/jni/paddle_mobile_jni.cpp
+++ b/src/jni/paddle_mobile_jni.cpp
@@ -20,6 +20,12 @@ limitations under the License. */
 #include "framework/tensor.h"
 #include "io/paddle_mobile.h"
 
+#ifdef ENABLE_EXCEPTION
+
+#include "common/enforce.h"
+
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -33,17 +39,10 @@ using std::string;
 
 extern const char *ANDROID_LOG_TAG =
     "paddle_mobile LOG built on " __DATE__ " " __TIME__;
-static PaddleMobile<CPU> *shared_paddle_mobile_instance = nullptr;
+paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+static std::mutex shared_mutex;
 
-// toDo mutex lock
-// static std::mutex shared_mutex;
-
-PaddleMobile<CPU> *getPaddleMobileInstance() {
-  if (nullptr == shared_paddle_mobile_instance) {
-    shared_paddle_mobile_instance = new PaddleMobile<CPU>();
-  }
-  return shared_paddle_mobile_instance;
-}
+PaddleMobile<CPU> *getPaddleMobileInstance() { return &paddle_mobile; }
 
 string jstring2cppstring(JNIEnv *env, jstring jstr) {
   const char *cstr = env->GetStringUTFChars(jstr, 0);
@@ -55,43 +54,144 @@ string jstring2cppstring(JNIEnv *env, jstring jstr) {
 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_load(JNIEnv *env,
                                                           jclass thiz,
                                                           jstring modelPath) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
   ANDROIDLOGI("load invoked");
   bool optimize = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         optimize);
+  bool isLoadOk = false;
+  // A PaddleMobileException (when enabled) is logged and reported as false.
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), optimize);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             optimize);
+#endif
+  return static_cast<jboolean>(isLoadOk);
 }
 
 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadQualified(
     JNIEnv *env, jclass thiz, jstring modelPath) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
   ANDROIDLOGI("loadQualified invoked");
   bool optimize = true;
   bool qualified = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         optimize, qualified);
+  bool isLoadOk = false;
+  // A PaddleMobileException (when enabled) is logged and reported as false.
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), optimize, qualified);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             optimize, qualified);
+#endif
+
+  return static_cast<jboolean>(isLoadOk);
 }
 
 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombined(
     JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
   ANDROIDLOGI("loadCombined invoked");
   bool optimize = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         jstring2cppstring(env, paramPath),
-                                         optimize);
+  bool isLoadOk = false;
+  // A PaddleMobileException (when enabled) is logged and reported as false.
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
+        optimize);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             jstring2cppstring(env, paramPath),
+                                             optimize);
+#endif
+  return static_cast<jboolean>(isLoadOk);
 }
 
 JNIEXPORT jboolean JNICALL Java_com_baidu_paddle_PML_loadCombinedQualified(
     JNIEnv *env, jclass thiz, jstring modelPath, jstring paramPath) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
   ANDROIDLOGI("loadCombinedQualified invoked");
   bool optimize = true;
   bool qualified = true;
-  return getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
-                                         jstring2cppstring(env, paramPath),
-                                         optimize, qualified);
+  bool isLoadOk = false;
+  // A PaddleMobileException (when enabled) is logged and reported as false.
+#ifdef ENABLE_EXCEPTION
+  try {
+    isLoadOk = getPaddleMobileInstance()->Load(
+        jstring2cppstring(env, modelPath), jstring2cppstring(env, paramPath),
+        optimize, qualified);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+    isLoadOk = false;
+  }
+#else
+  isLoadOk = getPaddleMobileInstance()->Load(jstring2cppstring(env, modelPath),
+                                             jstring2cppstring(env, paramPath),
+                                             optimize, qualified);
+#endif
+  return static_cast<jboolean>(isLoadOk);
 }
 
 JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
     JNIEnv *env, jclass thiz, jfloatArray buf, jintArray ddims) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
   ANDROIDLOGI("predictImage invoked");
+  jfloatArray result = NULL;
+  // result stays NULL when a PaddleMobileException is caught below.
+#ifdef ENABLE_EXCEPTION
+  ANDROIDLOGE("ENABLE_EXCEPTION!");
+
+  try {
+    jsize ddim_size = env->GetArrayLength(ddims);
+    if (ddim_size != 4) {
+      ANDROIDLOGE("ddims size not equal to 4");
+    }
+    jint *ddim_ptr = env->GetIntArrayElements(ddims, NULL);
+    framework::DDim ddim = framework::make_ddim(
+        {ddim_ptr[0], ddim_ptr[1], ddim_ptr[2], ddim_ptr[3]});
+    int length = framework::product(ddim);
+    int count = 0;
+    float *dataPointer = nullptr;
+    if (nullptr != buf) {
+      dataPointer = env->GetFloatArrayElements(buf, NULL);
+    }
+    framework::Tensor input;
+    input.Resize(ddim);
+    auto input_ptr = input.mutable_data<float>();
+    for (int i = 0; i < length; i++) {
+      input_ptr[i] = dataPointer[i];
+    }
+    auto output = getPaddleMobileInstance()->Predict(input);
+    count = output->numel();
+    result = env->NewFloatArray(count);
+    env->SetFloatArrayRegion(result, 0, count, output->data<float>());
+    env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
+    env->DeleteLocalRef(ddims);
+    env->ReleaseFloatArrayElements(buf, dataPointer, 0);
+    env->DeleteLocalRef(buf);
+
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+  }
+
+#else
   jsize ddim_size = env->GetArrayLength(ddims);
   if (ddim_size != 4) {
     ANDROIDLOGE("ddims size not equal to 4");
@@ -100,7 +200,6 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
   framework::DDim ddim = framework::make_ddim(
       {ddim_ptr[0], ddim_ptr[1], ddim_ptr[2], ddim_ptr[3]});
   int length = framework::product(ddim);
-  jfloatArray result = NULL;
   int count = 0;
   float *dataPointer = nullptr;
   if (nullptr != buf) {
@@ -112,12 +211,19 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictImage(
   for (int i = 0; i < length; i++) {
     input_ptr[i] = dataPointer[i];
   }
-  auto output = shared_paddle_mobile_instance->Predict(input);
+  auto output = getPaddleMobileInstance()->Predict(input);
   count = output->numel();
   result = env->NewFloatArray(count);
   env->SetFloatArrayRegion(result, 0, count, output->data<float>());
   env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
+  env->DeleteLocalRef(ddims);
+  env->ReleaseFloatArrayElements(buf, dataPointer, 0);
+  env->DeleteLocalRef(buf);
+  // dataPointer is a raw buffer, not a JNI local reference: nothing to delete.
+#endif
+
   ANDROIDLOGI("predictImage finished");
+
   return result;
 }
 
@@ -170,7 +276,48 @@ void convert_nv21_to_matrix(uint8_t *nv21, float *matrix, int width, int height,
 JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
     JNIEnv *env, jclass thiz, jbyteArray yuv_, jint imgwidth, jint imgHeight,
     jintArray ddims, jfloatArray meanValues) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+
   ANDROIDLOGI("predictYuv invoked");
+  jfloatArray result = NULL;
+  // result stays NULL when a PaddleMobileException is caught below.
+#ifdef ENABLE_EXCEPTION
+  try {
+    jsize ddim_size = env->GetArrayLength(ddims);
+    if (ddim_size != 4) {
+      ANDROIDLOGE("ddims size not equal to 4");
+    }
+    jint *ddim_ptr = env->GetIntArrayElements(ddims, NULL);
+    framework::DDim ddim = framework::make_ddim(
+        {ddim_ptr[0], ddim_ptr[1], ddim_ptr[2], ddim_ptr[3]});
+    int length = framework::product(ddim);
+    float matrix[length];
+    jbyte *yuv = env->GetByteArrayElements(yuv_, NULL);
+    float *meansPointer = nullptr;
+    if (nullptr != meanValues) {
+      meansPointer = env->GetFloatArrayElements(meanValues, NULL);
+    }
+    convert_nv21_to_matrix((uint8_t *)yuv, matrix, imgwidth, imgHeight, ddim[3],
+                           ddim[2], meansPointer);
+    int count = 0;
+    framework::Tensor input;
+    input.Resize(ddim);
+    auto input_ptr = input.mutable_data<float>();
+    for (int i = 0; i < length; i++) {
+      input_ptr[i] = matrix[i];
+    }
+    auto output = getPaddleMobileInstance()->Predict(input);
+    count = output->numel();
+    result = env->NewFloatArray(count);
+    env->SetFloatArrayRegion(result, 0, count, output->data<float>());
+    env->ReleaseByteArrayElements(yuv_, yuv, 0);
+    env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
+    env->ReleaseFloatArrayElements(meanValues, meansPointer, 0);
+    ANDROIDLOGI("predictYuv finished");
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+  }
+#else
   jsize ddim_size = env->GetArrayLength(ddims);
   if (ddim_size != 4) {
     ANDROIDLOGE("ddims size not equal to 4");
@@ -187,7 +334,6 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
   }
   convert_nv21_to_matrix((uint8_t *)yuv, matrix, imgwidth, imgHeight, ddim[3],
                          ddim[2], meansPointer);
-  jfloatArray result = NULL;
   int count = 0;
   framework::Tensor input;
   input.Resize(ddim);
@@ -195,7 +341,7 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
   for (int i = 0; i < length; i++) {
     input_ptr[i] = matrix[i];
   }
-  auto output = shared_paddle_mobile_instance->Predict(input);
+  auto output = getPaddleMobileInstance()->Predict(input);
   count = output->numel();
   result = env->NewFloatArray(count);
   env->SetFloatArrayRegion(result, 0, count, output->data<float>());
@@ -203,19 +349,44 @@ JNIEXPORT jfloatArray JNICALL Java_com_baidu_paddle_PML_predictYuv(
   env->ReleaseIntArrayElements(ddims, ddim_ptr, 0);
   env->ReleaseFloatArrayElements(meanValues, meansPointer, 0);
   ANDROIDLOGI("predictYuv finished");
+#endif
+
   return result;
 }
 
 JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_setThread(JNIEnv *env,
                                                            jclass thiz,
                                                            jint threadCount) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+  // A PaddleMobileException (when enabled) is logged and otherwise ignored.
   ANDROIDLOGI("setThreadCount %d", threadCount);
+#ifdef ENABLE_EXCEPTION
+  try {
+    getPaddleMobileInstance()->SetThreadNum((int)threadCount);
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+  }
+#else
   getPaddleMobileInstance()->SetThreadNum((int)threadCount);
+
+#endif
 }
 
 JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
                                                        jclass thiz) {
+  std::lock_guard<std::mutex> lock(shared_mutex);
+  // A PaddleMobileException (when enabled) is logged and otherwise ignored.
+#ifdef ENABLE_EXCEPTION
+  try {
+    getPaddleMobileInstance()->Clear();
+
+  } catch (paddle_mobile::PaddleMobileException &e) {
+    ANDROIDLOGE("jni got an PaddleMobileException! %s", e.what());
+  }
+#else
   getPaddleMobileInstance()->Clear();
+
+#endif
 }
 
 }  // namespace jni
diff --git a/src/jni/paddle_mobile_jni.h b/src/jni/paddle_mobile_jni.h
index 4fd62a6d56c71dfc748cc967244bc830abb74a80..158d64d4517b69761b26fc18f2e0943798174014 100644
--- a/src/jni/paddle_mobile_jni.h
+++ b/src/jni/paddle_mobile_jni.h
@@ -73,8 +73,8 @@ JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_setThread(JNIEnv *env,
 /**
  * clear data of the net when destroy for android
  */
-JNIEXPORT void JNICALL Java_com_baidu_paddle_PMLL_clear(JNIEnv *env,
-                                                        jclass thiz);
+JNIEXPORT void JNICALL Java_com_baidu_paddle_PML_clear(JNIEnv *env,
+                                                       jclass thiz);
 }  // namespace jni
 }  // namespace paddle_mobile
 #ifdef __cplusplus
diff --git a/src/memory/t_malloc.cpp b/src/memory/t_malloc.cpp
index 42b8c4551871c58955251d94845ca13576d7735b..2bd4c0ac6ba3c7b066cc7ad2439ab6bebb7c3cd9 100644
--- a/src/memory/t_malloc.cpp
+++ b/src/memory/t_malloc.cpp
@@ -18,7 +18,7 @@ limitations under the License. */
 
 #ifdef PADDLE_MOBILE_FPGA
 
-#include "fpga/api/fpga_api.h"
+#include "fpga/api.h"
 
 #endif
 
diff --git a/src/operators/conv_transpose_op.cpp b/src/operators/conv_transpose_op.cpp
index 1e1d9e9c519732607b27aac7873b6a8eec93510b..34de4cbb10d3689f0be95f1277cfdd76b4c2c141 100644
--- a/src/operators/conv_transpose_op.cpp
+++ b/src/operators/conv_transpose_op.cpp
@@ -20,4 +20,13 @@ namespace paddle_mobile {
 namespace operators {}
 }  // namespace paddle_mobile
 
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(conv2d_transpose, ops::ConvOpTranspose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+#endif
+
 #endif
diff --git a/src/operators/conv_transpose_op.h b/src/operators/conv_transpose_op.h
index e28cee2d74d6ef4b98ea49ee49c2257b6491e832..c9b5e86bef0674b176ba901212a9add2ee2def83 100644
--- a/src/operators/conv_transpose_op.h
+++ b/src/operators/conv_transpose_op.h
@@ -88,4 +88,14 @@ class ConvOpTranspose : public framework::OperatorWithKernel<
 }  // namespace operators
 }  // namespace paddle_mobile
 
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+USE_OP_MALI_GPU(conv2d_transpose);
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(conv2d_transpose);
+#endif
+
 #endif
diff --git a/src/operators/feed_op.h b/src/operators/feed_op.h
index 5e8de2cb0bef73104f61367b2bbf01a44684b8f1..4a2f9285e0c1ee79e9820ef775436d39017f7f79 100644
--- a/src/operators/feed_op.h
+++ b/src/operators/feed_op.h
@@ -38,12 +38,18 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
   }
 
 #ifdef PADDLE_MOBILE_FPGA
-  void RunImpl() const { fpga::PerformBypass(param_.FpgaArgs()); }
+
   void Init() {
+    Tensor *output = param_.Out();
+    output->mutable_data<half>();
+  }
+
+  void RunImpl() const {
     const Tensor *input = param_.InputX();
     auto input_ptr = input->data<float>();
     Tensor *output = param_.Out();
     auto output_ptr = output->mutable_data<half>();
+    auto out_address = output->fpga_args().scale_pointer();
     fpga::BypassArgs args;
     args.convert_type = fpga::DATA_FP32_TO_FP16;
     args.layout_type = fpga::LAYOUT_CHW_TO_HWC;
@@ -51,13 +57,16 @@ class FeedOp : public framework::OperatorBase<DeviceType> {
     args.image.channels = input->dims()[1];
     args.image.height = input->dims()[2];
     args.image.width = input->dims()[3];
+    args.image.pad_height = 0;
+    args.image.pad_width = 0;
     args.output.address = output_ptr;
-    param_.SetFpgaArgs(args);
+    args.output.scale_address = out_address;
+    fpga::PerformBypass(args);
   }
 
 #else
-  void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
   void Init() {}
+  void RunImpl() const { param_.Out()->ShareDataWith(*param_.InputX()); }
 #endif
 
  protected:
diff --git a/src/operators/fusion_conv_add_relu_op.h b/src/operators/fusion_conv_add_relu_op.h
index 8f41f68296c9c400042646bb357f2b628198bbc9..926f309403d37fa8ec1f15f7cb955c1c13842405 100644
--- a/src/operators/fusion_conv_add_relu_op.h
+++ b/src/operators/fusion_conv_add_relu_op.h
@@ -16,6 +16,8 @@ limitations under the License. */
 
 #pragma once
 
+#include <string>
+#include <vector>
 #include "framework/operator.h"
 #include "framework/program/program-optimize/fusion_op_register.h"
 #include "operators/kernel/conv_add_relu_kernel.h"
@@ -65,11 +67,11 @@ class FusionConvAddReluOp : public framework::OperatorWithKernel<
 
 #ifdef PADDLE_MOBILE_CPU
 
-//#ifndef CONV_ADD_RELU_REGISTER
-//#define CONV_ADD_RELU_REGISTER
-// static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(new
-// FusionConvAddReluOpMatcher());
-//#endif
+#ifndef CONV_ADD_RELU_REGISTER
+#define CONV_ADD_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_add_relu_registrar(
+    new FusionConvAddReluOpMatcher());
+#endif
 
 #endif
 #ifdef PADDLE_MOBILE_MALI_GPU
diff --git a/src/operators/fusion_conv_bn_op.cpp b/src/operators/fusion_conv_bn_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..470678bfe57a41e66d6f11f3bfd469d97369d939
--- /dev/null
+++ b/src/operators/fusion_conv_bn_op.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBN_OP
+
+#include "operators/fusion_conv_bn_op.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+// Resizes Output() to {in_dims[0], filter_dims[0], ConvOutputSize(...)...}.
+template <typename Dtype, typename T>
+void FusionConvBNOp<Dtype, T>::InferShape() const {
+  auto in_dims = this->param_.Input()->dims();
+  auto filter_dims = this->param_.Filter()->dims();
+  const std::vector<int> &strides = this->param_.Strides();
+  const std::vector<int> &paddings = this->param_.Paddings();
+  const std::vector<int> &dilations = this->param_.Dilations();
+
+  PADDLE_MOBILE_ENFORCE((in_dims.size() == filter_dims.size() &&
+                         dilations.size() == paddings.size() &&
+                         paddings.size() == strides.size()),
+                        "ConvParam is not suitable");
+
+  std::vector<int64_t> output_shape({in_dims[0], filter_dims[0]});
+  for (size_t i = 0; i < strides.size(); ++i) {
+    output_shape.push_back(
+        math::ConvOutputSize(in_dims[i + 2], filter_dims[i + 2], dilations[i],
+                             paddings[i], strides[i]));
+  }
+
+  framework::DDim ddim = framework::make_ddim(output_shape);
+  this->param_.Output()->Resize(ddim);
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_CPU
+REGISTER_OPERATOR_CPU(fusion_conv_bn, ops::FusionConvBNOp);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_bn, ops::FusionConvBNOp);
+#endif
+
+#endif
diff --git a/src/operators/fusion_conv_bn_op.h b/src/operators/fusion_conv_bn_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..f43e62c9fa5c4b40c07fcb9cbdab4d06ab2c482f
--- /dev/null
+++ b/src/operators/fusion_conv_bn_op.h
@@ -0,0 +1,105 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBN_OP
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include "framework/operator.h"
+#include "framework/program/program-optimize/fusion_op_register.h"
+#include "operators/kernel/conv_bn_kernel.h"
+#include "operators/op_param.h"
+
+namespace paddle_mobile {
+namespace operators {
+using std::string;
+using std::vector;
+class FusionConvBNMatcher : public framework::FusionOpMatcher {  // conv + bn
+ public:
+  FusionConvBNMatcher() {
+    node_ = framework::Node(G_OP_TYPE_CONV);  // pattern starts at a conv node
+    node_ > std::make_shared<framework::Node>(G_OP_TYPE_BATCHNORM);
+  }
+
+  void FolderNodes(
+      framework::Node *node,
+      std::vector<std::shared_ptr<framework::Node>> *removed_nodes) {
+    node->Folder(node_.Depth(), Type(),  // pattern depth and fused op type
+                 {{G_OP_TYPE_BATCHNORM,  // batch-norm key names, mapped 1:1
+                   {{"Scale", "Scale"},
+                    {"Mean", "Mean"},
+                    {"Bias", "Bias"},
+                    {"Variance", "Variance"}}}},
+                 removed_nodes);
+  }
+
+  std::string Type() { return G_OP_TYPE_FUSION_CONV_BN; }  // fused op type
+};
+
+template <typename DeviceType, typename T>  // op backed by ConvBNKernel
+class FusionConvBNOp : public framework::OperatorWithKernel<
+                           DeviceType, FusionConvBNParam<DeviceType>,
+                           operators::ConvBNKernel<DeviceType, T>> {
+ public:  // the constructor only forwards its arguments to the base class
+  FusionConvBNOp(const string &type, const VariableNameMap &inputs,
+                 const VariableNameMap &outputs,
+                 const framework::AttributeMap &attrs,
+                 std::shared_ptr<framework::Scope> scope)
+      : framework::OperatorWithKernel<DeviceType, FusionConvBNParam<DeviceType>,
+                                      operators::ConvBNKernel<DeviceType, T>>(
+            type, inputs, outputs, attrs, scope) {}
+
+  void InferShape() const override;  // defined in fusion_conv_bn_op.cpp
+
+ protected:  // intentionally empty for now
+};
+
+#ifdef PADDLE_MOBILE_CPU
+
+#ifndef FUSION_CONV_BN_REGISTER
+static framework::FusionOpRegistrar fusion_conv_bn_registrar(
+    new FusionConvBNMatcher());
+#define FUSION_CONV_BN_REGISTER
+#endif
+
+#endif
+
+#ifdef PADDLE_MOBILE_MALI_GPU
+
+#endif
+
+#ifdef PADDLE_MOBILE_FPGA
+
+#ifndef FUSION_CONV_BN_REGISTER
+static framework::FusionOpRegistrar fusion_conv_bn_registrar(
+    new FusionConvBNMatcher());
+#define FUSION_CONV_BN_REGISTER
+#endif
+#endif
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#ifdef PADDLE_MOBILE_CPU
+USE_OP_CPU(fusion_conv_bn);
+#endif
+#ifdef PADDLE_MOBILE_MALI_GPU
+#endif
+#ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fusion_conv_bn);
+#endif
+
+#endif
diff --git a/src/operators/fusion_conv_bn_relu_op.cpp b/src/operators/fusion_conv_bn_relu_op.cpp
index 49fe9c933a5a9695f2c18bd0921c2d36063dc065..bfc9b99ea796bfdcc1a4ae1a23b2e39e8a513393 100644
--- a/src/operators/fusion_conv_bn_relu_op.cpp
+++ b/src/operators/fusion_conv_bn_relu_op.cpp
@@ -55,6 +55,7 @@ REGISTER_OPERATOR_CPU(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(fusion_conv_bn_relu, ops::FusionConvBNReluOp);
 #endif
 
 #endif
diff --git a/src/operators/fusion_conv_bn_relu_op.h b/src/operators/fusion_conv_bn_relu_op.h
index d8738b961884db268a21a9dbdf317efd3c5ae857..2b5ff4ea9d3e77ad9449b3968667ecc4558c2147 100644
--- a/src/operators/fusion_conv_bn_relu_op.h
+++ b/src/operators/fusion_conv_bn_relu_op.h
@@ -87,6 +87,12 @@ static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
 #endif
 
 #ifdef PADDLE_MOBILE_FPGA
+
+#ifndef FUSION_CONV_BN_RELU_REGISTER
+static framework::FusionOpRegistrar fusion_conv_bn_relu_registrar(
+    new FusionConvBNReluMatcher());
+#define FUSION_CONV_BN_RELU_REGISTER
+#endif
 #endif
 
 }  // namespace operators
@@ -98,6 +104,7 @@ USE_OP_CPU(fusion_conv_bn_relu);
 #ifdef PADDLE_MOBILE_MALI_GPU
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(fusion_conv_bn_relu);
 #endif
 
 #endif
diff --git a/src/operators/fusion_elementwise_add_relu_op.h b/src/operators/fusion_elementwise_add_relu_op.h
index 5b8edb08df801621d623309d6a51c85c33cf78ed..078d09858c391214f13d70543e70da8c38e67b69 100644
--- a/src/operators/fusion_elementwise_add_relu_op.h
+++ b/src/operators/fusion_elementwise_add_relu_op.h
@@ -28,7 +28,7 @@ using std::vector;
 class FusioneElementwiseAddReluMatcher : public framework::FusionOpMatcher {
  public:
   FusioneElementwiseAddReluMatcher() {
-    node_ = framework::Node(G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU);
+    node_ = framework::Node(G_OP_TYPE_ELEMENTWISE_ADD);
     node_ > std::make_shared<framework::Node>(G_OP_TYPE_RELU);
   }
 
diff --git a/src/operators/kernel/arm/prelu_kernel.cpp b/src/operators/kernel/arm/prelu_kernel.cpp
index 0ca086de972681ce766a291e937c3d0cb7222348..eda944a2ff44a0551ef0342c64e6e955c96b356d 100644
--- a/src/operators/kernel/arm/prelu_kernel.cpp
+++ b/src/operators/kernel/arm/prelu_kernel.cpp
@@ -33,77 +33,34 @@ struct PReluFunctor {
  * */
 template <>
 void PReluKernel<CPU, float>::Compute(const PReluParam<CPU> &param) const {
-  const auto *input_x = param.InputX();
-  auto *input_x_ptr = input_x->data<float>();
+  auto *x = param.InputX();
+  auto *alpha = param.InputAlpha();
   auto *out = param.Out();
-  auto *out_ptr = out->mutable_data<float>();
-
-  if (param.Slopes().size() == 1) {
-    PReluFunctor<float> func_(param.Slopes()[0]);
-    math::Transform trans;
-    trans(input_x_ptr, input_x_ptr + input_x->numel(), out_ptr, func_);
-  } else if (param.Slopes().size() > 1) {
-    const int dim_size = input_x->dims().size();
-    switch (dim_size) {
-      case 0:
-        break;
-      case 1: {
-        const int input_width = input_x->dims()[0];
-        math::Transform trans;
-
-        #pragma omp parallel for
-        for (int w = 0; w < input_width; ++w) {
-          out_ptr[w] = input_x_ptr[w] * param.Slopes()[w];
-        }
-      } break;
-      case 2: {
-        const int input_height = input_x->dims()[0];
-        const int input_width = input_x->dims()[1];
-
-        math::Transform trans;
-        #pragma omp parallel for
-        for (int h = 0; h < input_height; ++h) {
-          PReluFunctor<float> func_(param.Slopes()[h]);
-          const float *ptr = input_x_ptr + h * input_width;
-          float *optr = out_ptr + +h * input_width;
-          trans(ptr, ptr + input_width, optr, func_);
-        }
-      } break;
-      case 3: {
-        const int chan_size = input_x->dims()[0];
-        const int input_height = input_x->dims()[1];
-        const int input_width = input_x->dims()[2];
-
-        math::Transform trans;
-        #pragma omp parallel for
-        for (int c = 0; c < chan_size; ++c) {
-          PReluFunctor<float> func_(param.Slopes()[c]);
-          int size = input_height * input_width;
-          const float *ptr = input_x_ptr + c * size;
-          float *optr = out_ptr + c * size;
-          trans(ptr, ptr + size, optr, func_);
-        }
-      } break;
-      case 4:
-      default: {
-        const int batch_size = input_x->dims()[0];
-        const int chan_size = input_x->dims()[1];
-        const int input_height = input_x->dims()[2];
-        const int input_width = input_x->dims()[3];
-        math::Transform trans;
-
-        #pragma omp parallel for
-        for (int b = 0; b < batch_size; ++b) {
-          for (int c = 0; c < chan_size; ++c) {
-            PReluFunctor<float> func_(param.Slopes()[c]);
-            int size = input_height * input_width;
-            const float *ptr = input_x_ptr + b * c * size;
-            float *optr = out_ptr + +b * c * size;
-            trans(ptr, ptr + size, optr, func_);
-          }
-        }
-      }  // case 3,default
-      break;
+  // out = x > 0 ? x : alpha * x; Mode() selects how alpha is indexed.
+  std::string mode = param.Mode();
+  const auto *x_ptr = x->data<float>();
+  auto *o_ptr = out->mutable_data<float>();
+  const auto *alpha_ptr = alpha->data<float>();
+  int numel = x->numel();
+  auto dim = x->dims();
+  if (mode == "channel") {
+    // temp = elements per (dim[0], dim[1]) slice; alpha is indexed by dim[1].
+    const int temp = numel / (dim[0] * dim[1]);
+    #pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      // Loop-local on purpose: a shared index would race between threads.
+      const int index = (i / temp) % dim[1];
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[index] * x_ptr[i];
+    }
+  } else if (mode == "element") {
+    #pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[i] * x_ptr[i];
+    }
+  } else {
+    #pragma omp parallel for
+    for (int i = 0; i < numel; i++) {
+      o_ptr[i] = x_ptr[i] > 0 ? x_ptr[i] : alpha_ptr[0] * x_ptr[i];
     }
   }
 }
diff --git a/src/operators/kernel/fpga/conv_kernel.cpp b/src/operators/kernel/conv_bn_kernel.h
similarity index 53%
rename from src/operators/kernel/fpga/conv_kernel.cpp
rename to src/operators/kernel/conv_bn_kernel.h
index 818c04f0b8ed95012d203ece84d4cdbdb9e7344e..d72355939f1b21bb051c91c36fcc3fd280a8c0b8 100644
--- a/src/operators/kernel/fpga/conv_kernel.cpp
+++ b/src/operators/kernel/conv_bn_kernel.h
@@ -12,25 +12,31 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#ifdef CONV_OP
+#pragma once
 
-#include "operators/kernel/conv_kernel.h"
-#include "operators/kernel/central-arm-func/conv_arm_func.h"
+#ifdef FUSION_CONVBN_OP
+
+#include <vector>
+#include "framework/ddim.h"
+#include "framework/operator.h"
+#include "operators/math/conv_func.h"
+#include "operators/math/im2col.h"
+#include "operators/math/math_function.h"
+#include "operators/math/vol2col.h"
+#include "operators/op_param.h"
 
 namespace paddle_mobile {
 namespace operators {
 
-template <>
-bool ConvKernel<FPGA, float>::Init(ConvParam<FPGA> *param) {
-  return true;
-}
-
-template <>
-void ConvKernel<FPGA, float>::Compute(const ConvParam<FPGA> &param) const {
-  // ConvCompute<float>(param);
-}
+using framework::DDim;
+using framework::OpKernelBase;
 
-template class ConvKernel<FPGA, float>;
+template <typename DeviceType, typename T>  // specialized per device type
+class ConvBNKernel : public OpKernelBase<DeviceType, FusionConvBNParam<DeviceType>> {
+ public:
+  void Compute(const FusionConvBNParam<DeviceType> &param) const;
+  bool Init(FusionConvBNParam<DeviceType> *param);  // may modify *param
+};
 
 }  // namespace operators
 }  // namespace paddle_mobile
diff --git a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
index 9eed7c12c8d69b2211768dd0e65960d8023ec4bc..7f120fa930334194600103b7310e3e8b50adbe31 100644
--- a/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_kernel.cpp
@@ -15,8 +15,8 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBN_OP
 
 #include "operators/kernel/conv_add_bn_kernel.h"
-#include "fpga/api/fpga_api.h"
-#include "fpga/quantilization.h"
+#include "fpga/api.h"
+#include "fpga/quantization.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -28,7 +28,7 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
+  Tensor *filter = param->Filter();
 
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<half>();
@@ -37,11 +37,11 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   auto bn_scale_ptr = param->InputScale()->data<float>();
   auto bn_bias_ptr = param->InputBias()->data<float>();
   const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                             bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
+                        "Output channel should be equal to bias number");
 
-  const int channel = input->dims()[1];
+  const int channel = out->dims()[1];
   float *bs_ptr =
       reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
   Tensor *new_scale = new Tensor();
@@ -60,30 +60,27 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
 
-  const Tensor *quant_filter = quantilize_filter(filter);
+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();
 
-  // delete original filter?
-  filter = quant_filter;
-
-  auto filter_ptr = filter->data<float>();
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
-  convArgs.filter_address = reinterpret_cast<void *> filter_ptr;
+  convArgs.filter_address = (void *)filter_ptr;
   convArgs.filter_num = filter->dims()[0];
   convArgs.group_num = param->Groups();
-  convArgs.sb_address = reinterpret_cast<void *> bs_ptr;
+  convArgs.sb_address = (void *)bs_ptr;
   convArgs.kernel.stride_h = param->Strides()[0];
   convArgs.kernel.stride_w = param->Strides()[1];
   convArgs.kernel.height = filter->dims()[2];
   convArgs.kernel.width = filter->dims()[3];
-  convArgs.image.address = reinterpret_cast<void *> input_ptr;
+  convArgs.image.address = (void *)input_ptr;
   convArgs.image.channels = input->dims()[1];
   convArgs.image.height = input->dims()[2];
   convArgs.image.width = input->dims()[3];
   convArgs.image.pad_height = param->Paddings()[0];
   convArgs.image.pad_width = param->Paddings()[1];
   convArgs.image.scale_address = input->fpga_args().scale_pointer();
-  convArgs.output.address = reinterpret_cast<void *> out_ptr;
+  convArgs.output.address = (void *)out_ptr;
   convArgs.output.scale_address = out->fpga_args().scale_pointer();
   param->SetFpgaArgs(convArgs);
 
diff --git a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
index 27eda3c64e06003b77b581e21a5054985882a016..7bf5cd3a66c2149079ac213342e8ed7b046cfa99 100644
--- a/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_bn_relu_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADDBNRELU_OP
 
 #include "operators/kernel/conv_add_bn_relu_kernel.h"
-#include "memory/t_malloc.h"
+#include "fpga/quantization.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -28,8 +28,7 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+  Tensor *filter = param->Filter();
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<half>();
   auto bn_mean_ptr = param->InputMean()->data<float>();
@@ -37,11 +36,11 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   auto bn_scale_ptr = param->InputScale()->data<float>();
   auto bn_bias_ptr = param->InputBias()->data<float>();
   const float epsilon = param->Epsilon();
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0] &&
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0] &&
                             bias->dims()[0] == param->InputBias()->dims()[0],
-                        "Image channel should be equal to bias number");
+                        "Output channel should be equal to bias number");
 
-  const int channel = input->dims()[1];
+  const int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   Tensor *new_scale = new Tensor();
   Tensor *new_bias = new Tensor();
@@ -58,6 +57,8 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   }
   param->SetNewScale(new_scale);
   param->SetNewBias(new_bias);
+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();
 
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
diff --git a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
index 62aed7ac4ad215a2ffef56198eae96800be9f130..48007e4cb8e90c500d53455d4dd8095827c92831 100644
--- a/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/conv_add_relu_kernel.cpp
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef FUSION_CONVADDRELU_OP
 
 #include "operators/kernel/conv_add_relu_kernel.h"
-#include "common/enforce.h"
+#include "fpga/quantization.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -27,20 +27,22 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
   auto input_ptr = input->data<half>();
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
-  const Tensor *filter = param->Filter();
-  auto filter_ptr = filter->data<float>();
+  Tensor *filter = param->Filter();
   Tensor *out = param->Output();
   auto out_ptr = out->mutable_data<half>();
 
-  PADDLE_MOBILE_ENFORCE(input->dims()[1] == bias->dims()[0],
-                        "Image channel should be equal to bias number");
-  int channel = input->dims()[1];
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == bias->dims()[0],
+                        "Output channel should be equal to bias number");
+  int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i * 2] = 1;
     bs_ptr[i * 2 + 1] = bias_ptr[i];
   }
 
+  fpga::quantize_filter(filter);
+  auto filter_ptr = filter->data<int8_t>();
+
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
   convArgs.filter_address = (void *)filter_ptr;
diff --git a/src/operators/kernel/fpga/conv_bn_kernel.cpp b/src/operators/kernel/fpga/conv_bn_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..a623cbd7960aed805fb6e3d20a5ac8259e929b4c
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_bn_kernel.cpp
@@ -0,0 +1,94 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBN_OP
+
+#include "operators/kernel/conv_bn_kernel.h"
+#include "fpga/api.h"
+#include "fpga/quantization.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>  // FPGA: fold batch-norm, quantize the filter, build ConvArgs
+bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
+  bool relu_enabled = false;  // copied into convArgs.relu_enabled below
+  const Tensor *input = param->Input();
+  auto input_ptr = input->data<half>();
+  Tensor *filter = param->Filter();
+
+  Tensor *out = param->Output();
+  auto out_ptr = out->mutable_data<half>();
+  auto bn_mean_ptr = param->InputMean()->data<float>();
+  auto bn_var_ptr = param->InputVariance()->data<float>();
+  auto bn_scale_ptr = param->InputScale()->data<float>();
+  auto bn_bias_ptr = param->InputBias()->data<float>();
+  const float epsilon = param->Epsilon();
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  // bs_ptr stores two floats per channel, interleaved: {new_scale, new_bias}.
+  const int channel = out->dims()[1];
+  float *bs_ptr =
+      reinterpret_cast<float *>(fpga::fpga_malloc(2 * channel * sizeof(float)));
+  Tensor *new_scale = new Tensor();
+  Tensor *new_bias = new Tensor();
+  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
+  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
+  // new_scale = scale / sqrt(var + eps); new_bias = bias - mean * new_scale
+  for (int i = 0; i < channel; i++) {
+    new_scale_ptr[i] = bn_scale_ptr[i] /
+                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
+    new_bias_ptr[i] = bn_bias_ptr[i] + (0 - bn_mean_ptr[i]) * new_scale_ptr[i];
+    bs_ptr[i * 2] = new_scale_ptr[i];
+    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
+  }
+  param->SetNewScale(new_scale);
+  param->SetNewBias(new_bias);
+  fpga::quantize_filter(filter);  // NOTE(review): presumably in place
+  auto filter_ptr = filter->data<int8_t>();  // filter is read as int8 from here
+  // Conv description for the FPGA; stored on param and used by Compute().
+  fpga::ConvArgs convArgs;
+  convArgs.relu_enabled = relu_enabled;
+  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_num = filter->dims()[0];
+  convArgs.group_num = param->Groups();
+  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.kernel.stride_h = param->Strides()[0];
+  convArgs.kernel.stride_w = param->Strides()[1];
+  convArgs.kernel.height = filter->dims()[2];
+  convArgs.kernel.width = filter->dims()[3];
+  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.channels = input->dims()[1];
+  convArgs.image.height = input->dims()[2];
+  convArgs.image.width = input->dims()[3];
+  convArgs.image.pad_height = param->Paddings()[0];
+  convArgs.image.pad_width = param->Paddings()[1];
+  convArgs.image.scale_address = input->fpga_args().scale_pointer();
+  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.scale_address = out->fpga_args().scale_pointer();
+  param->SetFpgaArgs(convArgs);
+
+  return true;
+}
+
+template <>
+void ConvBNKernel<FPGA, float>::Compute(const FusionConvBNParam<FPGA> &param) const {
+  fpga::ComputeFpgaConv(param.FpgaArgs());  // args were stored by Init()
+}
+template class ConvBNKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6bbe1b8763160993da5edb96162e54c8ab688d14
--- /dev/null
+++ b/src/operators/kernel/fpga/conv_bn_relu_kernel.cpp
@@ -0,0 +1,91 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef FUSION_CONVBNRELU_OP
+
+#include "operators/kernel/conv_bn_relu_kernel.h"
+#include "fpga/quantization.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>  // FPGA: fold batch-norm, quantize the filter, build ConvArgs
+bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
+  bool relu_enabled = true;  // copied into convArgs.relu_enabled below
+  const Tensor *input = param->Input();
+  auto input_ptr = input->data<half>();
+  Tensor *filter = param->Filter();
+  Tensor *out = param->Output();
+  auto out_ptr = out->mutable_data<half>();
+  auto bn_mean_ptr = param->InputMean()->data<float>();
+  auto bn_var_ptr = param->InputVariance()->data<float>();
+  auto bn_scale_ptr = param->InputScale()->data<float>();
+  auto bn_bias_ptr = param->InputBias()->data<float>();
+  const float epsilon = param->Epsilon();
+  PADDLE_MOBILE_ENFORCE(out->dims()[1] == param->InputBias()->dims()[0],
+                        "Output channel should be equal to bias number");
+  // bs_ptr stores two floats per channel, interleaved: {new_scale, new_bias}.
+  const int channel = out->dims()[1];
+  float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
+  Tensor *new_scale = new Tensor();
+  Tensor *new_bias = new Tensor();
+  auto new_scale_ptr = new_scale->mutable_data<float>({channel});
+  auto new_bias_ptr = new_bias->mutable_data<float>({channel});
+  // new_scale = scale / sqrt(var + eps); new_bias = bias - mean * new_scale
+  for (int i = 0; i < channel; i++) {
+    new_scale_ptr[i] = bn_scale_ptr[i] /
+                       static_cast<float>(pow((bn_var_ptr[i] + epsilon), 0.5));
+    new_bias_ptr[i] = bn_bias_ptr[i] + (0 - bn_mean_ptr[i]) * new_scale_ptr[i];
+    bs_ptr[i * 2] = new_scale_ptr[i];
+    bs_ptr[i * 2 + 1] = new_bias_ptr[i];
+  }
+  param->SetNewScale(new_scale);
+  param->SetNewBias(new_bias);
+  fpga::quantize_filter(filter);  // NOTE(review): presumably in place
+  auto filter_ptr = filter->data<int8_t>();  // filter is read as int8 from here
+  // Conv description for the FPGA; stored on param and used by Compute().
+  fpga::ConvArgs convArgs;
+  convArgs.relu_enabled = relu_enabled;
+  convArgs.filter_address = (void *)filter_ptr;
+  convArgs.filter_num = filter->dims()[0];
+  convArgs.group_num = param->Groups();
+  convArgs.sb_address = (void *)bs_ptr;
+  convArgs.kernel.stride_h = param->Strides()[0];
+  convArgs.kernel.stride_w = param->Strides()[1];
+  convArgs.kernel.height = filter->dims()[2];
+  convArgs.kernel.width = filter->dims()[3];
+  convArgs.image.address = (void *)input_ptr;
+  convArgs.image.channels = input->dims()[1];
+  convArgs.image.height = input->dims()[2];
+  convArgs.image.width = input->dims()[3];
+  convArgs.image.pad_height = param->Paddings()[0];
+  convArgs.image.pad_width = param->Paddings()[1];
+  convArgs.image.scale_address = input->fpga_args().scale_pointer();
+  convArgs.output.address = (void *)out_ptr;
+  convArgs.output.scale_address = out->fpga_args().scale_pointer();
+  param->SetFpgaArgs(convArgs);
+  return true;
+}
+
+template <>
+void ConvBNReluKernel<FPGA, float>::Compute(
+    const FusionConvBNReluParam<FPGA> &param) const {
+  fpga::ComputeFpgaConv(param.FpgaArgs());  // args were stored by Init()
+}
+template class ConvBNReluKernel<FPGA, float>;
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/fc_relu_kernel.cpp b/src/operators/kernel/fpga/fc_relu_kernel.cpp
index a147855c3022a82b0b3d70752a2f5c0dc0c3fc13..52c8c71537cd3d11eb3bfab43c78a5ad79d0db37 100644
--- a/src/operators/kernel/fpga/fc_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/fc_relu_kernel.cpp
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #ifdef FUSION_FCRELU_OP
 #include "operators/kernel/fc_relu_kernel.h"
-#include "fpga/api/fpga_api.h"
+
+#include "fpga/api.h"
+#include "fpga/quantization.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -23,8 +25,7 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
   bool relu_enabled = true;
   const Tensor *input_x = param->InputX();
   auto input_x_ptr = input_x->data<half>();
-  const Tensor *input_y = param->InputY();
-  auto input_y_ptr = input_y->data<float>();
+  Tensor *input_y = param->InputY();
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
@@ -32,13 +33,16 @@ bool FusionFcReluKernel<FPGA, float>::Init(FusionFcReluParam<FPGA> *param) {
 
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = input_x->dims()[1];
+  int channel = out->dims()[1];
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i * 2] = 1;
     bs_ptr[i * 2 + 1] = input_z_ptr[i];
   }
 
+  fpga::quantize_filter(input_y);
+  auto input_y_ptr = input_y->data<int8_t>();
+
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
   convArgs.filter_address = (void *)input_y_ptr;
diff --git a/src/operators/kernel/fpga/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
index 5e4e2aab42185a874b50186dba7634c8cb3a5854..199b6b6878ad2c838e3b3d12d8e92a70ea541dad 100644
--- a/src/operators/kernel/fpga/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/fusion_fc_kernel.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 #ifdef FUSION_FC_OP
 
 #include "operators/kernel/fusion_fc_kernel.h"
+#include "fpga/quantization.h"
 
 namespace paddle_mobile {
 namespace operators {
@@ -23,8 +24,7 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   bool relu_enabled = false;
   const Tensor *input_x = param->InputX();
   auto input_x_ptr = input_x->data<half>();
-  const Tensor *input_y = param->InputY();
-  auto input_y_ptr = input_y->data<float>();
+  Tensor *input_y = param->InputY();  // non-const: passed to quantize_filter()
   const Tensor *input_z = param->InputZ();
   auto input_z_ptr = input_z->data<float>();
   Tensor *out = param->Out();
@@ -32,13 +32,16 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
 
   PADDLE_MOBILE_ENFORCE(input_x->dims()[1] == input_y->dims()[0],
                         "Image channel should be equal to weight number");
-  int channel = input_x->dims()[1];
+  int channel = out->dims()[1];  // count of {1, input_z[i]} pairs in bs_ptr
   float *bs_ptr = (float *)fpga::fpga_malloc(2 * channel * sizeof(float));
   for (int i = 0; i < channel; i++) {
     bs_ptr[i * 2] = 1;
     bs_ptr[i * 2 + 1] = input_z_ptr[i];
   }
 
+  fpga::quantize_filter(input_y);  // NOTE(review): presumably in-place int8
+  auto input_y_ptr = input_y->data<int8_t>();  // weights are read as int8_t
+
   fpga::ConvArgs convArgs;
   convArgs.relu_enabled = relu_enabled;
   convArgs.filter_address = (void *)input_y_ptr;
@@ -55,11 +58,9 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   convArgs.image.width = input_x->dims()[3];
   convArgs.image.pad_height = 0;
   convArgs.image.pad_width = 0;
-  convArgs.image.scale_address =
-      input_x->fpga_args().scale_pointer();  // fc input has scale attribute??
+  convArgs.image.scale_address = input_x->fpga_args().scale_pointer();
   convArgs.output.address = (void *)out_ptr;
-  convArgs.output.scale_address =
-      out->fpga_args().scale_pointer();  // fc output has scale attribute??
+  convArgs.output.scale_address = out->fpga_args().scale_pointer();
   param->SetFpgaArgs(convArgs);
   return true;
 }
diff --git a/src/operators/kernel/fpga/softmax_kernel.cpp b/src/operators/kernel/fpga/softmax_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f3681478f72cb77fa5a69a5c9dd1db8fa42f6046
--- /dev/null
+++ b/src/operators/kernel/fpga/softmax_kernel.cpp
@@ -0,0 +1,60 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef SOFTMAX_OP
+
+#include "operators/kernel/softmax_kernel.h"
+#include "operators/kernel/central-arm-func/softmax_arm_func.h"
+#include "common/types.h"
+#include "fpga/api.h"
+#include "operators/math/softmax.h"
+namespace paddle_mobile {
+namespace operators {
+
+// Builds the FPGA bypass descriptor (FP16 HWC -> FP32 CHW) for the softmax
+// input and stores it on `param`. Nothing is configured when the input is not
+// half precision. Always returns true.
+template <>
+bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
+  const Tensor *input = param->InputX();
+  if (input->type() == typeid(half)) {
+    auto input_ptr = input->data<half>();
+    // The bypass must write into the output tensor's float buffer; passing
+    // the Tensor object itself would hand the hardware the wrong address.
+    auto output_ptr = param->Out()->mutable_data<float>();
+    fpga::BypassArgs args;
+    args.convert_type = fpga::DATA_FP16_TO_FP32;
+    args.layout_type = fpga::LAYOUT_HWC_TO_CHW;
+    args.image.address = (void *)(input_ptr);
+    args.image.height = input->dims()[0];
+    args.image.width = input->dims()[1];
+    args.image.channels = 1;
+    args.output.address = (void *)(output_ptr);
+    param->SetFpgaArgs(args);
+  }
+
+  return true;
+}
+
+// Currently a no-op: the softmax computation call below is still disabled.
+template <>
+void SoftmaxKernel<FPGA, float>::Compute(const SoftmaxParam<FPGA> &param) const {
+  // SoftmaxCompute<float>(param);
+}
+
+template class SoftmaxKernel<FPGA, float>;
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/math/depthwise_conv_3x3.cpp b/src/operators/math/depthwise_conv_3x3.cpp
index 1ca3797882807ae5f12b16483d90e359da6dfb99..ea7e611e8eba8359de66b4a3e62bca39f25d82f4 100644
--- a/src/operators/math/depthwise_conv_3x3.cpp
+++ b/src/operators/math/depthwise_conv_3x3.cpp
@@ -540,15 +540,17 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
   const int hxw = input_height * input_width;
 
   const int l = input_height;
-  float32x4_t vnewbias = vdupq_n_f32(0.0);
-  float32x4_t vnewscale = vdupq_n_f32(1.0);
+  const int chxw = input_channel * hxw;  // floats in one batch item
   float32x4_t vzero = vdupq_n_f32(0);
 
   for (int b = 0; b < batch_size; b++) {
-    filter_data = filter->data<float>();
+#pragma omp parallel for
     for (int c = 0; c < input_channel; c++) {
-      vnewbias = vdupq_n_f32(newbias_data[c]);
-      vnewscale = vdupq_n_f32(newscale_data[c]);
+      const float *filter_data = filter->data<float>() + c * 9;  // 3x3 taps
+      const float *input_data = input->data<float>() + b * chxw + c * hxw;
+      float *output_data = output->data<float>() + b * chxw + c * hxw;
+      float32x4_t vnewbias = vdupq_n_f32(newbias_data[c]);
+      float32x4_t vnewscale = vdupq_n_f32(newscale_data[c]);
 
       float w00 = filter_data[0];
       float w01 = filter_data[1];
@@ -560,6 +562,69 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
       float w21 = filter_data[7];
       float w22 = filter_data[8];
 
+      for (int i = 1; i < output_height - 1; i++) {  // interior rows only
+        float *output_ptr;
+        float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3, tmp4,
+            tmp5, out0;
+        for (int m = 1; m < output_width - 4; m += 4) {  // 4 outputs per pass
+          output_ptr = output_data + i * output_width + m;
+          in0 = vld1q_f32(input_data + (i - 1) * input_width + m - 1);
+          in1 = vld1q_f32(input_data + (i - 1) * input_width + m + 3);
+          in2 = vld1q_f32(input_data + i * input_width + m - 1);
+          in3 = vld1q_f32(input_data + i * input_width + m + 3);
+          in4 = vld1q_f32(input_data + (i + 1) * input_width + m - 1);
+          in5 = vld1q_f32(input_data + (i + 1) * input_width + m + 3);
+
+          tmp0 = vextq_f32(in0, in1, 1);  // row i-1, columns m..m+3
+          tmp1 = vextq_f32(in0, in1, 2);  // row i-1, columns m+1..m+4
+          tmp2 = vextq_f32(in2, in3, 1);
+          tmp3 = vextq_f32(in2, in3, 2);
+          tmp4 = vextq_f32(in4, in5, 1);
+          tmp5 = vextq_f32(in4, in5, 2);
+
+          out0 = vmulq_n_f32(in0, w00);
+          out0 = vmlaq_n_f32(out0, tmp0, w01);
+          out0 = vmlaq_n_f32(out0, tmp1, w02);
+          out0 = vmlaq_n_f32(out0, in2, w10);
+          out0 = vmlaq_n_f32(out0, tmp2, w11);
+          out0 = vmlaq_n_f32(out0, tmp3, w12);
+          out0 = vmlaq_n_f32(out0, in4, w20);
+          out0 = vmlaq_n_f32(out0, tmp4, w21);
+          out0 = vmlaq_n_f32(out0, tmp5, w22);
+
+          out0 = vmlaq_f32(vnewbias, vnewscale, out0);  // bias + scale * conv
+          if (if_relu) {
+            out0 = vmaxq_f32(out0, vzero);
+          }
+          vst1q_f32(output_ptr, out0);
+        }
+        int m;
+        for (m = 1; (m + 3) < output_width - 1; m = m + 4) {  // find tail start
+        }
+
+        for (int j = m; j < output_width - 1; j++) {  // scalar tail columns
+          output_data[i * output_width + j] =
+              input_data[(i - 1) * input_width + j - 1] * w00 +
+              input_data[(i - 1) * input_width + j] * w01 +
+              input_data[(i - 1) * input_width + j + 1] * w02 +
+              input_data[(i)*input_width + j - 1] * w10 +
+              input_data[(i)*input_width + j] * w11 +
+              input_data[(i)*input_width + j + 1] * w12 +
+              input_data[(i + 1) * input_width + j - 1] * w20 +
+              input_data[(i + 1) * input_width + j] * w21 +
+              input_data[(i + 1) * input_width + j + 1] * w22;
+          output_data[i * output_width + j] =
+              newscale_data[c] * output_data[i * output_width + j] +
+              newbias_data[c];
+          if (if_relu) {
+            output_data[i * output_width + j] =
+                output_data[i * output_width + j] < 0
+                    ? 0
+                    : output_data[i * output_width + j];
+          }
+        }
+      }
+
       output_data[0] = w11 * input_data[0] + w12 * input_data[1] +
                        w21 * input_data[l] + w22 * input_data[l + 1];
       output_data[l - 1] = w10 * input_data[l - 2] + w11 * input_data[l - 1] +
@@ -699,72 +764,6 @@ void DepthwiseConvAddBNRelu3x3s1p1(const Tensor *input, const Tensor *filter,
                   : output_data[(output_height - 1) * output_width + j];
         }
       }
-      #pragma omp parallel for
-      for (int i = 1; i < output_height - 1; i++) {
-        for (int m = 1; (m + 3) < output_width - 1; m = m + 4) {
-          float *output_ptr = output_data + i * output_width + m;
-          float32x4_t in0, in1, in2, in3, in4, in5, tmp0, tmp1, tmp2, tmp3,
-              tmp4, tmp5, out0;
-          in0 = vld1q_f32(input_data + (i - 1) * input_width + m - 1);
-          in1 = vld1q_f32(input_data + (i - 1) * input_width + m + 3);
-          in2 = vld1q_f32(input_data + i * input_width + m - 1);
-          in3 = vld1q_f32(input_data + i * input_width + m + 3);
-          in4 = vld1q_f32(input_data + (i + 1) * input_width + m - 1);
-          in5 = vld1q_f32(input_data + (i + 1) * input_width + m + 3);
-
-          tmp0 = vextq_f32(in0, in1, 1);
-          tmp1 = vextq_f32(in0, in1, 2);
-          tmp2 = vextq_f32(in2, in3, 1);
-          tmp3 = vextq_f32(in2, in3, 2);
-          tmp4 = vextq_f32(in4, in5, 1);
-          tmp5 = vextq_f32(in4, in5, 2);
-
-          out0 = vmulq_n_f32(in0, w00);
-          out0 = vmlaq_n_f32(out0, tmp0, w01);
-          out0 = vmlaq_n_f32(out0, tmp1, w02);
-          out0 = vmlaq_n_f32(out0, in2, w10);
-          out0 = vmlaq_n_f32(out0, tmp2, w11);
-          out0 = vmlaq_n_f32(out0, tmp3, w12);
-          out0 = vmlaq_n_f32(out0, in4, w20);
-          out0 = vmlaq_n_f32(out0, tmp4, w21);
-          out0 = vmlaq_n_f32(out0, tmp5, w22);
-
-          out0 = vmlaq_f32(vnewbias, vnewscale, out0);
-          if (if_relu) {
-            out0 = vmaxq_f32(out0, vzero);
-          }
-          vst1q_f32(output_ptr, out0);
-        }
-        int m;
-        for (m = 1; (m + 3) < output_width - 1; m = m + 4) {
-        }
-
-        for (int j = m; j < output_width - 1; j++) {
-          output_data[i * output_width + j] =
-              input_data[(i - 1) * input_width + j - 1] * w00 +
-              input_data[(i - 1) * input_width + j] * w01 +
-              input_data[(i - 1) * input_width + j + 1] * w02 +
-              input_data[(i)*input_width + j - 1] * w10 +
-              input_data[(i)*input_width + j] * w11 +
-              input_data[(i)*input_width + j + 1] * w12 +
-              input_data[(i + 1) * input_width + j - 1] * w20 +
-              input_data[(i + 1) * input_width + j] * w21 +
-              input_data[(i + 1) * input_width + j + 1] * w22;
-          output_data[i * output_width + j] =
-              newscale_data[c] * output_data[i * output_width + j] +
-              newbias_data[c];
-          if (if_relu) {
-            output_data[i * output_width + j] =
-                output_data[i * output_width + j] < 0
-                    ? 0
-                    : output_data[i * output_width + j];
-          }
-        }
-      }
-
-      input_data = input_data + hxw;
-      output_data = output_data + hxw;
-      filter_data = filter_data + 9;
     }
   }
 
@@ -1466,9 +1465,7 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                      Tensor *output, const Tensor *new_scale,
                                      const Tensor *new_bias, bool if_relu) {
 #if __ARM_NEON
-  const float *input_data = input->data<float>();
-  const float *filter_data = filter->data<float>();
-  float *output_data = output->data<float>();
+#ifdef _OPENMP  // channel-parallel variant; the serial one is under #else
   const float *newscale_data = new_scale->data<float>();
   const float *newbias_data = new_bias->data<float>();
 
@@ -1482,14 +1479,16 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
   const int inhxw = input_height * input_width;
   const int outhxw = output_height * output_width;
 
-  float32x4_t vnewbias = vdupq_n_f32(0.0);
-  float32x4_t vnewscale = vdupq_n_f32(1.0);
   float32x4_t zero = vdupq_n_f32(0.0);
   for (int b = 0; b < batch_size; b++) {
-    filter_data = filter->data<float>();
+    #pragma omp parallel for
     for (int c = 0; c < input_channel; c++) {
-      vnewbias = vdupq_n_f32(newbias_data[c]);
-      vnewscale = vdupq_n_f32(newscale_data[c]);
+      const int bc = b * input_channel + c;  // flat (batch, channel) index
+      const float *filter_data = filter->data<float>() + c * 9;  // 3x3 taps
+      const float *input_data = input->data<float>() + bc * inhxw;
+      float *output_data = output->data<float>() + bc * outhxw;
+      float32x4_t vnewbias = vdupq_n_f32(newbias_data[c]);
+      float32x4_t vnewscale = vdupq_n_f32(newscale_data[c]);
 
       float w00 = filter_data[0];
       float w01 = filter_data[1];
@@ -1527,7 +1525,9 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
         if (if_relu) {
           out0 = vmaxq_f32(out0, zero);
         }
-        vst1q_f32(output_ptr, out0);
+        vst1q_lane_f32(output_ptr, out0, 0);  // only lanes 0-2 are written
+        vst1q_lane_f32(output_ptr + 1, out0, 1);
+        vst1q_lane_f32(output_ptr + 2, out0, 2);
       }
       for (m = 1; m < output_width - 2; m += 3) {
       }
@@ -1543,8 +1543,6 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
         }
       }
 
-#pragma omp parallel for
-
       for (int i = 1; i < output_height; i += 1) {
         for (int m = 1; m < output_width - 2; m += 3) {
           float *output_ptr = output_data + i * output_width + m;
@@ -1583,7 +1581,9 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
           if (if_relu) {
             out0 = vmaxq_f32(out0, zero);
           }
-          vst1q_f32(output_ptr, out0);
+          vst1q_lane_f32(output_ptr, out0, 0);  // only lanes 0-2 are written
+          vst1q_lane_f32(output_ptr + 1, out0, 1);
+          vst1q_lane_f32(output_ptr + 2, out0, 2);
         }
         int m;
         for (m = 1; m < output_width - 2; m += 3) {
@@ -1635,258 +1635,242 @@ void DepthwiseConvAddBNRelu3x3s2p1v2(const Tensor *input, const Tensor *filter,
                                               : output_data[i * output_width];
         }
       }
-
-      input_data = input_data + inhxw;
-      output_data = output_data + outhxw;
-      filter_data = filter_data + 9;
     }
   }
 
-    //  const float *input_data = input->data<float>();
-    //  const float *filter_data = filter->data<float>();
-    //  float *output_data = output->data<float>();
-    //  const float *newscale_data = new_scale->data<float>();
-    //  const float *newbias_data = new_bias->data<float>();
-    //
-    //  float32x4_t vnewbias = vdupq_n_f32(0.0);
-    //  float32x4_t vnewscale = vdupq_n_f32(1.0);
-    //
-    //  const int in_h = static_cast<int>(input->dims()[2]);
-    //  const int in_w = static_cast<int>(input->dims()[3]);
-    //  const int out_h = static_cast<int>(output->dims()[2]);
-    //  const int out_w = static_cast<int>(output->dims()[3]);
-    //  const int out_l = out_h;
-    //  const int in_l = in_h;
-    //  const int inhxw = in_h * in_w;
-    //  const int outhxw = out_h * out_w;
-    //  const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 1 : 0;
-    //  const int batch_size = static_cast<int>(input->dims()[0]);
-    //  const int c = static_cast<int>(input->dims()[1]);
-    //  const float *input_row_ptr;
-    //  float *output_row_ptr;
-    //
-    //  const int w_times = (out_w - 2) / 3;
-    //
-    //  float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1];
-    //  float32x4_t elewise_res0, elewise_res1, elewise_res2, res3;
-    //  int out2in_mid;
-    //  float32x4_t zero = vdupq_n_f32(0.0);
-    //  for (int b = batch_size; b > 0; --b) {
-    //    const float *filter_data_tmp = filter_data;
-    //    for (int j = 0; j < c; ++j) {
-    //      auto output_data_tmp = output_data + j * out_h * out_w;
-    //      auto input_data_tmp = input_data + j * in_h * in_w;
-    //      auto input_const = input_data_tmp;
-    //
-    //      vnewbias = vdupq_n_f32(newbias_data[j]);
-    //      vnewscale = vdupq_n_f32(newscale_data[j]);
-    //
-    //      float w00 = filter_data_tmp[0];
-    //      float w01 = filter_data_tmp[1];
-    //      float w02 = filter_data_tmp[2];
-    //      float w10 = filter_data_tmp[3];
-    //      float w11 = filter_data_tmp[4];
-    //      float w12 = filter_data_tmp[5];
-    //      float w20 = filter_data_tmp[6];
-    //      float w21 = filter_data_tmp[7];
-    //      float w22 = filter_data_tmp[8];
-    //
-    //      int h_mid = 0;
-    //
-    //      for (; h_mid < out_h - 1; h_mid++) {
-    //        input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
-    //        output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
-    //
-    //        for (int w4 = 0; w4 < w_times + 1; w4++) {
-    //          if (h_mid == 0) {
-    //            elewise_res1 = zero;
-    //            elewise_res0 = zero;
-    //            elewise_res2 = zero;
-    //          } else {
-    //            elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
-    //            elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
-    //            elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
-    //          }
-    //          input_buff_mid = vld2q_f32(input_row_ptr);
-    //          input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
-    //
-    //          elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1],
-    //          w11); elewise_res0 = vmlaq_n_f32(elewise_res0,
-    //          input_buff_mid.val[0], w10); elewise_res2 =
-    //          vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
-    //
-    //          elewise_res1 =
-    //              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1],
-    //              w21);
-    //          elewise_res0 =
-    //              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0],
-    //              w20);
-    //          elewise_res2 =
-    //              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0],
-    //              w22);
-    //
-    //          res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
-    //                           vaddq_f32(elewise_res0, elewise_res1));
-    //          res3 = vmlaq_f32(vnewbias, vnewscale, res3);
-    //
-    //          if (if_relu) {
-    //            res3 = vmaxq_f32(res3, zero);
-    //          }
-    //          vst1q_f32(output_row_ptr, res3);
-    //
-    //          input_row_ptr += 6;
-    //          output_row_ptr += 3;
-    //        }
-    //      }
-    //      clock();
-    //
-    //      input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
-    //      output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
-    //
-    //      for (int w4 = 0; w4 < w_times + 1; w4++) {
-    //        elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
-    //        elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
-    //        elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
-    //
-    //        input_buff_mid = vld2q_f32(input_row_ptr);
-    //        input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
-    //
-    //        elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1],
-    //        w11); elewise_res0 = vmlaq_n_f32(elewise_res0,
-    //        input_buff_mid.val[0], w10); elewise_res2 =
-    //        vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
-    //
-    //        if (!if_pad) {
-    //          elewise_res1 =
-    //              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1],
-    //              w21);
-    //          elewise_res0 =
-    //              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0],
-    //              w20);
-    //          elewise_res2 =
-    //              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0],
-    //              w22);
-    //        }
-    //        res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
-    //                         vaddq_f32(elewise_res0, elewise_res1));
-    //        res3 = vmlaq_f32(vnewbias, vnewscale, res3);
-    //
-    //        if (if_relu) {
-    //          res3 = vmaxq_f32(res3, zero);
-    //        }
-    //        if ((w4 != w_times)) {
-    //          vst1q_f32(output_row_ptr, res3);
-    //        } else {
-    //          if (out_l - 2 - w_times * 3 == 1) {
-    //            vst1q_lane_f32(output_row_ptr, res3, 0);
-    //          } else if (out_l - 2 - w_times * 3 == 2) {
-    //            vst1q_lane_f32(output_row_ptr, res3, 0);
-    //            vst1q_lane_f32(output_row_ptr + 1, res3, 1);
-    //          }
-    //        }
-    //        input_row_ptr += 6;
-    //        output_row_ptr += 3;
-    //      }
-    //
-    //      output_data_tmp[0] = input_const[0] * w11 + input_const[1] * w12 +
-    //                           input_const[in_l] * w21 +
-    //                           input_const[in_l + 1] * w22;
-    //
-    //      out2in_mid = (out_l - 1) * 2;
-    //      output_data_tmp[out_l - 1] =
-    //          w10 * input_const[out2in_mid - 1] + w11 *
-    //          input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w -
-    //          1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) * (w12
-    //          * input_const[out2in_mid + 1] +
-    //                          w22 * input_const[out2in_mid + in_w + 1]);
-    //
-    //      out2in_mid = (out_l - 1) * 2 * in_w;
-    //
-    //      output_data_tmp[out_l * (out_l - 1)] =
-    //          w01 * input_const[out2in_mid - in_w] +
-    //          w02 * input_const[out2in_mid - in_w + 1] +
-    //          w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid +
-    //          1] + (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
-    //                          w22 * input_const[out2in_mid + in_w + 1]);
-    //      out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2;
-    //
-    //      output_data_tmp[out_l * out_l - 1] =
-    //          w00 * input_const[out2in_mid - in_w - 1] +
-    //          w01 * input_const[out2in_mid - in_w] +
-    //          w10 * input_const[out2in_mid - 1] + w11 *
-    //          input_const[out2in_mid] + (1 - if_pad) * (w20 *
-    //          input_const[out2in_mid + in_w - 1] +
-    //                          w21 * input_const[out2in_mid + in_w] +
-    //                          w02 * input_const[out2in_mid - in_w + 1] +
-    //                          w12 * input_const[out2in_mid + 1] +
-    //                          w22 * input_const[out2in_mid + in_w + 1]);
-    //      output_data_tmp[0] =
-    //          output_data_tmp[0] * newscale_data[j] + newbias_data[j];
-    //      output_data_tmp[out_l - 1] =
-    //          output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j];
-    //      output_data_tmp[out_l * (out_l - 1)] =
-    //          output_data_tmp[out_l * (out_l - 1)] * newscale_data[j] +
-    //          newbias_data[j];
-    //      output_data_tmp[out_l * out_l - 1] =
-    //          output_data_tmp[out_l * out_l - 1] * newscale_data[j] +
-    //          newbias_data[j];
-    //      if (if_relu) {
-    //        output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 :
-    //        output_data_tmp[0]; output_data_tmp[out_l - 1] =
-    //            output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l -
-    //            1];
-    //        output_data_tmp[out_l * (out_l - 1)] =
-    //            output_data_tmp[out_l * (out_l - 1)] < 0
-    //                ? 0
-    //                : output_data_tmp[out_l * (out_l - 1)];
-    //        output_data_tmp[out_l * out_l - 1] =
-    //            output_data_tmp[out_l * out_l - 1] < 0
-    //                ? 0
-    //                : output_data_tmp[out_l * out_l - 1];
-    //      }
-    //      for (int i = 1; i < out_h - 1; i++) {
-    //        out2in_mid = i * 2 * in_w;
-    //        output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w]
-    //        +
-    //                                     w02 * input_const[out2in_mid - in_w +
-    //                                     1] + w11 * input_const[out2in_mid] +
-    //                                     w12 * input_const[out2in_mid + 1] +
-    //                                     w21 * input_const[out2in_mid + in_w]
-    //                                     + w22 * input_const[out2in_mid + in_w
-    //                                     + 1];
-    //
-    //        out2in_mid = i * 2 * in_w + (out_l - 1) * 2;
-    //        output_data_tmp[i * out_l + out_l - 1] =
-    //            w00 * input_const[out2in_mid - in_w - 1] +
-    //            w01 * input_const[out2in_mid - in_w] +
-    //            w10 * input_const[out2in_mid - 1] + w11 *
-    //            input_const[out2in_mid] + w20 * input_const[out2in_mid + in_w
-    //            - 1] + w21 * input_const[out2in_mid + in_w] + (1 - if_pad) *
-    //            (w02 * input_const[out2in_mid - in_w + 1] +
-    //                            w12 * input_const[out2in_mid + 1] +
-    //                            w22 * input_const[out2in_mid + in_w + 1]);
-    //        output_data_tmp[i * out_l] =
-    //            output_data_tmp[i * out_l] * newscale_data[j] +
-    //            newbias_data[j];
-    //        output_data_tmp[i * out_l + out_l - 1] =
-    //            output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] +
-    //            newbias_data[j];
-    //        if (if_relu) {
-    //          output_data_tmp[i * out_l] =
-    //              output_data_tmp[i * out_l] < 0 ? 0 : output_data_tmp[i *
-    //              out_l];
-    //          output_data_tmp[i * out_l + out_l - 1] =
-    //              output_data_tmp[i * out_l + out_l - 1] < 0
-    //                  ? 0
-    //                  : output_data_tmp[i * out_l + out_l - 1];
-    //        }
-    //      }
-    //      filter_data_tmp += 9;
-    //    }
-    //    input_data += inhxw * c;
-    //    output_data += outhxw * c;
-    //  }
+#else  // !_OPENMP: serial implementation
+
+  const float *input_data = input->data<float>();
+  const float *filter_data = filter->data<float>();
+  float *output_data = output->data<float>();
+  const float *newscale_data = new_scale->data<float>();
+  const float *newbias_data = new_bias->data<float>();
+
+  float32x4_t vnewbias = vdupq_n_f32(0.0);
+  float32x4_t vnewscale = vdupq_n_f32(1.0);
+
+  const int in_h = static_cast<int>(input->dims()[2]);
+  const int in_w = static_cast<int>(input->dims()[3]);
+  const int out_h = static_cast<int>(output->dims()[2]);
+  const int out_w = static_cast<int>(output->dims()[3]);
+  const int out_l = out_h;  // NOTE(review): out_l / in_l are also used as
+  const int in_l = in_h;    // row strides below, i.e. square maps assumed
+  const int inhxw = in_h * in_w;
+  const int outhxw = out_h * out_w;
+  const int if_pad = in_l - 1 == (out_l - 1) * 2 ? 1 : 0;
+  const int batch_size = static_cast<int>(input->dims()[0]);
+  const int c = static_cast<int>(input->dims()[1]);
+  const float *input_row_ptr;
+  float *output_row_ptr;
+
+  const int w_times = (out_w - 2) / 3;  // whole groups of 3 in out_w - 2 cols
+  // NOTE(review): input_buff_bottom below is a VLA (GCC/Clang extension).
+  float32x4x2_t input_buff_mid{}, input_buff_bottom[w_times + 1];
+  float32x4_t elewise_res0, elewise_res1, elewise_res2, res3;
+  int out2in_mid;
+  float32x4_t zero = vdupq_n_f32(0.0);
+  for (int b = batch_size; b > 0; --b) {
+    const float *filter_data_tmp = filter_data;
+    for (int j = 0; j < c; ++j) {
+      auto output_data_tmp = output_data + j * out_h * out_w;
+      auto input_data_tmp = input_data + j * in_h * in_w;
+      auto input_const = input_data_tmp;
+
+      vnewbias = vdupq_n_f32(newbias_data[j]);
+      vnewscale = vdupq_n_f32(newscale_data[j]);
+
+      float w00 = filter_data_tmp[0];
+      float w01 = filter_data_tmp[1];
+      float w02 = filter_data_tmp[2];
+      float w10 = filter_data_tmp[3];
+      float w11 = filter_data_tmp[4];
+      float w12 = filter_data_tmp[5];
+      float w20 = filter_data_tmp[6];
+      float w21 = filter_data_tmp[7];
+      float w22 = filter_data_tmp[8];
+
+      int h_mid = 0;
+      // Rows 0 .. out_h - 2; each pass keeps its bottom input row for the next.
+      for (; h_mid < out_h - 1; h_mid++) {
+        input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
+        output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
+
+        for (int w4 = 0; w4 < w_times + 1; w4++) {
+          if (h_mid == 0) {  // no input row above the first output row
+            elewise_res1 = zero;
+            elewise_res0 = zero;
+            elewise_res2 = zero;
+          } else {
+            elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
+            elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
+            elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
+          }
+          input_buff_mid = vld2q_f32(input_row_ptr);
+          input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
+
+          elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11);
+          elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
+          elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
+
+          elewise_res1 =
+              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
+          elewise_res0 =
+              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20);
+          elewise_res2 =
+              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22);
+
+          res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
+                           vaddq_f32(elewise_res0, elewise_res1));
+          res3 = vmlaq_f32(vnewbias, vnewscale, res3);
+
+          if (if_relu) {
+            res3 = vmaxq_f32(res3, zero);
+          }
+          vst1q_f32(output_row_ptr, res3);  // 4 floats stored, stride is 3
+
+          input_row_ptr += 6;
+          output_row_ptr += 3;
+        }
+      }
+
+      // Last output row: the bottom input row is skipped when if_pad is set.
+      input_row_ptr = input_data_tmp + 1 + h_mid * 2 * in_w;
+      output_row_ptr = output_data_tmp + 1 + h_mid * out_w;
+
+      for (int w4 = 0; w4 < w_times + 1; w4++) {
+        elewise_res1 = vmulq_n_f32(input_buff_bottom[w4].val[1], w01);
+        elewise_res0 = vmulq_n_f32(input_buff_bottom[w4].val[0], w00);
+        elewise_res2 = vmulq_n_f32(input_buff_bottom[w4].val[0], w02);
+
+        input_buff_mid = vld2q_f32(input_row_ptr);
+        input_buff_bottom[w4] = vld2q_f32(input_row_ptr + in_w);
+
+        elewise_res1 = vmlaq_n_f32(elewise_res1, input_buff_mid.val[1], w11);
+        elewise_res0 = vmlaq_n_f32(elewise_res0, input_buff_mid.val[0], w10);
+        elewise_res2 = vmlaq_n_f32(elewise_res2, input_buff_mid.val[0], w12);
+
+        if (!if_pad) {
+          elewise_res1 =
+              vmlaq_n_f32(elewise_res1, input_buff_bottom[w4].val[1], w21);
+          elewise_res0 =
+              vmlaq_n_f32(elewise_res0, input_buff_bottom[w4].val[0], w20);
+          elewise_res2 =
+              vmlaq_n_f32(elewise_res2, input_buff_bottom[w4].val[0], w22);
+        }
+        res3 = vaddq_f32(vextq_f32(elewise_res2, zero, 1),
+                         vaddq_f32(elewise_res0, elewise_res1));
+        res3 = vmlaq_f32(vnewbias, vnewscale, res3);
+
+        if (if_relu) {
+          res3 = vmaxq_f32(res3, zero);
+        }
+        if ((w4 != w_times)) {
+          vst1q_f32(output_row_ptr, res3);
+        } else {
+          if (out_l - 2 - w_times * 3 == 1) {  // NOTE(review): out_l, not out_w
+            vst1q_lane_f32(output_row_ptr, res3, 0);
+          } else if (out_l - 2 - w_times * 3 == 2) {
+            vst1q_lane_f32(output_row_ptr, res3, 0);
+            vst1q_lane_f32(output_row_ptr + 1, res3, 1);
+          }
+        }
+        input_row_ptr += 6;
+        output_row_ptr += 3;
+      }
+      // Four corner outputs; scale/bias and ReLU are applied further down.
+      output_data_tmp[0] = input_const[0] * w11 + input_const[1] * w12 +
+                           input_const[in_l] * w21 +
+                           input_const[in_l + 1] * w22;
+
+      out2in_mid = (out_l - 1) * 2;
+      output_data_tmp[out_l - 1] =
+          w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
+          w20 * input_const[out2in_mid + in_w - 1] +
+          w21 * input_const[out2in_mid + in_w] +
+          (1 - if_pad) * (w12 * input_const[out2in_mid + 1] +
+                          w22 * input_const[out2in_mid + in_w + 1]);
 
+      out2in_mid = (out_l - 1) * 2 * in_w;
+
+      output_data_tmp[out_l * (out_l - 1)] =
+          w01 * input_const[out2in_mid - in_w] +
+          w02 * input_const[out2in_mid - in_w + 1] +
+          w11 * input_const[out2in_mid] + w12 * input_const[out2in_mid + 1] +
+          (1 - if_pad) * (w21 * input_const[out2in_mid + in_w] +
+                          w22 * input_const[out2in_mid + in_w + 1]);
+      out2in_mid = (out_l - 1) * 2 * in_w + (out_l - 1) * 2;
+
+      output_data_tmp[out_l * out_l - 1] =
+          w00 * input_const[out2in_mid - in_w - 1] +
+          w01 * input_const[out2in_mid - in_w] +
+          w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
+          (1 - if_pad) * (w20 * input_const[out2in_mid + in_w - 1] +
+                          w21 * input_const[out2in_mid + in_w] +
+                          w02 * input_const[out2in_mid - in_w + 1] +
+                          w12 * input_const[out2in_mid + 1] +
+                          w22 * input_const[out2in_mid + in_w + 1]);
+      output_data_tmp[0] =
+          output_data_tmp[0] * newscale_data[j] + newbias_data[j];
+      output_data_tmp[out_l - 1] =
+          output_data_tmp[out_l - 1] * newscale_data[j] + newbias_data[j];
+      output_data_tmp[out_l * (out_l - 1)] =
+          output_data_tmp[out_l * (out_l - 1)] * newscale_data[j] +
+          newbias_data[j];
+      output_data_tmp[out_l * out_l - 1] =
+          output_data_tmp[out_l * out_l - 1] * newscale_data[j] +
+          newbias_data[j];
+      if (if_relu) {
+        output_data_tmp[0] = output_data_tmp[0] < 0 ? 0 : output_data_tmp[0];
+        output_data_tmp[out_l - 1] =
+            output_data_tmp[out_l - 1] < 0 ? 0 : output_data_tmp[out_l - 1];
+        output_data_tmp[out_l * (out_l - 1)] =
+            output_data_tmp[out_l * (out_l - 1)] < 0
+                ? 0
+                : output_data_tmp[out_l * (out_l - 1)];
+        output_data_tmp[out_l * out_l - 1] =
+            output_data_tmp[out_l * out_l - 1] < 0
+                ? 0
+                : output_data_tmp[out_l * out_l - 1];
+      }
+      for (int i = 1; i < out_h - 1; i++) {  // left/right border columns
+        out2in_mid = i * 2 * in_w;
+        output_data_tmp[i * out_l] = w01 * input_const[out2in_mid - in_w] +
+                                     w02 * input_const[out2in_mid - in_w + 1] +
+                                     w11 * input_const[out2in_mid] +
+                                     w12 * input_const[out2in_mid + 1] +
+                                     w21 * input_const[out2in_mid + in_w] +
+                                     w22 * input_const[out2in_mid + in_w + 1];
+
+        out2in_mid = i * 2 * in_w + (out_l - 1) * 2;
+        output_data_tmp[i * out_l + out_l - 1] =
+            w00 * input_const[out2in_mid - in_w - 1] +
+            w01 * input_const[out2in_mid - in_w] +
+            w10 * input_const[out2in_mid - 1] + w11 * input_const[out2in_mid] +
+            w20 * input_const[out2in_mid + in_w - 1] +
+            w21 * input_const[out2in_mid + in_w] +
+            (1 - if_pad) * (w02 * input_const[out2in_mid - in_w + 1] +
+                            w12 * input_const[out2in_mid + 1] +
+                            w22 * input_const[out2in_mid + in_w + 1]);
+        output_data_tmp[i * out_l] =
+            output_data_tmp[i * out_l] * newscale_data[j] + newbias_data[j];
+        output_data_tmp[i * out_l + out_l - 1] =
+            output_data_tmp[i * out_l + out_l - 1] * newscale_data[j] +
+            newbias_data[j];
+        if (if_relu) {
+          output_data_tmp[i * out_l] =
+              output_data_tmp[i * out_l] < 0 ? 0 : output_data_tmp[i * out_l];
+          output_data_tmp[i * out_l + out_l - 1] =
+              output_data_tmp[i * out_l + out_l - 1] < 0
+                  ? 0
+                  : output_data_tmp[i * out_l + out_l - 1];
+        }
+      }
+      filter_data_tmp += 9;
+    }
+    input_data += inhxw * c;
+    output_data += outhxw * c;
+  }
+#endif  // _OPENMP
 #endif
 }
 
diff --git a/src/operators/math/gemm.cpp b/src/operators/math/gemm.cpp
index 3730cf350a1399e5f3c1473fd1ce8d7b1d13b1b6..0fb454c89d66dabdcdd40c6590120016182c6629 100644
--- a/src/operators/math/gemm.cpp
+++ b/src/operators/math/gemm.cpp
@@ -2957,8 +2957,8 @@ void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
       "vmov.f32   q15,    #0.0          \n\t"
 
       "subs       %[kc1], %[kc1], #1    \n\t"
-      "blt        end_kc1_%=            \n\t"
-      "loop_kc1_%=:                     \n\t"
+      "blt        2f                    \n\t"
+      "1:                               \n\t"
 
       //      "pld        [%[a_ptr], #128]       \n\t"
       //      "pld        [%[b_ptr], #128]       \n\t"
@@ -3030,12 +3030,12 @@ void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
       "vmla.f32   q15,  q3,   d2[1]       \n\t"
 
       "subs       %[kc1], %[kc1], #1      \n\t"
-      "bge        loop_kc1_%=             \n\t"
-      "end_kc1_%=:                        \n\t"
+      "bge        1b                      \n\t"
+      "2:                                 \n\t"
 
       "subs       %[kc2], %[kc2], #1      \n\t"
-      "blt        end_kc2_%=              \n\t"
-      "loop_kc2_%=:                       \n\t"
+      "blt        4f                      \n\t"
+      "3:                                 \n\t"
 
       "vld1.32    {d0-d2},  [%[a_ptr]]!   \n\t"
       "vld1.32    {q2, q3}, [%[b_ptr]]!   \n\t"
@@ -3054,8 +3054,8 @@ void AddDot6x8(int k, const float *a, const float *b, float *c, int ldc) {
       "vmla.f32   q15,  q3,   d2[1]       \n\t"
 
       "subs       %[kc2], %[kc2], #1      \n\t"
-      "bge        loop_kc2_%=             \n\t"
-      "end_kc2_%=:                        \n\t"
+      "bge        3b                      \n\t"
+      "4:                                 \n\t"
 
       "mov        r5,     %[c]            \n\t"
       "mov        r6,     %[step]         \n\t"
@@ -3113,8 +3113,8 @@ void AddDot8x12(int k, const float *a, const float *b, float *c, int ldc) {
       "dup      v28.4s,    wzr     \n\t"
 
       "subs       %[kc1], %[kc1], #1    \n\t"
-      "blt        end_kc1_%=            \n\t"
-      "loop_kc1_%=:                     \n\t"
+      "blt        2f                    \n\t"
+      "1:                               \n\t"
 
       "prfm     pldl1keep,         [%[a_ptr],   #32]  \n\t"
       "prfm     pldl1keep,         [%[b_ptr],   #48]  \n\t"
@@ -3149,8 +3149,8 @@ void AddDot8x12(int k, const float *a, const float *b, float *c, int ldc) {
       "fmla     v28.4s,   v4.4s,   v1.s[3]       \n\t"
 
       "subs       %[kc1], %[kc1], #1      \n\t"
-      "bge        loop_kc1_%=             \n\t"
-      "end_kc1_%=:                        \n\t"
+      "bge        1b                      \n\t"
+      "2:                                 \n\t"
 
       "st1      {v5.4s,   v6.4s,  v7.4s},    [%[c]],   %[step]   \n\t"
       "st1      {v8.4s,   v9.4s,  v10.4s},   [%[c]],   %[step]   \n\t"
@@ -3205,8 +3205,8 @@ void AddDot6x16(int k, const float *a, const float *b, float *c, int ldc) {
       "dup      v29.4s,    wzr     \n\t"
 
       "subs       %[kc1], %[kc1], #1    \n\t"
-      "blt        end_kc1_%=            \n\t"
-      "loop_kc1_%=:                     \n\t"
+      "blt        2f                    \n\t"
+      "1:                               \n\t"
 
       "prfm   pldl1keep,  [%[a_ptr],  #24]  \n\t"
       "prfm   pldl1keep,  [%[b_ptr],  #64]  \n\t"
@@ -3245,8 +3245,8 @@ void AddDot6x16(int k, const float *a, const float *b, float *c, int ldc) {
       "fmla     v29.4s,   v5.4s,   v1.s[1]       \n\t"
 
       "subs       %[kc1], %[kc1], #1      \n\t"
-      "bge        loop_kc1_%=             \n\t"
-      "end_kc1_%=:                        \n\t"
+      "bge        1b                      \n\t"
+      "2:                                 \n\t"
 
       "st1      {v6.4s,  v7.4s,  v8.4s,  v9.4s},    [%[c]],   %[step]   \n\t"
       "st1      {v10.4s, v11.4s, v12.4s, v13.4s},   [%[c]],   %[step]   \n\t"
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 23bd5412ca55a2d54af80e35e37fd58df99aa9da..2ddc594bcc06f5b9ba2ac9ca9a91c74aca9812c2 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -24,7 +24,7 @@ limitations under the License. */
 #include "framework/tensor.h"
 #include "framework/variable.h"
 #ifdef PADDLE_MOBILE_FPGA
-#include "fpga/api/fpga_api.h"
+#include "fpga/api.h"
 #endif
 
 namespace paddle_mobile {
@@ -73,6 +73,11 @@ struct DtypeTensorTrait<GPU_MALI> {
 
 class OpParam {
  protected:
+  template <typename T>
+  static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) {
+    return GetVarValue<T>("Alpha", inputs, scope);
+  }
+
   template <typename T>
   static T *InputFrom(const VariableNameMap &inputs, const Scope &scope) {
     return GetVarValue<T>("Input", inputs, scope);
@@ -248,7 +253,7 @@ class ConvParam : OpParam {
 
   const RType *Input() const { return input_; }
 
-  const RType *Filter() const { return filter_; }
+  RType *Filter() const { return filter_; }
 
   RType *Output() const { return output_; }
 
@@ -655,6 +660,21 @@ class SoftmaxParam : public OpParam {
  private:
   RType *input_x_;
   RType *out_;
+
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  std::shared_ptr<RType> float_input_x_;
+  fpga::BypassArgs fpga_bypass_args;
+
+ public:
+  RType *FloatInput() {
+    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
+  }
+  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
+  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
+  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
+#endif
 };
 #endif
 
@@ -752,16 +772,6 @@ class FeedParam : public OpParam {
   RType *input_x_;
   RType *out_;
   int batch_size;
-
-#ifdef PADDLE_MOBILE_FPGA
-
- private:
-  fpga::BypassArgs fpga_bypass_args;
-
- public:
-  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
-  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
-#endif
 };
 
 template <typename Dtype>
@@ -1009,19 +1019,24 @@ class PReluParam : public OpParam {
  public:
   PReluParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
              const AttributeMap &attrs, const Scope &scope) {
+    DLOG << "PReluParam inputs before";
     input_x_ = InputXFrom<GType>(inputs, scope);
+    alpha_ = InputAlphaFrom<GType>(inputs, scope);
+    framework::DDim dims = alpha_->dims();
     out_ = OutFrom<GType>(outputs, scope);
-    slopes_ = GetAttr<vector<float>>("slopes", attrs);
+    mode_ = GetAttr<std::string>("mode", attrs);
+    DLOG << "PReluParam mode after" << mode_;
   }
-
   const RType *InputX() const { return input_x_; }
+  const RType *InputAlpha() const { return alpha_; }
   RType *Out() const { return out_; }
-  const vector<float> &Slopes() const { return slopes_; }
+  const std::string &Mode() const { return mode_; }
 
  private:
   RType *input_x_;
   RType *out_;
-  vector<float> slopes_;
+  RType *alpha_;
+  std::string mode_;
 };
 #endif
 
@@ -1043,7 +1058,11 @@ class FusionFcParam : public OpParam {
   }
   const RType *InputX() const { return input_x_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  RType *InputY() const { return input_y_; }
+#else
   const RType *InputY() const { return input_y_; }
+#endif
 
   const RType *InputZ() const { return input_z_; }
 
@@ -1104,7 +1123,11 @@ class FusionConvAddParam : public OpParam {
 
   const RType *Input() const { return input_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
   const RType *Filter() const { return filter_; }
+#endif
 
   RType *Output() const { return output_; }
 
@@ -1184,7 +1207,11 @@ class FusionConvAddBNReluParam : public OpParam {
 
   const RType *Input() const { return input_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
   const RType *Filter() const { return filter_; }
+#endif
 
   RType *Output() const { return output_; }
 
@@ -1249,6 +1276,99 @@ class FusionConvAddBNReluParam : public OpParam {
 };
 #endif
 
+#ifdef FUSION_CONVBN_OP
+// Inputs, outputs and attributes of the fused conv + batch-norm operator.
+template <typename Dtype>
+class FusionConvBNParam : public OpParam {
+ typedef typename DtypeTensorTrait<Dtype>::gtype GType;
+ typedef typename DtypeTensorTrait<Dtype>::rtype RType;
+ public:
+  FusionConvBNParam(const VariableNameMap &inputs,
+                    const VariableNameMap &outputs, const AttributeMap &attrs,
+                    const Scope &scope) {
+    filter_ = FilterFrom<GType>(inputs, scope);
+    input_ = InputFrom<GType>(inputs, scope);
+    output_y_ = OutputYFrom<GType>(outputs, scope);
+    strides_ = GetAttr<vector<int>>("strides", attrs);
+    paddings_ = GetAttr<vector<int>>("paddings", attrs);
+    dilations_ = GetAttr<vector<int>>("dilations", attrs);
+    groups = GetAttr<int>("groups", attrs);
+    input_bias_ = InputBiasFrom<GType>(inputs, scope);
+    input_mean_ = InputMeanFrom<GType>(inputs, scope);
+    input_scale_ = InputScaleFrom<GType>(inputs, scope);
+    input_variance_ = InputVarianceFrom<GType>(inputs, scope);
+    epsilon_ = GetAttr<float>("epsilon", attrs);
+    momentum_ = GetAttr<float>("momentum", attrs);
+    // "is_test" is not read from attrs; is_test_ keeps its default value.
+  }
+
+  const RType *Input() const { return input_; }
+
+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
+  const RType *Filter() const { return filter_; }
+#endif
+  RType *Output() const { return output_y_; }
+
+  const vector<int> &Strides() const { return strides_; }
+
+  const vector<int> &Paddings() const { return paddings_; }
+
+  const vector<int> &Dilations() const { return dilations_; }
+
+  const int &Groups() const { return groups; }
+
+  const RType *InputBias() const { return input_bias_; }
+
+  const RType *InputMean() const { return input_mean_; }
+
+  const RType *InputScale() const { return input_scale_; }
+
+  const RType *InputVariance() const { return input_variance_; }
+
+  const float &Epsilon() const { return epsilon_; }
+
+  const float &Momentum() const { return momentum_; }
+
+  const bool &IsTest() const { return is_test_; }
+
+  void SetNewScale(RType *new_scale) { new_scale_ = new_scale; }
+  void SetNewBias(RType *new_bias) { new_bias_ = new_bias; }
+
+  const RType *NewScale() const { return new_scale_; }
+
+  const RType *NewBias() const { return new_bias_; }
+
+ protected:
+  RType *input_;
+  RType *output_y_;
+  RType *filter_;
+  vector<int> strides_;
+  vector<int> paddings_;
+  vector<int> dilations_;
+  int groups;
+  RType *input_bias_;
+  RType *input_mean_;
+  RType *input_scale_;
+  RType *input_variance_;
+  float epsilon_;
+  float momentum_;
+  bool is_test_ = false;  // NOTE(review): never set by ctor; confirm default
+  RType *new_bias_ = nullptr;   // null until SetNewBias() is called
+  RType *new_scale_ = nullptr;  // null until SetNewScale() is called
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::ConvArgs fpga_conv_args;
+
+ public:
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+#endif
+};
+#endif
+
 #ifdef FUSION_CONVADDBN_OP
 template <typename Dtype>
 class FusionConvAddBNParam : public OpParam {
@@ -1282,8 +1402,11 @@ class FusionConvAddBNParam : public OpParam {
 
   const RType *Input() const { return input_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
   const RType *Filter() const { return filter_; }
-
+#endif
   RType *Output() const { return output_y_; }
 
   const vector<int> &Strides() const { return strides_; }
@@ -1459,7 +1582,11 @@ class FusionConvBNReluParam : public OpParam {
 
   const RType *Input() const { return input_; }
 
+#ifdef PADDLE_MOBILE_FPGA
+  RType *Filter() const { return filter_; }
+#else
   const RType *Filter() const { return filter_; }
+#endif
 
   RType *Output() const { return output_; }
 
@@ -1510,6 +1637,15 @@ class FusionConvBNReluParam : public OpParam {
   bool is_test_;
   RType *new_bias_;
   RType *new_scale_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::ConvArgs fpga_conv_args;
+
+ public:
+  const fpga::ConvArgs &FpgaArgs() const { return fpga_conv_args; }
+  void SetFpgaArgs(const fpga::ConvArgs &args) { fpga_conv_args = args; }
+#endif
 };
 #endif
 
diff --git a/src/operators/softmax_op.cpp b/src/operators/softmax_op.cpp
index c9edfccf4ff08e5a12d735526c3d63c689711357..e85edc69c3291c794f2eeb8119b91b2926c4d870 100644
--- a/src/operators/softmax_op.cpp
+++ b/src/operators/softmax_op.cpp
@@ -34,6 +34,7 @@ REGISTER_OPERATOR_CPU(softmax, ops::SoftmaxOp);
 REGISTER_OPERATOR_MALI_GPU(softmax, ops::SoftmaxOp);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(softmax, ops::SoftmaxOp);
 #endif
 
 #endif
diff --git a/src/operators/softmax_op.h b/src/operators/softmax_op.h
index 967524f6e90be404116b84c7ea3b2e999843e2e8..579a2ed605cb3f3c8c4a3d0c2f1ccc7bd9595fc2 100644
--- a/src/operators/softmax_op.h
+++ b/src/operators/softmax_op.h
@@ -55,6 +55,7 @@ USE_OP_CPU(softmax);
 USE_OP_MALI_GPU(softmax);
 #endif
 #ifdef PADDLE_MOBILE_FPGA
+USE_OP_FPGA(softmax);
 #endif
 
 #endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f4a14f1bc4197051594a0f8609b4662ad4c7cefb..8f92b6dab9e5c2c51c485f61fa2860926ce50b1f 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -9,6 +9,11 @@ elseif ("mobilenet" IN_LIST NET)
     # gen test
     ADD_EXECUTABLE(test-mobilenet net/test_mobilenet.cpp test_helper.h  test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenet paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet-combine paddle-mobile)
+
 elseif ("yolo" IN_LIST NET)
     # gen test
     ADD_EXECUTABLE(test-yolo net/test_yolo.cpp test_helper.h  test_include.h executor_for_test.h)
@@ -22,6 +27,18 @@ elseif("resnet" IN_LIST NET)
     ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
     target_link_libraries(test-resnet paddle-mobile)
 elseif("FPGAnets" IN_LIST NET)
+    ADD_EXECUTABLE(test-resnet net/test_resnet.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-resnet paddle-mobile)
+
+    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-tensor-quant paddle-mobile)
+
+    ADD_EXECUTABLE(test-fpga-concat-op fpga/test_concat_op.cpp test_helper.h  test_include.h)
+    target_link_libraries(test-fpga-concat-op paddle-mobile)
+elseif("mobilenetssd" IN_LIST NET)
+    # gen test
+    ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenetssd paddle-mobile)
 else ()
 
     # gen test
@@ -138,6 +155,14 @@ else ()
     ADD_EXECUTABLE(test-mobilenetssd net/test_mobilenet+ssd.cpp test_helper.h  test_include.h executor_for_test.h)
     target_link_libraries(test-mobilenetssd paddle-mobile)
 
+     # gen test
+    ADD_EXECUTABLE(test-mobilenet-combine net/test_mobilenet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-mobilenet-combine paddle-mobile)
+
+    # gen test
+    ADD_EXECUTABLE(test-genet net/test_genet_combine.cpp test_helper.h  test_include.h executor_for_test.h)
+    target_link_libraries(test-genet paddle-mobile)
+
     # gen test
     ADD_EXECUTABLE(test-sigmoid operators/test_sigmoid_op.cpp  test_include.h)
     target_link_libraries(test-sigmoid paddle-mobile)
@@ -164,8 +189,7 @@ else ()
 
 endif()
 
-if(FPGA)
-    ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
-    target_link_libraries(test-tensor-quant paddle-mobile)
-
-endif()
+# if(FPGA)
+#     ADD_EXECUTABLE(test-tensor-quant fpga/test_tensor_quant.cpp test_helper.h  test_include.h executor_for_test.h)
+#     target_link_libraries(test-tensor-quant paddle-mobile)
+# endif()
diff --git a/test/common/test_gemm_perf.cpp b/test/common/test_gemm_perf.cpp
index c505c61fce21775136a368949a451999b97b3069..386c09d71a3d5709842991bffd2e8ea039edc940 100644
--- a/test/common/test_gemm_perf.cpp
+++ b/test/common/test_gemm_perf.cpp
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #include <iostream>
 #include "../test_helper.h"
+#include "../test_include.h"
 #include "operators/math/gemm.h"
 #include "operators/math/math_function.h"
 
@@ -26,6 +27,8 @@ limitations under the License. */
 #define k 1024
 
 int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   Tensor aa, bb, cc, scale, bias;
   auto aaptr = aa.mutable_data<float>({m, k});
   auto bbptr = bb.mutable_data<float>({k, n});
diff --git a/test/fpga/test_concat_op.cpp b/test/fpga/test_concat_op.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5d1a5828b36b3d9ed371a271af6db82657ff1596
--- /dev/null
+++ b/test/fpga/test_concat_op.cpp
@@ -0,0 +1,87 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "../test_include.h"
+#include "operators/concat_op.h"
+
+int main() {
+  paddle_mobile::Loader<paddle_mobile::FPGA> loader;
+  auto program = loader.Load(g_googlenet);
+  PADDLE_MOBILE_ENFORCE(program.originProgram != nullptr,
+                        "program file read fail");
+  // Run the "concat" op on four inputs and log one input/output element pair.
+  Executor4Test<paddle_mobile::FPGA,
+                paddle_mobile::operators::ConcatOp<paddle_mobile::FPGA, float>>
+      executor(program, "concat");
+
+  // 1. input_tensors;
+  vector<Tensor> input_tensors;
+
+  Tensor input1;
+  auto input1_data = CreateInput<float>(&input1, {4, 10, 2, 2}, 0, 1);
+  input_tensors.push_back(input1);
+  Tensor input2;
+  auto input2_data = CreateInput<float>(&input2, {4, 20, 2, 2}, 0, 1);
+  input_tensors.push_back(input2);
+  Tensor input3;
+  auto input3_data = CreateInput<float>(&input3, {4, 30, 2, 2}, 0, 1);
+  input_tensors.push_back(input3);
+  Tensor input4;
+  auto input4_data = CreateInput<float>(&input4, {4, 40, 2, 2}, 0, 1);
+  input_tensors.push_back(input4);
+  // 2. input_names
+  vector<string> input_names({
+      "conv2d_3.tmp_1",
+      "conv2d_5.tmp_1",
+      "conv2d_7.tmp_1",
+      "conv2d_8.tmp_1",
+  });
+
+  // 3. output_names
+  vector<string> output_names({"concat_0.tmp_0"});
+
+  // 4. out_dims: batch 4 like every input, channels 10 + 20 + 30 + 40 = 100.
+  vector<DDim> out_ddims;
+  auto out_ddim = paddle_mobile::framework::make_ddim({4, 100, 2, 2});
+  out_ddims.push_back(out_ddim);
+
+  auto output = executor.Predict<LoDTensor>(input_tensors, input_names,
+                                            output_names, out_ddims);
+
+  auto output0_data = output[0]->data<float>();
+
+  // 5. test one example.
+  int input_n = 1;
+  int input_c = 2;
+  int input_h = 0;
+  int input_w = 1;
+  int stride0 = input3.numel() / input3.dims()[0];
+  int stride1 = input3.numel() / input3.dims()[0] / input3.dims()[1];
+  int stride2 = input3.dims()[3];
+  /// inputx1 (4,10,2,2),
+  /// inputx2 (4,20,2,2),
+  /// inputx3 (4,30,2,2),
+  /// inputx4 (4,40,2,2),
+  /// axis = 1
+  /// output (4,100,2,2)
+  int input_index =
+      input_n * stride0 + input_c * stride1 + input_h * stride2 + input_w;
+  int output_index = input_n * 100 * 2 * 2 +
+                     (input_c + input1.dims()[1] + input2.dims()[1]) * 2 * 2 +
+                     input_h * 2 + input_w;
+
+  DLOG << " input3 [1, 2,0,1] = " << input3_data[input_index];
+  DLOG << " output [1,32,0,1] = " << output0_data[output_index];
+  return 0;
+}
diff --git a/test/fpga/test_tensor_quant.cpp b/test/fpga/test_tensor_quant.cpp
index 3835c395a4764c3c978b6bba9c1af48305be1d58..6cfc27e91ced109e41bf5420649dbb762ee94d66 100644
--- a/test/fpga/test_tensor_quant.cpp
+++ b/test/fpga/test_tensor_quant.cpp
@@ -12,23 +12,34 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
-  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
-  bool optimize = false;
-  if (paddle_mobile.Load(g_googlenet, optimize)) {
-    auto time1 = time();
-    DLOG << "load cost: " << time_diff(time1, time1) << "ms";
-    std::vector<float> input;
-    std::vector<int64_t> dims{1, 3, 224, 224};
-    GetInput<float>(g_test_image_1x3x224x224, &input, dims);
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(g_resnet, true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+    std::vector<int64_t> dims{1, 3, 32, 32};
+    Tensor input_tensor;
+    SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
+                       static_cast<float>(1));
+    // Copy the tensor contents into the flat vector handed to Predict().
+    std::vector<float> input(input_tensor.data<float>(),
+                             input_tensor.data<float>() + input_tensor.numel());
+    // Warm up once; this run happens before time3 and is not timed.
+    paddle_mobile.Predict(input, dims);
     auto time3 = time();
-    auto vec_result = paddle_mobile.Predict(input, dims);
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
+
   return 0;
 }
diff --git a/test/net/test_genet_combine.cpp b/test/net/test_genet_combine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e6b0505a670f1a58ed7d09cc4854ef52b05b0649
--- /dev/null
+++ b/test/net/test_genet_combine.cpp
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_genet_combine) + "/model",
+                         std::string(g_genet_combine) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 128, 128};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+    // NOTE(review): dims are 128x128 but the image constant says 224x224.
+    // Warm up once and print the largest output value and its index.
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+    // Time 10 predictions and report the average cost of one run.
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  std::cout
+      << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
+      << std::endl;
+  return 0;
+}
diff --git a/test/net/test_googlenet.cpp b/test/net/test_googlenet.cpp
index 02882bedb01df49b8032325e506c9118f3434a2f..b98d07ad16dcb15268e4638f9144bde36a1005a8 100644
--- a/test/net/test_googlenet.cpp
+++ b/test/net/test_googlenet.cpp
@@ -17,7 +17,14 @@ limitations under the License. */
 #include "../test_include.h"
 
 int main() {
+#ifdef PADDLE_MOBILE_FPGA
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+#endif
+
+#ifdef PADDLE_MOBILE_CPU
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#endif
+
   paddle_mobile.SetThreadNum(4);
   bool optimize = true;
   auto time1 = time();
diff --git a/test/net/test_mobilenet.cpp b/test/net/test_mobilenet.cpp
index d7793f729866024e2560ad13ac5613172eecc4dd..56234c3c72b58869775238d78875c8bd3b94cf7c 100644
--- a/test/net/test_mobilenet.cpp
+++ b/test/net/test_mobilenet.cpp
@@ -20,7 +20,11 @@ int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
   paddle_mobile.SetThreadNum(4);
   auto time1 = time();
-  if (paddle_mobile.Load(g_mobilenet, true)) {
+  //  auto isok = paddle_mobile.Load(std::string(g_mobilenet_detect) + "/model",
+  //                     std::string(g_mobilenet_detect) + "/params", true);
+
+  auto isok = paddle_mobile.Load(g_mobilenet, true);
+  if (isok) {
     auto time2 = time();
     std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
 
@@ -39,10 +43,14 @@ int main() {
     for (int i = 0; i < 10; ++i) {
       auto vec_result = paddle_mobile.Predict(input, dims);
     }
+    DLOG << vec_result;
     auto time4 = time();
     std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
               << std::endl;
   }
 
+  std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
+               "是否存在?"
+            << std::endl;
   return 0;
 }
diff --git a/test/net/test_mobilenet_combine.cpp b/test/net/test_mobilenet_combine.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..af93d105ea0c290b1dd3a80310a39e0f52c8abaa
--- /dev/null
+++ b/test/net/test_mobilenet_combine.cpp
@@ -0,0 +1,51 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <iostream>
+#include "../test_helper.h"
+#include "../test_include.h"
+
+int main() {
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
+  auto time1 = time();
+  if (paddle_mobile.Load(std::string(g_mobilenet_combined) + "/model",
+                         std::string(g_mobilenet_combined) + "/params", true)) {
+    auto time2 = time();
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
+
+    std::vector<float> input;
+    std::vector<int64_t> dims{1, 3, 224, 224};
+    GetInput<float>(g_test_image_1x3x224x224_banana, &input, dims);
+
+    // Warm up once and print the largest output value and its index.
+    auto vec_result = paddle_mobile.Predict(input, dims);
+    std::vector<float>::iterator biggest =
+        std::max_element(std::begin(vec_result), std::end(vec_result));
+    std::cout << " Max element is " << *biggest << " at position "
+              << std::distance(std::begin(vec_result), biggest) << std::endl;
+    // Time 10 predictions and report the average cost of one run.
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      auto vec_result = paddle_mobile.Predict(input, dims);
+    }
+    auto time4 = time();
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
+  }
+  std::cout
+      << "如果结果Nan请查看: test/images/test_image_1x3x224x224_float 是否存在?"
+      << std::endl;
+  return 0;
+}
diff --git a/test/net/test_resnet.cpp b/test/net/test_resnet.cpp
index 883ad95392ad351a2634e1a56ac050f02d8767e6..82fdc22763d11d4b06439465d56d0e6fa663a317 100644
--- a/test/net/test_resnet.cpp
+++ b/test/net/test_resnet.cpp
@@ -12,16 +12,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
+#ifdef PADDLE_MOBILE_FPGA
+  paddle_mobile::PaddleMobile<paddle_mobile::FPGA> paddle_mobile;
+#endif
+
+#ifdef PADDLE_MOBILE_CPU
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+#endif
+  paddle_mobile.SetThreadNum(4);
   auto time1 = time();
-  if (paddle_mobile.Load(g_resnet, false)) {
+  if (paddle_mobile.Load(g_resnet, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 32, 32};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 32, 32}, static_cast<float>(0),
@@ -29,10 +36,15 @@ int main() {
 
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
-    auto time3 = time();
+    // 预热一次
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
+              << std::endl;
   }
 
   return 0;
diff --git a/test/net/test_squeezenet.cpp b/test/net/test_squeezenet.cpp
index 39d4687ff3de37c571ee89213485fb0b6bc939df..5d89618859d47fd7d61d61871583e1ebbca3db33 100644
--- a/test/net/test_squeezenet.cpp
+++ b/test/net/test_squeezenet.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   //  ../../../test/models/googlenet
   //  ../../../test/models/mobilenet
   auto time1 = time();
-  if (paddle_mobile.Load(g_squeezenet, false)) {
+  if (paddle_mobile.Load(g_squeezenet, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
     std::vector<int64_t> dims{1, 3, 227, 227};
     Tensor input_tensor;
     SetupTensor<float>(&input_tensor, {1, 3, 227, 227}, static_cast<float>(0),
@@ -31,10 +32,15 @@ int main() {
 
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
-    auto time3 = time();
+    // 预热一次
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
 
   return 0;
diff --git a/test/net/test_yolo.cpp b/test/net/test_yolo.cpp
index 65dec59ad0579d362c75ae6ec1d362fb957d4fc5..ffe3cdc22c4f847da2503192660a99f7f6d62e37 100644
--- a/test/net/test_yolo.cpp
+++ b/test/net/test_yolo.cpp
@@ -12,18 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include <fstream>
+#include <iostream>
 #include "../test_helper.h"
 #include "../test_include.h"
 
 int main() {
   paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile;
+  paddle_mobile.SetThreadNum(4);
   //  ../../../test/models/googlenet
   //  ../../../test/models/mobilenet
   auto time1 = time();
-  if (paddle_mobile.Load(g_yolo, false)) {
+  if (paddle_mobile.Load(g_yolo, true)) {
     auto time2 = time();
-    DLOG << "load cost :" << time_diff(time1, time1) << "ms";
+    std::cout << "load cost :" << time_diff(time1, time2) << "ms" << std::endl;
 
     std::vector<int64_t> dims{1, 3, 227, 227};
     Tensor input_tensor;
@@ -32,10 +33,15 @@ int main() {
 
     std::vector<float> input(input_tensor.data<float>(),
                              input_tensor.data<float>() + input_tensor.numel());
-    auto time3 = time();
+    // Warm-up run, excluded from the timed loop below.
     paddle_mobile.Predict(input, dims);
+    auto time3 = time();
+    for (int i = 0; i < 10; ++i) {
+      paddle_mobile.Predict(input, dims);
+    }
     auto time4 = time();
-    DLOG << "predict cost :" << time_diff(time3, time4) << "ms";
+    std::cout << "predict cost :" << time_diff(time3, time4) / 10 << "ms"
+              << std::endl;
   }
   return 0;
 }
diff --git a/test/operators/test_fusion_conv_add_bn_relu_op.cpp b/test/operators/test_fusion_conv_add_bn_relu_op.cpp
index 81400d987195364c06b4b93d0859469b43f90e7b..7764d95ed72da613459233bd55ddcffdc444318f 100644
--- a/test/operators/test_fusion_conv_add_bn_relu_op.cpp
+++ b/test/operators/test_fusion_conv_add_bn_relu_op.cpp
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
+#include <iostream>
 #include "../test_include.h"
 #include "operators/fusion_conv_add_bn_relu_op.h"
 
diff --git a/test/test_helper.h b/test/test_helper.h
index 658af447d6cfcd85c68ff350b104c2468d442e40..69ffa58847f2395dec59d87abae4128d885dd19a 100644
--- a/test/test_helper.h
+++ b/test/test_helper.h
@@ -26,7 +26,10 @@ limitations under the License. */
 
 static const char *g_ocr = "../models/ocr";
 static const char *g_mobilenet_ssd = "../models/mobilenet+ssd";
+static const char *g_genet_combine = "../models/enet";
 static const char *g_mobilenet_ssd_gesture = "../models/mobilenet+ssd_gesture";
+static const char *g_mobilenet_combined = "../models/mobilenet_combine";
+static const char *g_mobilenet_detect = "../models/mobilenet-detect";
 static const char *g_squeezenet = "../models/squeezenet";
 static const char *g_googlenet = "../models/googlenet";
 static const char *g_mobilenet = "../models/mobilenet";
diff --git a/tools/build.sh b/tools/build.sh
index bf3545ef162c86c16c0877f5f25f3a1e09de1fd4..4d877720a32c6c79df6aca60a62a4da7e997d803 100755
--- a/tools/build.sh
+++ b/tools/build.sh
@@ -1,6 +1,6 @@
 #!/usr/bin/env bash
 NETS=""
-declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet")
+declare -a supportedNets=("googlenet" "mobilenet" "yolo" "squeezenet" "resnet" "mobilenetssd")
 
 build_for_mac() {
     if [ ! `which brew` ]; then
diff --git a/tools/op.cmake b/tools/op.cmake
index 6b6cb13dbc49b2a6cd672ea4e637f6650d60f8d2..74f296646e7df7991e59daedc34ecc68895ad5cb 100644
--- a/tools/op.cmake
+++ b/tools/op.cmake
@@ -21,7 +21,7 @@ if ("mobilenet" IN_LIST NET)
   set(ELEMENTWISEADD_OP ON)
   set(RELU_OP ON)
   set(SOFTMAX_OP ON)
-  set(SOFTMAX_OP ON)
+  set(MUL_OP ON)
   set(DEPTHWISECONV_OP ON)
   set(BATCHNORM_OP ON)
   set(POOL_OP ON)
@@ -33,6 +33,28 @@ if ("mobilenet" IN_LIST NET)
 endif()
 
 
+# Operator switches turned on when "mobilenetssd" is listed in NET.
+if ("mobilenetssd" IN_LIST NET)
+  message("mobilenetssd enabled")
+  set(FUSION_CONVBNRELU_OP ON)
+  set(FUSION_DWCONVBNRELU_OP ON)
+  set(FUSION_CONVADD_OP ON)
+  set(MULTICLASSNMS_OP ON)
+  set(SOFTMAX_OP ON)
+  set(TRANSPOSE_OP ON)
+  # feed
+  set(PRIORBOX_OP ON)
+  set(CONCAT_OP ON)
+  set(BOXCODER_OP ON)
+  set(RESHAPE_OP ON)
+  # fetch
+  # total
+
+  set(FOUND_MATCH ON)
+
+endif()
+
+
 if ("yolo" IN_LIST NET)
   message("yolo enabled")
   set(BATCHNORM_OP ON)
@@ -64,6 +86,8 @@ if ("resnet" IN_LIST NET)
   set(RELU_OP ON)
   set(ELEMENTWISEADD_OP ON)
   set(POOL_OP ON)
+  set(BATCHNORM_OP ON)
+  set(MUL_OP ON)
   set(RESHAPE_OP ON)
   set(SOFTMAX_OP ON)
 
@@ -82,6 +106,9 @@ if ("FPGAnets" IN_LIST NET)
   set(CONCAT_OP ON)
   set(SOFTMAX_OP ON)
   set(DROPOUT_OP ON)
+  set(FUSION_CONVBNRELU_OP ON)
+  set(FUSION_CONVBN_OP ON)
+  set(FUSION_CONVADD_OP ON)
 
   set(FOUND_MATCH ON)   
 endif()
@@ -240,8 +267,8 @@ endif()
 if (FUSION_ELEMENTWISEADDRELU_OP)
   add_definitions(-DFUSION_ELEMENTWISEADDRELU_OP)
 endif()
-if (REGION_OP)
-  add_definitions(-DREGION_OP)
+if (FUSION_CONVBN_OP)
+  add_definitions(-DFUSION_CONVBN_OP)
 endif()
 
 if (CONV_TRANSPOSE_OP)