diff --git a/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj b/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj
index 5596c6f0a24471b42e7aedf48db84384b1042e5c..dbacb00f0d857655ef6048cff24ad6cab5cb91f4 100644
--- a/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj
+++ b/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj
@@ -243,13 +243,13 @@
 			);
 			inputPaths = (
 				"${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh",
-				"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
+				"${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
 			);
 			name = "[CP] Embed Pods Frameworks";
 			outputFileListPaths = (
 			);
 			outputPaths = (
-				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
+				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = /bin/sh;
@@ -436,7 +436,7 @@
 			baseConfigurationReference = 4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */;
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
-				CODE_SIGN_STYLE = Automatic;
+				CODE_SIGN_STYLE = Manual;
 				DEVELOPMENT_TEAM = A798K58VVL;
 				INFOPLIST_FILE = MobileNetDemo/Info.plist;
 				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
@@ -446,6 +446,7 @@
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
 				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE_SPECIFIER = ForAllDev;
 				SWIFT_VERSION = 4.0;
 				TARGETED_DEVICE_FAMILY = "1,2";
 			};
@@ -456,7 +457,7 @@
 			baseConfigurationReference = E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */;
 			buildSettings = {
 				ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
-				CODE_SIGN_STYLE = Automatic;
+				CODE_SIGN_STYLE = Manual;
 				DEVELOPMENT_TEAM = A798K58VVL;
 				INFOPLIST_FILE = MobileNetDemo/Info.plist;
 				IPHONEOS_DEPLOYMENT_TARGET = 9.0;
@@ -466,6 +467,7 @@
 				);
 				PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
 				PRODUCT_NAME = "$(TARGET_NAME)";
+				PROVISIONING_PROFILE_SPECIFIER = ForAllDev;
 				SWIFT_VERSION = 4.0;
 				TARGETED_DEVICE_FAMILY = "1,2";
 			};
diff --git a/metal/Podfile b/metal/Podfile
index 0262c9beaf3e3d973de4cb2a3d7af041cbff0627..f07622c920f286102e29e9a09bdee52cbcebf116 100644
--- a/metal/Podfile
+++ b/metal/Podfile
@@ -5,21 +5,25 @@ workspace 'paddle-mobile.xcworkspace'
 
 target 'paddle-mobile-demo' do
 	project 'paddle-mobile-demo/paddle-mobile-demo.xcodeproj'
-    pod 'SwiftProtobuf', '~> 1.0'
+    # pod 'SwiftProtobuf', '~> 1.0'
+    pod 'Protobuf', '~> 3.0.0'
 end
 
 target 'paddle-mobile' do
 	project 'paddle-mobile/paddle-mobile.xcodeproj'
-	pod 'SwiftProtobuf', '~> 1.0'
+    # pod 'SwiftProtobuf', '~> 1.0'
+    pod 'Protobuf', '~> 3.0.0'
 end
 
 target 'paddle-mobile-unit-test' do
     project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj'
-    pod 'SwiftProtobuf', '~> 1.0'
+    # pod 'SwiftProtobuf', '~> 1.0'
+    pod 'Protobuf', '~> 3.0.0'
 end
 
 target 'MobileNetDemo' do
     project 'MobileNetDemo/MobileNetDemo.xcodeproj'
-    pod 'SwiftProtobuf', '~> 1.0'
+    # pod 'SwiftProtobuf', '~> 1.0'
+    pod 'Protobuf', '~> 3.0.0'
 end
 
diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
index 75e48f2d894c54ac385df4d71c014de370af0053..9e7bab8b8afa48656645d953049df8fb51cf5918 100644
--- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
+++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj
@@ -411,11 +411,11 @@
 			);
 			inputPaths = (
 				"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh",
-				"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
+				"${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
 			);
 			name = "[CP] Embed Pods Frameworks";
 			outputPaths = (
-				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
+				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = /bin/sh;
diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj
index 50d58bb45bb5c0e8e5ffbbe8f10ce3e41b770f7c..6de1a7f37225222c629841b89549bcdadda12753 100644
--- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj
+++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj
@@ -234,11 +234,11 @@
 			);
 			inputPaths = (
 				"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh",
-				"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
+				"${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
 			);
 			name = "[CP] Embed Pods Frameworks";
 			outputPaths = (
-				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
+				"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 			shellPath = /bin/sh;
diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
index 4cc97d0e9eea2d79b0e9360aa4c9ce817709fced..17eeb75bfffcd5bb2b0d484b0fe2c9048049bebd 100644
--- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
+++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj
@@ -7,6 +7,8 @@
 	objects = {
 
 /* Begin PBXBuildFile section */
+		456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */ = {isa = PBXBuildFile; fileRef = 456BB7B221F5B356001474E2 /* Framework.pbobjc.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc"; }; };
+		456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */ = {isa = PBXBuildFile; fileRef = 456BB7B321F5B356001474E2 /* Framework.pbobjc.h */; settings = {ATTRIBUTES = (Public, ); }; };
 		4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; };
 		4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; };
 		4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; };
@@ -47,15 +49,14 @@
 		FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA620E11CBC0081E9F8 /* Operator.swift */; };
 		FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */; };
 		FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA820E11CBC0081E9F8 /* ReluOp.swift */; };
-		FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BAF20E11CC20081E9F8 /* framework.pb.swift */; };
 		FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB020E11CC20081E9F8 /* Scope.swift */; };
 		FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB120E11CC20081E9F8 /* TensorDesc.swift */; };
-		FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */; };
-		FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* VarDesc.swift */; };
+		FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */; };
+		FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */; };
 		FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB420E11CC20081E9F8 /* Program.swift */; };
-		FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* OpDesc.swift */; };
+		FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */; };
 		FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB620E11CC20081E9F8 /* Attribute.swift */; };
-		FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* BlockDesc.swift */; };
+		FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */; };
 		FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */; };
 		FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */; };
 		FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
@@ -93,6 +94,7 @@
 		FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; };
 		FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; };
 		FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */; };
+		FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */; };
 		FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
 		FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
 		FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
@@ -137,6 +139,8 @@
 /* End PBXBuildFile section */
 
 /* Begin PBXFileReference section */
+		456BB7B221F5B356001474E2 /* Framework.pbobjc.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = Framework.pbobjc.m; sourceTree = "<group>"; };
+		456BB7B321F5B356001474E2 /* Framework.pbobjc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Framework.pbobjc.h; sourceTree = "<group>"; };
 		4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = "<group>"; };
 		4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpKernel.swift; sourceTree = "<group>"; };
 		4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = "<group>"; };
@@ -182,15 +186,14 @@
 		FC039BA620E11CBC0081E9F8 /* Operator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Operator.swift; sourceTree = "<group>"; };
 		FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BatchNormOp.swift; sourceTree = "<group>"; };
 		FC039BA820E11CBC0081E9F8 /* ReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReluOp.swift; sourceTree = "<group>"; };
-		FC039BAF20E11CC20081E9F8 /* framework.pb.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = framework.pb.swift; sourceTree = "<group>"; };
 		FC039BB020E11CC20081E9F8 /* Scope.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Scope.swift; sourceTree = "<group>"; };
 		FC039BB120E11CC20081E9F8 /* TensorDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TensorDesc.swift; sourceTree = "<group>"; };
-		FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ProgramDesc.swift; sourceTree = "<group>"; };
-		FC039BB320E11CC20081E9F8 /* VarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VarDesc.swift; sourceTree = "<group>"; };
+		FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMProgramDesc.swift; sourceTree = "<group>"; };
+		FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMVarDesc.swift; sourceTree = "<group>"; };
 		FC039BB420E11CC20081E9F8 /* Program.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Program.swift; sourceTree = "<group>"; };
-		FC039BB520E11CC20081E9F8 /* OpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OpDesc.swift; sourceTree = "<group>"; };
+		FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMOpDesc.swift; sourceTree = "<group>"; };
 		FC039BB620E11CC20081E9F8 /* Attribute.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Attribute.swift; sourceTree = "<group>"; };
-		FC039BB720E11CC20081E9F8 /* BlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BlockDesc.swift; sourceTree = "<group>"; };
+		FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMBlockDesc.swift; sourceTree = "<group>"; };
 		FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReluKernel.swift; sourceTree = "<group>"; };
 		FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvKernel.swift; sourceTree = "<group>"; };
 		FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
@@ -229,6 +232,7 @@
 		FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; };
 		FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; };
 		FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchKernel.swift; sourceTree = "<group>"; };
+		FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BufferToTexture.metal; sourceTree = "<group>"; };
 		FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
 		FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
 		FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
@@ -403,15 +407,16 @@
 		FC039BAE20E11CC20081E9F8 /* Program */ = {
 			isa = PBXGroup;
 			children = (
-				FC039BAF20E11CC20081E9F8 /* framework.pb.swift */,
+				456BB7B321F5B356001474E2 /* Framework.pbobjc.h */,
+				456BB7B221F5B356001474E2 /* Framework.pbobjc.m */,
 				FC039BB020E11CC20081E9F8 /* Scope.swift */,
 				FC039BB120E11CC20081E9F8 /* TensorDesc.swift */,
-				FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */,
-				FC039BB320E11CC20081E9F8 /* VarDesc.swift */,
+				FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */,
+				FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */,
 				FC039BB420E11CC20081E9F8 /* Program.swift */,
-				FC039BB520E11CC20081E9F8 /* OpDesc.swift */,
+				FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */,
 				FC039BB620E11CC20081E9F8 /* Attribute.swift */,
-				FC039BB720E11CC20081E9F8 /* BlockDesc.swift */,
+				FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */,
 				FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */,
 			);
 			path = Program;
@@ -496,6 +501,7 @@
 		FCEB6837212F00B100D2448E /* metal */ = {
 			isa = PBXGroup;
 			children = (
+				FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */,
 				4AF928812135673D005B6C3A /* ConcatKernel.metal */,
 				4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */,
 				4AF9288321357BE3005B6C3A /* Elementwise.metal */,
@@ -545,6 +551,7 @@
 			isa = PBXHeadersBuildPhase;
 			buildActionMask = 2147483647;
 			files = (
+				456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */,
 				FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
@@ -582,7 +589,7 @@
 				TargetAttributes = {
 					FC039B6920E11C3C0081E9F8 = {
 						CreatedOnToolsVersion = 9.3.1;
-						LastSwiftMigration = 0940;
+						LastSwiftMigration = 1000;
 					};
 				};
 			};
@@ -659,7 +666,7 @@
 				FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
 				FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */,
 				FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
-				FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
+				FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */,
 				FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
 				FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
 				FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
@@ -670,6 +677,7 @@
 				FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */,
 				FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
 				4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */,
+				FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */,
 				4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */,
 				FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
 				4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
@@ -684,7 +692,6 @@
 				FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
 				FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
 				FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
-				FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
 				FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
 				FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
 				FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
@@ -698,6 +705,7 @@
 				4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
 				4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
 				FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */,
+				456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */,
 				FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
 				4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */,
 				FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
@@ -711,7 +719,7 @@
 				FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */,
 				FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
 				FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */,
-				FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
+				FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */,
 				FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */,
 				4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */,
 				FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */,
@@ -749,14 +757,14 @@
 				FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
 				FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
 				FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */,
-				FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
+				FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */,
 				FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */,
 				4AA1EA90214664CD00D0F791 /* Split.metal in Sources */,
 				FCD04E6820F315020007374F /* PoolKernel.swift in Sources */,
 				FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */,
 				FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
 				FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */,
-				FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */,
+				FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */,
 				FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */,
 				4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */,
 				FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */,
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift b/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift
index b94104099635382b3b2e4ec902cc3eed55533d26..1d4f0ec14fa6442be708e729ce841969a12f5582 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift
@@ -13,7 +13,7 @@
  limitations under the License. */
 
 import Foundation
-import SwiftProtobuf
+//import SwiftProtobuf
 
 public class Loader<P: PrecisionType> {
   class ParaLoader {
@@ -145,13 +145,17 @@ public class Loader<P: PrecisionType> {
   public init(){}
   func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program {
     do {
-      let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
-        serializedData: modelData)
+        /// Previous SwiftProtobuf approach: deserialize the model data into a message instance.
+        //      let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
+        //        serializedData: modelData)
+        
+        /// Objective-C Protobuf approach: deserialize the model data into a ProgramDesc instance.
+      let protoProgram = try ProgramDesc.init(data: (modelData as NSData) as Data)
       
-      let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
+      let originProgramDesc = PMProgramDesc.init(protoProgram: protoProgram)
       let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
       
-//      let programDesc = ProgramDesc.init(protoProgram: protoProgram)
+//      let programDesc = PMProgramDesc.init(protoProgram: protoProgram)
 
       print(programDesc)
       
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift
index 06b078a78699599f692c9976070bb74563c907b2..fcedbd36f7f50b348aab97de18c9fee414f182cf 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift
@@ -27,7 +27,7 @@ class OpCreator<P: PrecisionType> {
         }
     }
     
-  func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable {
+  func creat(device: MTLDevice, opDesc: PMOpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable {
         guard let opCreator = opCreators[opDesc.type] else {
             throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet")
         }
@@ -39,7 +39,7 @@ class OpCreator<P: PrecisionType> {
         }
     }
     
-    let opCreators: [String : (MTLDevice, OpDesc, Scope, InitContext) throws -> Runable & InferShaperable] =
+    let opCreators: [String : (MTLDevice, PMOpDesc, Scope, InitContext) throws -> Runable & InferShaperable] =
         [gConvType                  :     ConvOp<P>.creat,
          gBatchNormType             :     BatchNormOp<P>.creat,
          gReluType                  :     ReluOp<P>.creat,
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift
index b0ac4c231a6d1ad8473eddb1109df1eabb0c59a7..01c22166642a1e16717f2cad3d434d2fb1ed0f76 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift
@@ -27,7 +27,7 @@ protocol OpParam {
   func outputDesc() -> String
   
   //associatedtype ParamPrecisionType: PrecisionType
-  init(opDesc: OpDesc, inScope: Scope) throws
+  init(opDesc: PMOpDesc, inScope: Scope) throws
   static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType
   static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
   static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift
index d4dd117fec638819f2ef32ac6e8853e711f4603b..532d1b661d4cb0e9823e09a9fc82d13af4f40f76 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift
@@ -72,11 +72,11 @@ public class InitContext {
 
 protocol Creator where Self: OperatorProtocol{
   associatedtype OpType: OperatorProtocol & Runable & InferShaperable
-  static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType
+  static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType
 }
 
 extension Creator where Self: OperatorProtocol {
-  static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType {
+  static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType {
     do {
       return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope, initContext: initContext)
     } catch let error {
@@ -100,11 +100,11 @@ protocol OperatorProtocol {
   var attrs: [String : Attr] { get }
   var para: ParamType { get }
   var kernel: KerType { get }
-  init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws
+  init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws
 }
 
 extension OperatorProtocol {
-  static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> Self {
+  static func provide(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> Self {
     do {
       return try Self.init(device: device, opDesc: opDesc, inScope: inScope, initContext: initContext)
     } catch let error {
@@ -114,7 +114,7 @@ extension OperatorProtocol {
 }
 
 class Operator <KernelType:  Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
-  required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws {
+  required init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws {
     type = opDesc.type
     scope = inScope
     inputs = opDesc.inputs
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift
index ccf27a9d4021cf8607bf80555430ee90469b2741..a877620416cb1b12be1ac1ef2a86f198fe75fc60 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class BatchNormParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
       if input.transpose != [0, 2, 3, 1] {
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift
index 6a749e772bff70c78237304f3567a42e2492f20c..a19dd1039073812b024a55c60bfad8c3c1387e71 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class BilinearInterpParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift
index 50a9f5ad88c58f8896857524c6bd4451b34850ad..4679885ab6e5c946d9b335f8b59f8537e37ea967 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class BoxcoderParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope)
       priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift
index cb757edaa0852108864b2f9bef2b945b0575ffba..c2c22d55af6fc33ca69cbc028f149d54285459e7 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ConcatParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       guard let xlist = opDesc.inputs["X"] else {
         fatalError()
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift
index f6fa4dd0a7820cf4dcfe8899a7355332a06f841c..552d72f436bf6de89f52bae186f72a0a778b1f4c 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
       input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift
index 4f4b23cf5809446b5ed321940c09466497fdcf05..6aacd4208e0a46ba6c88f9e2073c6ef3d4753952 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift
@@ -17,7 +17,7 @@ import Foundation
 
 class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       
       filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift
index bc3d0de85dbbf9c20096dafc4e08cff1a0115de4..923c2c210ddba99dcebec77ae91299cd28ed638e 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ConvAddParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
       input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift
index 1157c0538392d8f211214ab7bb1a3a1a2afac8e6..1c0bbba8d9dba61560ce4be97369fbb406fe238a 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ConvAddPreluParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
       input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift
index 104ff302872c3549c07f27437ea3c8f01ffbafb5..423e55e391ad9a110fb71af09a16373a322d3d5f 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ConvBNReluParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
       input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift
index 9532ee720cf053e76f4ac2094ed4de1fab0792f5..c66813b166fefd8fe5f139c94d73cf55ff83d682 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ConvParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
       input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift
index d4bd4140e5097f77fd5bc295a940a8df800a7258..c035f403a62875da14df291bad01766731caf380 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ConvTransposeParam<P: PrecisionType>: ConvParam<P> {
   //typealias ParamPrecisionType = P
-    required init(opDesc: OpDesc, inScope: Scope) throws {
+    required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       try super.init(opDesc: opDesc, inScope: inScope)
     } catch let error {
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift
index 6588e59194f8550ad63a0adfc2a8bc46e877a437..5fa69d4f44e48603dec9213be78d08b11b433edd 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class ElementwiseAddParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift
index 72426c1900d0b8a1ff87cc6d9507c5b66228147b..6a49d7bfa2fe4f060eedc84d47a8c1f8d64ee4d0 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
       mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift
index 5b3179191b172ae6ec777e392c7832d42cf26d43..46defcb58332a02cbc365a087708e792a66c6e5c 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift
@@ -23,7 +23,7 @@ class FeedParam<P: PrecisionType>: OpParam{
   }
   let scope: Scope
   
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     scope = inScope
     do {
       output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift
index 477384cdac8bdc7dcfdd2c1bbea087d697ca5028..a5d04a4b03a182a4e843a31628bd2892de597093 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift
@@ -19,7 +19,7 @@ class FetchParam<P: PrecisionType>: OpParam{
   var output: FetchHolder
   let input: Texture
   let scope: Scope
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     scope = inScope
     do {
       input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift
index dabb7792a2fc2314a797cd8ea412c7bf689c91cd..8500798adc75f9fac9e960857e9b0de319157c95 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class FlattenParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/metal/BufferToTexture.metal b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/metal/BufferToTexture.metal
new file mode 100644
index 0000000000000000000000000000000000000000..3c07872616bb7c2f130d92247feeeeaa60ece21e
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/metal/BufferToTexture.metal
@@ -0,0 +1,36 @@
+//
+//  BufferToTexture.metal
+//  paddle-mobile-demo
+//
+//  Created by liuRuiLong on 2018/12/28.
+//  Copyright © 2018 orange. All rights reserved.
+//
+
+#include <metal_stdlib>
+using namespace metal;
+
+kernel void buffer_to_texture_kernel(
+                                     const device float *input [[buffer(0)]],
+                                     texture2d<float, access::write> outTexture [[texture(0)]],
+                                     uint2 gid [[thread_position_in_grid]]){
+  // Threads dispatched past the texture edge have nothing to write.
+  const uint width = outTexture.get_width();
+  const bool inBounds = gid.x < width && gid.y < outTexture.get_height();
+  if (!inBounds) { return; }
+  // input is indexed row by row (width * y + x); the value lands in channel x, the rest are zero.
+  const float value = input[width * gid.y + gid.x];
+  outTexture.write(float4(value, 0.0f, 0.0f, 0.0f), gid);
+}
+
+kernel void buffer_to_texture_kernel_half(const device float *input [[buffer(0)]],
+                                          texture2d<half, access::write> outTexture [[texture(0)]],
+                                          uint2 gid [[thread_position_in_grid]]){
+  // Threads dispatched past the texture edge have nothing to write.
+  const uint width = outTexture.get_width();
+  const bool inBounds = gid.x < width && gid.y < outTexture.get_height();
+  if (!inBounds) { return; }
+  // The source buffer holds floats; the value is passed to half4(...) for the half texture write.
+  const float value = input[width * gid.y + gid.x];
+  outTexture.write(half4(value, 0.0f, 0.0f, 0.0f), gid);
+}
+
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift
index 3a10dfdfcb13ccfed2d040107da663337bcf2a12..6d2e46b64986300556898596ea881a254709f472 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class MulticlassNMSParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope)
       bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift
index 4f5ba3a52d745cbe3db3dc4628b89852e840f7b6..e57c8f48e362af8cae8fedbb5a0292775f0ce923 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class PoolParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift
index 06732b87aa3eefc44b0beabe3c07b18057ba42f9..b7150c2fea85b7a6da6ae883e95c751484db6af6 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class PreluParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift
index fe6dea8f437ed63bed76b475fbd03dd64f966ef1..bff7c9870a3dc70e820b02ad775ca8a19527c26d 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class PriorBoxParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs)
     } catch _ {
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift
index 488e3408df67ea08d75f7cc080395e44d9651699..ef109081061c601fb17a23e943dcd01af618b724 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift
@@ -17,7 +17,7 @@ import Foundation
 
 class ReluParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift
index 71019014e5be259ecfa7d9ef287d75729d791501..e40eae02d0c11c0bd372514466b28cef27dea96b 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class ReshapeParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift
index e915f561c86a64c24d34a7345b2eb8d49041188d..980bb734a796c067012855f8a0d0c4ccef33afdb 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ResizeBilinearParam<P: PrecisionType>: OpParam {
   typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope)
 //      if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) {
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift
index 76d63ed86dc96a6d90a4cde29909992cbfa35ceb..c13c3864e4f73bdad1b83e19ca9f66051eea266d 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class ShapeParam<P: PrecisionType>: OpParam {
  // typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope)
       output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift
index d1c7d46fef08ba8c6792758fd4d661aff089b327..2b2455eaa60142f890c7ee5e14244c77854a0ccd 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class SoftmaxParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift
index e3fc8814e21e1081721b3fc522b2f7ccf414d725..4d9933f39275d522cec71ca08a591182433d7bae 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift
@@ -16,7 +16,7 @@ import Foundation
 
 class SplitParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = Texture.init(device: input.metalTexture!.device, inDim: input.dim)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift
index 187442e6a2e78686b826e1d6ad36ad2f00a106fd..064955fcac20937ae3ac8a12f51ef52ab5a00ba9 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift
@@ -17,7 +17,7 @@ import Metal
 
 class TransposeParam<P: PrecisionType>: OpParam {
   //typealias ParamPrecisionType = P
-  required init(opDesc: OpDesc, inScope: Scope) throws {
+  required init(opDesc: PMOpDesc, inScope: Scope) throws {
     do {
       input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope)
       output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift
index c26fd2132e6134dbbd05af08835229a31c231b9d..cc8afc994d12eb8a1de7f06ba97011b16f56c4b5 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift
@@ -35,7 +35,11 @@ extension Array: Attr {
 extension String: Attr {
 }
 
-func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr {
+extension NSMutableArray :Attr {
+}
+
+
+func attrWithProtoDesc(attrDesc: OpDesc_Attr) -> Attr {
     switch attrDesc.type {
     case .boolean:
         return attrDesc.b
@@ -47,14 +51,33 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At
         return attrDesc.l
     case .float:
         return attrDesc.f
+    // Convert the GPB typed arrays (bools/floats/ints) into native Swift arrays.
     case .booleans:
-        return attrDesc.bools
+        var dimsArray = [Bool]()
+        let dimsCount = attrDesc.boolsArray.count
+        for i in 0..<dimsCount {
+            let dim = Bool(attrDesc.boolsArray.value(at: i))
+            dimsArray.append(dim)
+        }
+        return dimsArray
     case .floats:
-        return attrDesc.floats
+        var dimsArray = [Float]()
+        let dimsCount = attrDesc.floatsArray.count
+        for i in 0..<dimsCount {
+            let dim = Float(attrDesc.floatsArray.value(at: i))
+            dimsArray.append(dim)
+        }
+        return dimsArray
     case .ints:
-        return attrDesc.ints
+        var dimsArray = [Int32]()
+        let dimsCount = attrDesc.intsArray.count
+        for i in 0..<dimsCount {
+            let dim = Int32(attrDesc.intsArray.value(at: i))
+            dimsArray.append(dim)
+        }
+        return dimsArray
     case .strings:
-        return attrDesc.strings
+        return attrDesc.stringsArray
     default:
         fatalError(" not support this attr type: \(attrDesc.type)")
     }
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.h b/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.h
new file mode 100755
index 0000000000000000000000000000000000000000..49ba5d45b40ef10d9ef223c4a17ead9e4422ac58
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.h
@@ -0,0 +1,599 @@
+// Generated by the protocol buffer compiler.  DO NOT EDIT!
+// source: Framework.proto
+
+// This CPP symbol can be defined to use imports that match up to the framework
+// imports needed when using CocoaPods.
+#if !defined(GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS)
+ #define GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS 0
+#endif
+
+//#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
+// #import <Protobuf/GPBProtocolBuffers.h>
+//#else
+// #import "GPBProtocolBuffers.h"
+//#endif
+#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
+#import <Protobuf/GPBProtocolBuffers.h>
+#else
+#import <ProtocolBuffers/ProtocolBuffers.h>
+#endif
+
+#if GOOGLE_PROTOBUF_OBJC_GEN_VERSION != 30001
+#error This file was generated by a different version of protoc which is incompatible with your Protocol Buffer library sources.
+#endif
+
+// @@protoc_insertion_point(imports)
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+
+CF_EXTERN_C_BEGIN
+
+@class BlockDesc;
+@class OpDesc;
+@class OpDesc_Attr;
+@class OpDesc_Var;
+@class OpProto_Attr;
+@class OpProto_Var;
+@class VarDesc;
+@class VarType;
+@class VarType_ChannelDesc;
+@class VarType_LoDTensorArrayDesc;
+@class VarType_LoDTensorDesc;
+@class VarType_ReaderDesc;
+@class VarType_TensorDesc;
+@class VarType_Tuple;
+@class Version;
+
+NS_ASSUME_NONNULL_BEGIN
+
+#pragma mark - Enum AttrType
+
+typedef GPB_ENUM(AttrType) {
+  AttrType_Int = 0,
+  AttrType_Float = 1,
+  AttrType_String = 2,
+  AttrType_Ints = 3,
+  AttrType_Floats = 4,
+  AttrType_Strings = 5,
+  AttrType_Boolean = 6,
+  AttrType_Booleans = 7,
+  AttrType_Block = 8,
+  AttrType_Long = 9,
+  AttrType_Blocks = 10,
+};
+
+GPBEnumDescriptor *AttrType_EnumDescriptor(void);
+
+/// Checks to see if the given value is defined by the enum or was not known at
+/// the time this source was generated.
+BOOL AttrType_IsValidValue(int32_t value);
+
+#pragma mark - Enum VarType_Type
+
+typedef GPB_ENUM(VarType_Type) {
+  /// Pod Types
+  VarType_Type_Bool = 0,
+  VarType_Type_Int16 = 1,
+  VarType_Type_Int32 = 2,
+  VarType_Type_Int64 = 3,
+  VarType_Type_Fp16 = 4,
+  VarType_Type_Fp32 = 5,
+  VarType_Type_Fp64 = 6,
+
+  /// Tensor<size_t> is used in C++.
+  VarType_Type_SizeT = 19,
+  VarType_Type_Uint8 = 20,
+  VarType_Type_Int8 = 21,
+
+  /// Other types that may need additional descriptions
+  VarType_Type_LodTensor = 7,
+  VarType_Type_SelectedRows = 8,
+  VarType_Type_FeedMinibatch = 9,
+  VarType_Type_FetchList = 10,
+  VarType_Type_StepScopes = 11,
+  VarType_Type_LodRankTable = 12,
+  VarType_Type_LodTensorArray = 13,
+  VarType_Type_PlaceList = 14,
+  VarType_Type_Reader = 15,
+  VarType_Type_Channel = 16,
+
+  /// Any runtime decided variable type is raw
+  /// raw variables should manage their own allocations
+  /// in operators like nccl_op
+  VarType_Type_Raw = 17,
+  VarType_Type_Tuple = 18,
+};
+
+GPBEnumDescriptor *VarType_Type_EnumDescriptor(void);
+
+/// Checks to see if the given value is defined by the enum or was not known at
+/// the time this source was generated.
+BOOL VarType_Type_IsValidValue(int32_t value);
+
+#pragma mark - FrameworkRoot
+
+/// Exposes the extension registry for this file.
+///
+/// The base class provides:
+/// @code
+///   + (GPBExtensionRegistry *)extensionRegistry;
+/// @endcode
+/// which is a @c GPBExtensionRegistry that includes all the extensions defined by
+/// this file and all files that it depends on.
+@interface FrameworkRoot : GPBRootObject
+@end
+
+#pragma mark - Version
+
+typedef GPB_ENUM(Version_FieldNumber) {
+  Version_FieldNumber_Version = 1,
+};
+
+/// Any incompatible changes to ProgramDesc and its dependencies should
+/// raise the version defined in version.h.
+///
+/// Serialization and deserialization code should be modified in a way
+/// that supports old versions following the version and compatibility policy.
+@interface Version : GPBMessage
+
+@property(nonatomic, readwrite) int64_t version;
+
+@property(nonatomic, readwrite) BOOL hasVersion;
+@end
+
+#pragma mark - OpDesc
+
+typedef GPB_ENUM(OpDesc_FieldNumber) {
+  OpDesc_FieldNumber_InputsArray = 1,
+  OpDesc_FieldNumber_OutputsArray = 2,
+  OpDesc_FieldNumber_Type = 3,
+  OpDesc_FieldNumber_AttrsArray = 4,
+  OpDesc_FieldNumber_IsTarget = 5,
+};
+
+/// OpDesc describes an instance of a C++ framework::OperatorBase
+/// derived class type.
+@interface OpDesc : GPBMessage
+
+@property(nonatomic, readwrite, copy, null_resettable) NSString *type;
+/// Test to see if @c type has been set.
+@property(nonatomic, readwrite) BOOL hasType;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Var*> *inputsArray;
+/// The number of items in @c inputsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger inputsArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Var*> *outputsArray;
+/// The number of items in @c outputsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger outputsArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Attr*> *attrsArray;
+/// The number of items in @c attrsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger attrsArray_Count;
+
+@property(nonatomic, readwrite) BOOL isTarget;
+
+@property(nonatomic, readwrite) BOOL hasIsTarget;
+@end
+
+#pragma mark - OpDesc_Attr
+
+typedef GPB_ENUM(OpDesc_Attr_FieldNumber) {
+  OpDesc_Attr_FieldNumber_Name = 1,
+  OpDesc_Attr_FieldNumber_Type = 2,
+  OpDesc_Attr_FieldNumber_I = 3,
+  OpDesc_Attr_FieldNumber_F = 4,
+  OpDesc_Attr_FieldNumber_S = 5,
+  OpDesc_Attr_FieldNumber_IntsArray = 6,
+  OpDesc_Attr_FieldNumber_FloatsArray = 7,
+  OpDesc_Attr_FieldNumber_StringsArray = 8,
+  OpDesc_Attr_FieldNumber_B = 10,
+  OpDesc_Attr_FieldNumber_BoolsArray = 11,
+  OpDesc_Attr_FieldNumber_BlockIdx = 12,
+  OpDesc_Attr_FieldNumber_L = 13,
+  OpDesc_Attr_FieldNumber_BlocksIdxArray = 14,
+};
+
+@interface OpDesc_Attr : GPBMessage
+
+@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
+/// Test to see if @c name has been set.
+@property(nonatomic, readwrite) BOOL hasName;
+
+@property(nonatomic, readwrite) AttrType type;
+
+@property(nonatomic, readwrite) BOOL hasType;
+@property(nonatomic, readwrite) int32_t i;
+
+@property(nonatomic, readwrite) BOOL hasI;
+@property(nonatomic, readwrite) float f;
+
+@property(nonatomic, readwrite) BOOL hasF;
+@property(nonatomic, readwrite, copy, null_resettable) NSString *s;
+/// Test to see if @c s has been set.
+@property(nonatomic, readwrite) BOOL hasS;
+
+@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *intsArray;
+/// The number of items in @c intsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger intsArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) GPBFloatArray *floatsArray;
+/// The number of items in @c floatsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger floatsArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<NSString*> *stringsArray;
+/// The number of items in @c stringsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger stringsArray_Count;
+
+@property(nonatomic, readwrite) BOOL b;
+
+@property(nonatomic, readwrite) BOOL hasB;
+@property(nonatomic, readwrite, strong, null_resettable) GPBBoolArray *boolsArray;
+/// The number of items in @c boolsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger boolsArray_Count;
+
+@property(nonatomic, readwrite) int32_t blockIdx;
+
+@property(nonatomic, readwrite) BOOL hasBlockIdx;
+@property(nonatomic, readwrite) int64_t l;
+
+@property(nonatomic, readwrite) BOOL hasL;
+@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *blocksIdxArray;
+/// The number of items in @c blocksIdxArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger blocksIdxArray_Count;
+
+@end
+
+#pragma mark - OpDesc_Var
+
+typedef GPB_ENUM(OpDesc_Var_FieldNumber) {
+  OpDesc_Var_FieldNumber_Parameter = 1,
+  OpDesc_Var_FieldNumber_ArgumentsArray = 2,
+};
+
+@interface OpDesc_Var : GPBMessage
+
+@property(nonatomic, readwrite, copy, null_resettable) NSString *parameter;
+/// Test to see if @c parameter has been set.
+@property(nonatomic, readwrite) BOOL hasParameter;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<NSString*> *argumentsArray;
+/// The number of items in @c argumentsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger argumentsArray_Count;
+
+@end
+
+#pragma mark - OpProto
+
+typedef GPB_ENUM(OpProto_FieldNumber) {
+  OpProto_FieldNumber_Type = 1,
+  OpProto_FieldNumber_InputsArray = 2,
+  OpProto_FieldNumber_OutputsArray = 3,
+  OpProto_FieldNumber_AttrsArray = 4,
+  OpProto_FieldNumber_Comment = 5,
+};
+
+/// OpProto describes a C++ framework::OperatorBase derived class.
+@interface OpProto : GPBMessage
+// Fields: `type` and `comment` strings, plus repeated OpProto_Var inputs/outputs and OpProto_Attr attrs.
+@property(nonatomic, readwrite, copy, null_resettable) NSString *type;
+/// Test to see if @c type has been set.
+@property(nonatomic, readwrite) BOOL hasType;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Var*> *inputsArray;
+/// The number of items in @c inputsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger inputsArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Var*> *outputsArray;
+/// The number of items in @c outputsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger outputsArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Attr*> *attrsArray;
+/// The number of items in @c attrsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger attrsArray_Count;
+
+@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
+/// Test to see if @c comment has been set.
+@property(nonatomic, readwrite) BOOL hasComment;
+
+@end
+
+#pragma mark - OpProto_Var
+
+typedef GPB_ENUM(OpProto_Var_FieldNumber) {  // Field numbers for OpProto_Var; used as `.number` in its descriptor field table.
+  OpProto_Var_FieldNumber_Name = 1,
+  OpProto_Var_FieldNumber_Comment = 2,
+  OpProto_Var_FieldNumber_Duplicable = 3,
+  OpProto_Var_FieldNumber_Intermediate = 4,
+  OpProto_Var_FieldNumber_Dispensable = 5,
+  OpProto_Var_FieldNumber_Reuse = 6,
+};
+
+/// VarProto describes the C++ type framework::Variable.
+@interface OpProto_Var : GPBMessage
+// Fields: `name`, `comment` and `reuse` strings, plus three BOOL flags; each field has a matching has-flag.
+@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
+/// Test to see if @c name has been set.
+@property(nonatomic, readwrite) BOOL hasName;
+
+@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
+/// Test to see if @c comment has been set.
+@property(nonatomic, readwrite) BOOL hasComment;
+
+@property(nonatomic, readwrite) BOOL duplicable;
+/// Test to see if @c duplicable has been set.
+@property(nonatomic, readwrite) BOOL hasDuplicable;
+@property(nonatomic, readwrite) BOOL intermediate;
+/// Test to see if @c intermediate has been set.
+@property(nonatomic, readwrite) BOOL hasIntermediate;
+@property(nonatomic, readwrite) BOOL dispensable;
+/// Test to see if @c dispensable has been set.
+@property(nonatomic, readwrite) BOOL hasDispensable;
+@property(nonatomic, readwrite, copy, null_resettable) NSString *reuse;
+/// Test to see if @c reuse has been set.
+@property(nonatomic, readwrite) BOOL hasReuse;
+
+@end
+
+#pragma mark - OpProto_Attr
+
+typedef GPB_ENUM(OpProto_Attr_FieldNumber) {  // Field numbers for OpProto_Attr; used as `.number` in its descriptor field table.
+  OpProto_Attr_FieldNumber_Name = 1,
+  OpProto_Attr_FieldNumber_Type = 2,
+  OpProto_Attr_FieldNumber_Comment = 3,
+  OpProto_Attr_FieldNumber_Generated = 4,
+};
+
+/// AttrProto describes the C++ type Attribute.
+@interface OpProto_Attr : GPBMessage
+// Fields: `name` and `comment` strings, an AttrType `type`, and a BOOL `generated`.
+@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
+/// Test to see if @c name has been set.
+@property(nonatomic, readwrite) BOOL hasName;
+
+@property(nonatomic, readwrite) AttrType type;
+/// Test to see if @c type has been set.
+@property(nonatomic, readwrite) BOOL hasType;
+@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
+/// Test to see if @c comment has been set.
+@property(nonatomic, readwrite) BOOL hasComment;
+
+/// If that attribute is generated, it means the Paddle third
+/// language binding has responsibility to fill that
+/// attribute. End-User should not set that attribute.
+@property(nonatomic, readwrite) BOOL generated;
+/// Test to see if @c generated has been set.
+@property(nonatomic, readwrite) BOOL hasGenerated;
+@end
+
+#pragma mark - VarType
+
+typedef GPB_ENUM(VarType_FieldNumber) {  // Field numbers for the VarType message.
+  VarType_FieldNumber_Type = 1,
+  VarType_FieldNumber_SelectedRows = 2,
+  VarType_FieldNumber_LodTensor = 3,
+  VarType_FieldNumber_TensorArray_p = 4,
+  VarType_FieldNumber_Reader = 5,
+  VarType_FieldNumber_Channel = 6,
+  VarType_FieldNumber_Tuple = 7,
+};
+
+@interface VarType : GPBMessage
+// A VarType_Type `type` value plus six sub-message fields, each paired with its own has-flag.
+@property(nonatomic, readwrite) VarType_Type type;
+/// Test to see if @c type has been set.
+@property(nonatomic, readwrite) BOOL hasType;
+@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *selectedRows;
+/// Test to see if @c selectedRows has been set.
+@property(nonatomic, readwrite) BOOL hasSelectedRows;
+
+@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorDesc *lodTensor;
+/// Test to see if @c lodTensor has been set.
+@property(nonatomic, readwrite) BOOL hasLodTensor;
+
+@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorArrayDesc *tensorArray_p;
+/// Test to see if @c tensorArray_p has been set.
+@property(nonatomic, readwrite) BOOL hasTensorArray_p;
+
+@property(nonatomic, readwrite, strong, null_resettable) VarType_ReaderDesc *reader;
+/// Test to see if @c reader has been set.
+@property(nonatomic, readwrite) BOOL hasReader;
+
+@property(nonatomic, readwrite, strong, null_resettable) VarType_ChannelDesc *channel;
+/// Test to see if @c channel has been set.
+@property(nonatomic, readwrite) BOOL hasChannel;
+
+@property(nonatomic, readwrite, strong, null_resettable) VarType_Tuple *tuple;
+/// Test to see if @c tuple has been set.
+@property(nonatomic, readwrite) BOOL hasTuple;
+
+@end
+
+#pragma mark - VarType_TensorDesc
+
+typedef GPB_ENUM(VarType_TensorDesc_FieldNumber) {  // Field numbers for the VarType_TensorDesc message.
+  VarType_TensorDesc_FieldNumber_DataType = 1,
+  VarType_TensorDesc_FieldNumber_DimsArray = 2,
+};
+
+@interface VarType_TensorDesc : GPBMessage
+// Fields: a VarType_Type `dataType` and a repeated int64 list `dimsArray`.
+/// Should only be PODType. Is enforced in C++
+@property(nonatomic, readwrite) VarType_Type dataType;
+/// Test to see if @c dataType has been set.
+@property(nonatomic, readwrite) BOOL hasDataType;
+/// [UNK, 640, 480] is saved as [-1, 640, 480]
+@property(nonatomic, readwrite, strong, null_resettable) GPBInt64Array *dimsArray;
+/// The number of items in @c dimsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger dimsArray_Count;
+
+@end
+
+#pragma mark - VarType_LoDTensorDesc
+
+typedef GPB_ENUM(VarType_LoDTensorDesc_FieldNumber) {  // Field numbers for the VarType_LoDTensorDesc message.
+  VarType_LoDTensorDesc_FieldNumber_Tensor = 1,
+  VarType_LoDTensorDesc_FieldNumber_LodLevel = 2,
+};
+
+@interface VarType_LoDTensorDesc : GPBMessage
+// Fields: a VarType_TensorDesc sub-message `tensor` and an int32 `lodLevel`.
+@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor;
+/// Test to see if @c tensor has been set.
+@property(nonatomic, readwrite) BOOL hasTensor;
+
+@property(nonatomic, readwrite) int32_t lodLevel;
+/// Test to see if @c lodLevel has been set.
+@property(nonatomic, readwrite) BOOL hasLodLevel;
+@end
+
+#pragma mark - VarType_LoDTensorArrayDesc
+
+typedef GPB_ENUM(VarType_LoDTensorArrayDesc_FieldNumber) {  // Field numbers for the VarType_LoDTensorArrayDesc message.
+  VarType_LoDTensorArrayDesc_FieldNumber_Tensor = 1,
+  VarType_LoDTensorArrayDesc_FieldNumber_LodLevel = 2,
+};
+
+@interface VarType_LoDTensorArrayDesc : GPBMessage
+// Same property set as VarType_LoDTensorDesc: a `tensor` sub-message and an int32 `lodLevel`.
+@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor;
+/// Test to see if @c tensor has been set.
+@property(nonatomic, readwrite) BOOL hasTensor;
+
+@property(nonatomic, readwrite) int32_t lodLevel;
+/// Test to see if @c lodLevel has been set.
+@property(nonatomic, readwrite) BOOL hasLodLevel;
+@end
+
+#pragma mark - VarType_ReaderDesc
+
+typedef GPB_ENUM(VarType_ReaderDesc_FieldNumber) {  // Field numbers for the VarType_ReaderDesc message.
+  VarType_ReaderDesc_FieldNumber_LodTensorArray = 1,
+};
+
+@interface VarType_ReaderDesc : GPBMessage
+// Single field: a repeated list of VarType_LoDTensorDesc messages.
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<VarType_LoDTensorDesc*> *lodTensorArray;
+/// The number of items in @c lodTensorArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger lodTensorArray_Count;
+
+@end
+
+#pragma mark - VarType_ChannelDesc
+
+typedef GPB_ENUM(VarType_ChannelDesc_FieldNumber) {  // Field numbers for the VarType_ChannelDesc message.
+  VarType_ChannelDesc_FieldNumber_DataType = 1,
+  VarType_ChannelDesc_FieldNumber_Capacity = 2,
+};
+
+@interface VarType_ChannelDesc : GPBMessage
+// Fields: a VarType_Type `dataType` and an int64 `capacity`.
+@property(nonatomic, readwrite) VarType_Type dataType;
+/// Test to see if @c dataType has been set.
+@property(nonatomic, readwrite) BOOL hasDataType;
+@property(nonatomic, readwrite) int64_t capacity;
+/// Test to see if @c capacity has been set.
+@property(nonatomic, readwrite) BOOL hasCapacity;
+@end
+
+#pragma mark - VarType_Tuple
+
+typedef GPB_ENUM(VarType_Tuple_FieldNumber) {  // Field numbers for the VarType_Tuple message.
+  VarType_Tuple_FieldNumber_ElementTypeArray = 1,
+};
+
+@interface VarType_Tuple : GPBMessage
+// Single field: a repeated list of enum values held in a GPBEnumArray.
+// |elementTypeArray| contains |VarType_Type|
+@property(nonatomic, readwrite, strong, null_resettable) GPBEnumArray *elementTypeArray;
+/// The number of items in @c elementTypeArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger elementTypeArray_Count;
+
+@end
+
+#pragma mark - VarDesc
+
+typedef GPB_ENUM(VarDesc_FieldNumber) {  // Field numbers for the VarDesc message.
+  VarDesc_FieldNumber_Name = 1,
+  VarDesc_FieldNumber_Type = 2,
+  VarDesc_FieldNumber_Persistable = 3,
+};
+
+@interface VarDesc : GPBMessage
+// Fields: a `name` string, a VarType sub-message `type`, and a BOOL `persistable`.
+@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
+/// Test to see if @c name has been set.
+@property(nonatomic, readwrite) BOOL hasName;
+
+@property(nonatomic, readwrite, strong, null_resettable) VarType *type;
+/// Test to see if @c type has been set.
+@property(nonatomic, readwrite) BOOL hasType;
+
+@property(nonatomic, readwrite) BOOL persistable;
+/// Test to see if @c persistable has been set.
+@property(nonatomic, readwrite) BOOL hasPersistable;
+@end
+
+#pragma mark - BlockDesc
+
+typedef GPB_ENUM(BlockDesc_FieldNumber) {  // Field numbers for the BlockDesc message.
+  BlockDesc_FieldNumber_Idx = 1,
+  BlockDesc_FieldNumber_ParentIdx = 2,
+  BlockDesc_FieldNumber_VarsArray = 3,
+  BlockDesc_FieldNumber_OpsArray = 4,
+  BlockDesc_FieldNumber_ForwardBlockIdx = 5,
+};
+
+@interface BlockDesc : GPBMessage
+// Fields: int32 `idx`, `parentIdx` and `forwardBlockIdx`, plus repeated VarDesc and OpDesc lists.
+@property(nonatomic, readwrite) int32_t idx;
+/// Test to see if @c idx has been set.
+@property(nonatomic, readwrite) BOOL hasIdx;
+@property(nonatomic, readwrite) int32_t parentIdx;
+/// Test to see if @c parentIdx has been set.
+@property(nonatomic, readwrite) BOOL hasParentIdx;
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<VarDesc*> *varsArray;
+/// The number of items in @c varsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger varsArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc*> *opsArray;
+/// The number of items in @c opsArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger opsArray_Count;
+
+@property(nonatomic, readwrite) int32_t forwardBlockIdx;
+/// Test to see if @c forwardBlockIdx has been set.
+@property(nonatomic, readwrite) BOOL hasForwardBlockIdx;
+@end
+
+#pragma mark - ProgramDesc
+
+typedef GPB_ENUM(ProgramDesc_FieldNumber) {  // Field numbers for the ProgramDesc message.
+  ProgramDesc_FieldNumber_BlocksArray = 1,
+  ProgramDesc_FieldNumber_Version = 2,
+};
+
+/// Please refer to
+/// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
+/// for more details.
+/// TODO(panyx0718): A model can have multiple programs. Need a
+/// way to distinguish them. Maybe ID or name?
+@interface ProgramDesc : GPBMessage
+// Fields: a repeated list of BlockDesc messages and a Version sub-message.
+@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<BlockDesc*> *blocksArray;
+/// The number of items in @c blocksArray without causing the array to be created.
+@property(nonatomic, readonly) NSUInteger blocksArray_Count;
+
+@property(nonatomic, readwrite, strong, null_resettable) Version *version;
+/// Test to see if @c version has been set.
+@property(nonatomic, readwrite) BOOL hasVersion;
+
+@end
+
+NS_ASSUME_NONNULL_END
+
+CF_EXTERN_C_END
+
+#pragma clang diagnostic pop
+
+// @@protoc_insertion_point(global_scope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.m b/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.m
new file mode 100755
index 0000000000000000000000000000000000000000..00dad2662c232fdc6183179b4068916cc71a596c
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.m
@@ -0,0 +1,1417 @@
+// Generated by the protocol buffer compiler.  DO NOT EDIT!
+// source: Framework.proto
+
+// This CPP symbol can be defined to use imports that match up to the framework
+// imports needed when using CocoaPods.
+#if !defined(GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS)
+ #define GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS 0
+#endif
+
+// Commented-out variant of the import block below; the two differ only in the #else import path.
+//#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
+// #import <Protobuf/GPBProtocolBuffers_RuntimeSupport.h>
+//#else
+// #import "GPBProtocolBuffers_RuntimeSupport.h"
+//#endif
+#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
+#import <Protobuf/GPBProtocolBuffers_RuntimeSupport.h>
+#else
+#import <ProtocolBuffers/GPBProtocolBuffers_RuntimeSupport.h>
+#endif
+
+ #import "Framework.pbobjc.h"
+// @@protoc_insertion_point(imports)
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
+
+#pragma mark - FrameworkRoot
+
+@implementation FrameworkRoot
+// Intentionally empty here; the class is passed as `rootClass:` when message descriptors are built below.
+@end
+
+#pragma mark - FrameworkRoot_FileDescriptor
+
+static GPBFileDescriptor *FrameworkRoot_FileDescriptor(void) {  // Returns the file descriptor shared by every message in this file.
+  // This is called by +initialize so there is no need to worry
+  // about thread safety of the singleton.
+  static GPBFileDescriptor *descriptor = NULL;  // Created on the first call, returned as-is on later calls.
+  if (!descriptor) {
+    GPBDebugCheckRuntimeVersion();
+    descriptor = [[GPBFileDescriptor alloc] initWithPackage:@"paddle_mobile.framework.proto"
+                                                     syntax:GPBFileSyntaxProto2];
+  }
+  return descriptor;
+}
+
+#pragma mark - Enum AttrType
+
+GPBEnumDescriptor *AttrType_EnumDescriptor(void) {  // Returns the lazily created descriptor for the AttrType enum.
+  static GPBEnumDescriptor *descriptor = NULL;
+  if (!descriptor) {
+    static const char *valueNames =  // All value names in one string, each followed by an embedded NUL.
+        "Int\000Float\000String\000Ints\000Floats\000Strings\000Boo"
+        "lean\000Booleans\000Block\000Long\000Blocks\000";
+    static const int32_t values[] = {  // Listed in the same order as the names in valueNames.
+        AttrType_Int,
+        AttrType_Float,
+        AttrType_String,
+        AttrType_Ints,
+        AttrType_Floats,
+        AttrType_Strings,
+        AttrType_Boolean,
+        AttrType_Booleans,
+        AttrType_Block,
+        AttrType_Long,
+        AttrType_Blocks,
+    };
+    GPBEnumDescriptor *worker =
+        [GPBEnumDescriptor allocDescriptorForName:GPBNSStringifySymbol(AttrType)
+                                       valueNames:valueNames
+                                           values:values
+                                            count:(uint32_t)(sizeof(values) / sizeof(int32_t))
+                                     enumVerifier:AttrType_IsValidValue];
+    if (!OSAtomicCompareAndSwapPtrBarrier(nil, worker, (void * volatile *)&descriptor)) {
+      [worker release];  // Swap failed because descriptor was no longer nil, so drop this redundant copy.
+    }
+  }
+  return descriptor;
+}
+
+BOOL AttrType_IsValidValue(int32_t value__) {  // YES only when value__ equals one of the AttrType enumerators listed below.
+  switch (value__) {
+    case AttrType_Int:
+    case AttrType_Float:
+    case AttrType_String:
+    case AttrType_Ints:
+    case AttrType_Floats:
+    case AttrType_Strings:
+    case AttrType_Boolean:
+    case AttrType_Booleans:
+    case AttrType_Block:
+    case AttrType_Long:
+    case AttrType_Blocks:
+      return YES;
+    default:  // Any other integer is rejected.
+      return NO;
+  }
+}
+
+#pragma mark - Version
+
+@implementation Version
+// @dynamic: the compiler synthesizes no accessors here (presumably GPBMessage supplies them at runtime).
+@dynamic hasVersion, version;
+// Storage layout; the field table in +descriptor refers to its members through offsetof().
+typedef struct Version__storage_ {
+  uint32_t _has_storage_[1];
+  int64_t version;
+} Version__storage_;
+
+// This method is threadsafe because it is initially called
+// in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Built on the first call, then returned unchanged.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {
+      {
+        .name = "version",
+        .dataTypeSpecific.className = NULL,
+        .number = Version_FieldNumber_Version,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(Version__storage_, version),
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeInt64,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[Version class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(Version__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The static must still be unset once building finishes.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - OpDesc
+
+@implementation OpDesc
+// @dynamic: the compiler synthesizes no accessors here (presumably GPBMessage supplies them at runtime).
+@dynamic hasType, type;
+@dynamic inputsArray, inputsArray_Count;
+@dynamic outputsArray, outputsArray_Count;
+@dynamic attrsArray, attrsArray_Count;
+@dynamic hasIsTarget, isTarget;
+// Storage layout. There is no member for isTarget: its field entry keeps the BOOL inside _has_storage_.
+typedef struct OpDesc__storage_ {
+  uint32_t _has_storage_[1];
+  NSMutableArray *inputsArray;
+  NSMutableArray *outputsArray;
+  NSString *type;
+  NSMutableArray *attrsArray;
+} OpDesc__storage_;
+
+// This method is threadsafe because it is initially called
+// in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Built on the first call, then returned unchanged.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {
+      {
+        .name = "inputsArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(OpDesc_Var),
+        .number = OpDesc_FieldNumber_InputsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc__storage_, inputsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "outputsArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(OpDesc_Var),
+        .number = OpDesc_FieldNumber_OutputsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc__storage_, outputsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "type",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_FieldNumber_Type,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(OpDesc__storage_, type),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "attrsArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(OpDesc_Attr),
+        .number = OpDesc_FieldNumber_AttrsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc__storage_, attrsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "isTarget",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_FieldNumber_IsTarget,
+        .hasIndex = 1,
+        .offset = 2,  // Stored in _has_storage_ to save space.
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeBool,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[OpDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(OpDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The static must still be unset once building finishes.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - OpDesc_Attr
+
+@implementation OpDesc_Attr
+// @dynamic: the compiler synthesizes no accessors here (presumably GPBMessage supplies them at runtime).
+@dynamic hasName, name;
+@dynamic hasType, type;
+@dynamic hasI, i;
+@dynamic hasF, f;
+@dynamic hasS, s;
+@dynamic intsArray, intsArray_Count;
+@dynamic floatsArray, floatsArray_Count;
+@dynamic stringsArray, stringsArray_Count;
+@dynamic hasB, b;
+@dynamic boolsArray, boolsArray_Count;
+@dynamic hasBlockIdx, blockIdx;
+@dynamic hasL, l;
+@dynamic blocksIdxArray, blocksIdxArray_Count;
+// Storage layout. There is no member for b: its field entry keeps the BOOL inside _has_storage_.
+typedef struct OpDesc_Attr__storage_ {
+  uint32_t _has_storage_[1];
+  AttrType type;
+  int32_t i;
+  float f;
+  int32_t blockIdx;
+  NSString *name;
+  NSString *s;
+  GPBInt32Array *intsArray;
+  GPBFloatArray *floatsArray;
+  NSMutableArray *stringsArray;
+  GPBBoolArray *boolsArray;
+  GPBInt32Array *blocksIdxArray;
+  int64_t l;
+} OpDesc_Attr__storage_;
+
+// This method is threadsafe because it is initially called
+// in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Built on the first call, then returned unchanged.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {
+      {
+        .name = "name",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_Name,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, name),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "type",
+        .dataTypeSpecific.enumDescFunc = AttrType_EnumDescriptor,
+        .number = OpDesc_Attr_FieldNumber_Type,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, type),
+        .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor,
+        .dataType = GPBDataTypeEnum,
+      },
+      {
+        .name = "i",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_I,
+        .hasIndex = 2,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, i),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeInt32,
+      },
+      {
+        .name = "f",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_F,
+        .hasIndex = 3,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, f),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeFloat,
+      },
+      {
+        .name = "s",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_S,
+        .hasIndex = 4,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, s),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "intsArray",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_IntsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, intsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeInt32,
+      },
+      {
+        .name = "floatsArray",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_FloatsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, floatsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeFloat,
+      },
+      {
+        .name = "stringsArray",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_StringsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, stringsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "b",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_B,
+        .hasIndex = 5,
+        .offset = 6,  // Stored in _has_storage_ to save space.
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeBool,
+      },
+      {
+        .name = "boolsArray",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_BoolsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, boolsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeBool,
+      },
+      {
+        .name = "blockIdx",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_BlockIdx,
+        .hasIndex = 7,  // 6 is skipped: it is already used as the .offset of "b" above.
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, blockIdx),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeInt32,
+      },
+      {
+        .name = "l",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_L,
+        .hasIndex = 8,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, l),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeInt64,
+      },
+      {
+        .name = "blocksIdxArray",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Attr_FieldNumber_BlocksIdxArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, blocksIdxArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeInt32,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[OpDesc_Attr class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(OpDesc_Attr__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The static must still be unset once building finishes.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - OpDesc_Var
+
+@implementation OpDesc_Var
+// @dynamic: the compiler synthesizes no accessors here (presumably GPBMessage supplies them at runtime).
+@dynamic hasParameter, parameter;
+@dynamic argumentsArray, argumentsArray_Count;
+// Storage layout; the field table in +descriptor refers to its members through offsetof().
+typedef struct OpDesc_Var__storage_ {
+  uint32_t _has_storage_[1];
+  NSString *parameter;
+  NSMutableArray *argumentsArray;
+} OpDesc_Var__storage_;
+
+// This method is threadsafe because it is initially called
+// in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Built on the first call, then returned unchanged.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {
+      {
+        .name = "parameter",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Var_FieldNumber_Parameter,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(OpDesc_Var__storage_, parameter),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "argumentsArray",
+        .dataTypeSpecific.className = NULL,
+        .number = OpDesc_Var_FieldNumber_ArgumentsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpDesc_Var__storage_, argumentsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeString,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[OpDesc_Var class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(OpDesc_Var__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The static must still be unset once building finishes.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - OpProto
+
+@implementation OpProto
+// @dynamic: the compiler synthesizes no accessors here (presumably GPBMessage supplies them at runtime).
+@dynamic hasType, type;
+@dynamic inputsArray, inputsArray_Count;
+@dynamic outputsArray, outputsArray_Count;
+@dynamic attrsArray, attrsArray_Count;
+@dynamic hasComment, comment;
+// Storage layout; the field table in +descriptor refers to its members through offsetof().
+typedef struct OpProto__storage_ {
+  uint32_t _has_storage_[1];
+  NSString *type;
+  NSMutableArray *inputsArray;
+  NSMutableArray *outputsArray;
+  NSMutableArray *attrsArray;
+  NSString *comment;
+} OpProto__storage_;
+
+// This method is threadsafe because it is initially called
+// in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Built on the first call, then returned unchanged.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {
+      {
+        .name = "type",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_FieldNumber_Type,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(OpProto__storage_, type),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "inputsArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(OpProto_Var),
+        .number = OpProto_FieldNumber_InputsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpProto__storage_, inputsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "outputsArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(OpProto_Var),
+        .number = OpProto_FieldNumber_OutputsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpProto__storage_, outputsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "attrsArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(OpProto_Attr),
+        .number = OpProto_FieldNumber_AttrsArray,
+        .hasIndex = GPBNoHasBit,
+        .offset = (uint32_t)offsetof(OpProto__storage_, attrsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "comment",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_FieldNumber_Comment,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(OpProto__storage_, comment),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[OpProto class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(OpProto__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The static must still be unset once building finishes.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - OpProto_Var
+
+@implementation OpProto_Var
+// @dynamic: the compiler synthesizes no accessors here (presumably GPBMessage supplies them at runtime).
+@dynamic hasName, name;
+@dynamic hasComment, comment;
+@dynamic hasDuplicable, duplicable;
+@dynamic hasIntermediate, intermediate;
+@dynamic hasDispensable, dispensable;
+@dynamic hasReuse, reuse;
+// Storage layout. The three BOOL fields have no members: their field entries keep them inside _has_storage_.
+typedef struct OpProto_Var__storage_ {
+  uint32_t _has_storage_[1];
+  NSString *name;
+  NSString *comment;
+  NSString *reuse;
+} OpProto_Var__storage_;
+
+// This method is threadsafe because it is initially called
+// in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Built on the first call, then returned unchanged.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {
+      {
+        .name = "name",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Var_FieldNumber_Name,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(OpProto_Var__storage_, name),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "comment",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Var_FieldNumber_Comment,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(OpProto_Var__storage_, comment),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "duplicable",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Var_FieldNumber_Duplicable,
+        .hasIndex = 2,
+        .offset = 3,  // Stored in _has_storage_ to save space.
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeBool,
+      },
+      {
+        .name = "intermediate",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Var_FieldNumber_Intermediate,
+        .hasIndex = 4,  // 3 is skipped: it is already used as the .offset of "duplicable" above.
+        .offset = 5,  // Stored in _has_storage_ to save space.
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeBool,
+      },
+      {
+        .name = "dispensable",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Var_FieldNumber_Dispensable,
+        .hasIndex = 6,  // 5 is skipped: it is already used as the .offset of "intermediate" above.
+        .offset = 7,  // Stored in _has_storage_ to save space.
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeBool,
+      },
+      {
+        .name = "reuse",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Var_FieldNumber_Reuse,
+        .hasIndex = 8,  // 7 is skipped: it is already used as the .offset of "dispensable" above.
+        .offset = (uint32_t)offsetof(OpProto_Var__storage_, reuse),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeString,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[OpProto_Var class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(OpProto_Var__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The static must still be unset once building finishes.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - OpProto_Attr
+
+@implementation OpProto_Attr  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasName, name;
+@dynamic hasType, type;
+@dynamic hasComment, comment;
+@dynamic hasGenerated, generated;
+
+typedef struct OpProto_Attr__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];  // Also holds the bool `generated` (see its field entry); the struct has no member for it.
+  AttrType type;
+  NSString *name;
+  NSString *comment;
+} OpProto_Attr__storage_;
+
+// Returns the GPBDescriptor for OpProto_Attr, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // One entry per field: name, type, bool `generated`, comment.
+      {
+        .name = "name",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Attr_FieldNumber_Name,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(OpProto_Attr__storage_, name),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "type",
+        .dataTypeSpecific.enumDescFunc = AttrType_EnumDescriptor,
+        .number = OpProto_Attr_FieldNumber_Type,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(OpProto_Attr__storage_, type),
+        .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor,
+        .dataType = GPBDataTypeEnum,
+      },
+      {
+        .name = "comment",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Attr_FieldNumber_Comment,
+        .hasIndex = 2,
+        .offset = (uint32_t)offsetof(OpProto_Attr__storage_, comment),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "generated",
+        .dataTypeSpecific.className = NULL,
+        .number = OpProto_Attr_FieldNumber_Generated,
+        .hasIndex = 3,
+        .offset = 4,  // Stored in _has_storage_ to save space.
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeBool,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[OpProto_Attr class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(OpProto_Attr__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - VarType
+
+@implementation VarType  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasType, type;
+@dynamic hasSelectedRows, selectedRows;
+@dynamic hasLodTensor, lodTensor;
+@dynamic hasTensorArray_p, tensorArray_p;
+@dynamic hasReader, reader;
+@dynamic hasChannel, channel;
+@dynamic hasTuple, tuple;
+
+typedef struct VarType__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  VarType_Type type;
+  VarType_TensorDesc *selectedRows;
+  VarType_LoDTensorDesc *lodTensor;
+  VarType_LoDTensorArrayDesc *tensorArray_p;
+  VarType_ReaderDesc *reader;
+  VarType_ChannelDesc *channel;
+  VarType_Tuple *tuple;
+} VarType__storage_;
+
+// Returns the GPBDescriptor for VarType, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Required enum `type`, then six optional sub-message fields.
+      {
+        .name = "type",
+        .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor,
+        .number = VarType_FieldNumber_Type,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(VarType__storage_, type),
+        .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor,
+        .dataType = GPBDataTypeEnum,
+      },
+      {
+        .name = "selectedRows",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_TensorDesc),
+        .number = VarType_FieldNumber_SelectedRows,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(VarType__storage_, selectedRows),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "lodTensor",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_LoDTensorDesc),
+        .number = VarType_FieldNumber_LodTensor,
+        .hasIndex = 2,
+        .offset = (uint32_t)offsetof(VarType__storage_, lodTensor),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "tensorArray_p",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_LoDTensorArrayDesc),
+        .number = VarType_FieldNumber_TensorArray_p,
+        .hasIndex = 3,
+        .offset = (uint32_t)offsetof(VarType__storage_, tensorArray_p),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "reader",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_ReaderDesc),
+        .number = VarType_FieldNumber_Reader,
+        .hasIndex = 4,
+        .offset = (uint32_t)offsetof(VarType__storage_, reader),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "channel",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_ChannelDesc),
+        .number = VarType_FieldNumber_Channel,
+        .hasIndex = 5,
+        .offset = (uint32_t)offsetof(VarType__storage_, channel),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "tuple",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_Tuple),
+        .number = VarType_FieldNumber_Tuple,
+        .hasIndex = 6,
+        .offset = (uint32_t)offsetof(VarType__storage_, tuple),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeMessage,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarType class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarType__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - Enum VarType_Type
+
+GPBEnumDescriptor *VarType_Type_EnumDescriptor(void) {  // Returns the descriptor for enum VarType_Type, creating and caching it on first use.
+  static GPBEnumDescriptor *descriptor = NULL;  // Cache shared by all callers.
+  if (!descriptor) {
+    static const char *valueNames =  // NUL-separated value names, listed in the same order as values[] below.
+        "Bool\000Int16\000Int32\000Int64\000Fp16\000Fp32\000Fp64\000Si"
+        "zeT\000Uint8\000Int8\000LodTensor\000SelectedRows\000Fe"
+        "edMinibatch\000FetchList\000StepScopes\000LodRank"
+        "Table\000LodTensorArray\000PlaceList\000Reader\000Ch"
+        "annel\000Raw\000Tuple\000";
+    static const int32_t values[] = {  // Numeric constants, one per name in valueNames.
+        VarType_Type_Bool,
+        VarType_Type_Int16,
+        VarType_Type_Int32,
+        VarType_Type_Int64,
+        VarType_Type_Fp16,
+        VarType_Type_Fp32,
+        VarType_Type_Fp64,
+        VarType_Type_SizeT,
+        VarType_Type_Uint8,
+        VarType_Type_Int8,
+        VarType_Type_LodTensor,
+        VarType_Type_SelectedRows,
+        VarType_Type_FeedMinibatch,
+        VarType_Type_FetchList,
+        VarType_Type_StepScopes,
+        VarType_Type_LodRankTable,
+        VarType_Type_LodTensorArray,
+        VarType_Type_PlaceList,
+        VarType_Type_Reader,
+        VarType_Type_Channel,
+        VarType_Type_Raw,
+        VarType_Type_Tuple,
+    };
+    GPBEnumDescriptor *worker =
+        [GPBEnumDescriptor allocDescriptorForName:GPBNSStringifySymbol(VarType_Type)
+                                       valueNames:valueNames
+                                           values:values
+                                            count:(uint32_t)(sizeof(values) / sizeof(int32_t))
+                                     enumVerifier:VarType_Type_IsValidValue];
+    if (!OSAtomicCompareAndSwapPtrBarrier(nil, worker, (void * volatile *)&descriptor)) {  // Installs worker only if descriptor is still nil.
+      [worker release];  // The swap failed, so descriptor was already set; drop this duplicate.
+    }  // NOTE(review): OSAtomic* is deprecated in recent Apple SDKs; prefer regenerating with a newer protoc over hand-editing.
+  }
+  return descriptor;
+}
+
+BOOL VarType_Type_IsValidValue(int32_t value__) {  // YES if value__ equals one of the VarType_Type constants listed below, NO otherwise.
+  switch (value__) {
+    case VarType_Type_Bool:
+    case VarType_Type_Int16:
+    case VarType_Type_Int32:
+    case VarType_Type_Int64:
+    case VarType_Type_Fp16:
+    case VarType_Type_Fp32:
+    case VarType_Type_Fp64:
+    case VarType_Type_SizeT:
+    case VarType_Type_Uint8:
+    case VarType_Type_Int8:
+    case VarType_Type_LodTensor:
+    case VarType_Type_SelectedRows:
+    case VarType_Type_FeedMinibatch:
+    case VarType_Type_FetchList:
+    case VarType_Type_StepScopes:
+    case VarType_Type_LodRankTable:
+    case VarType_Type_LodTensorArray:
+    case VarType_Type_PlaceList:
+    case VarType_Type_Reader:
+    case VarType_Type_Channel:
+    case VarType_Type_Raw:
+    case VarType_Type_Tuple:
+      return YES;  // All known constants fall through to this single return.
+    default:
+      return NO;  // Any other integer is not a declared VarType_Type value.
+  }
+}
+
+#pragma mark - VarType_TensorDesc
+
+@implementation VarType_TensorDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasDataType, dataType;
+@dynamic dimsArray, dimsArray_Count;
+
+typedef struct VarType_TensorDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  VarType_Type dataType;
+  GPBInt64Array *dimsArray;
+} VarType_TensorDesc__storage_;
+
+// Returns the GPBDescriptor for VarType_TensorDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Required enum `dataType` plus repeated int64 `dimsArray`.
+      {
+        .name = "dataType",
+        .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor,
+        .number = VarType_TensorDesc_FieldNumber_DataType,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(VarType_TensorDesc__storage_, dataType),
+        .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor,
+        .dataType = GPBDataTypeEnum,
+      },
+      {
+        .name = "dimsArray",
+        .dataTypeSpecific.className = NULL,
+        .number = VarType_TensorDesc_FieldNumber_DimsArray,
+        .hasIndex = GPBNoHasBit,  // Repeated field: no has-bit is assigned.
+        .offset = (uint32_t)offsetof(VarType_TensorDesc__storage_, dimsArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeInt64,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarType_TensorDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarType_TensorDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - VarType_LoDTensorDesc
+
+@implementation VarType_LoDTensorDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasTensor, tensor;
+@dynamic hasLodLevel, lodLevel;
+
+typedef struct VarType_LoDTensorDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  int32_t lodLevel;
+  VarType_TensorDesc *tensor;
+} VarType_LoDTensorDesc__storage_;
+
+// Returns the GPBDescriptor for VarType_LoDTensorDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Required sub-message `tensor` plus optional int32 `lodLevel`.
+      {
+        .name = "tensor",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_TensorDesc),
+        .number = VarType_LoDTensorDesc_FieldNumber_Tensor,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(VarType_LoDTensorDesc__storage_, tensor),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "lodLevel",
+        .dataTypeSpecific.className = NULL,
+        .number = VarType_LoDTensorDesc_FieldNumber_LodLevel,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(VarType_LoDTensorDesc__storage_, lodLevel),
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeInt32,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarType_LoDTensorDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarType_LoDTensorDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - VarType_LoDTensorArrayDesc
+
+@implementation VarType_LoDTensorArrayDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasTensor, tensor;
+@dynamic hasLodLevel, lodLevel;
+
+typedef struct VarType_LoDTensorArrayDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  int32_t lodLevel;
+  VarType_TensorDesc *tensor;
+} VarType_LoDTensorArrayDesc__storage_;
+
+// Returns the GPBDescriptor for VarType_LoDTensorArrayDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Required sub-message `tensor` plus optional int32 `lodLevel`.
+      {
+        .name = "tensor",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_TensorDesc),
+        .number = VarType_LoDTensorArrayDesc_FieldNumber_Tensor,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(VarType_LoDTensorArrayDesc__storage_, tensor),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "lodLevel",
+        .dataTypeSpecific.className = NULL,
+        .number = VarType_LoDTensorArrayDesc_FieldNumber_LodLevel,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(VarType_LoDTensorArrayDesc__storage_, lodLevel),
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeInt32,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarType_LoDTensorArrayDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarType_LoDTensorArrayDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - VarType_ReaderDesc
+
+@implementation VarType_ReaderDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic lodTensorArray, lodTensorArray_Count;
+
+typedef struct VarType_ReaderDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  NSMutableArray *lodTensorArray;
+} VarType_ReaderDesc__storage_;
+
+// Returns the GPBDescriptor for VarType_ReaderDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Single repeated field of VarType_LoDTensorDesc messages.
+      {
+        .name = "lodTensorArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType_LoDTensorDesc),
+        .number = VarType_ReaderDesc_FieldNumber_LodTensorArray,
+        .hasIndex = GPBNoHasBit,  // Repeated field: no has-bit is assigned.
+        .offset = (uint32_t)offsetof(VarType_ReaderDesc__storage_, lodTensorArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarType_ReaderDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarType_ReaderDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - VarType_ChannelDesc
+
+@implementation VarType_ChannelDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasDataType, dataType;
+@dynamic hasCapacity, capacity;
+
+typedef struct VarType_ChannelDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  VarType_Type dataType;
+  int64_t capacity;
+} VarType_ChannelDesc__storage_;
+
+// Returns the GPBDescriptor for VarType_ChannelDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Required enum `dataType` plus required int64 `capacity`.
+      {
+        .name = "dataType",
+        .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor,
+        .number = VarType_ChannelDesc_FieldNumber_DataType,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(VarType_ChannelDesc__storage_, dataType),
+        .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor,
+        .dataType = GPBDataTypeEnum,
+      },
+      {
+        .name = "capacity",
+        .dataTypeSpecific.className = NULL,
+        .number = VarType_ChannelDesc_FieldNumber_Capacity,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(VarType_ChannelDesc__storage_, capacity),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeInt64,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarType_ChannelDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarType_ChannelDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - VarType_Tuple
+
+@implementation VarType_Tuple  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic elementTypeArray, elementTypeArray_Count;
+
+typedef struct VarType_Tuple__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  GPBEnumArray *elementTypeArray;
+} VarType_Tuple__storage_;
+
+// Returns the GPBDescriptor for VarType_Tuple, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Single repeated enum field described by VarType_Type_EnumDescriptor.
+      {
+        .name = "elementTypeArray",
+        .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor,
+        .number = VarType_Tuple_FieldNumber_ElementTypeArray,
+        .hasIndex = GPBNoHasBit,  // Repeated field: no has-bit is assigned.
+        .offset = (uint32_t)offsetof(VarType_Tuple__storage_, elementTypeArray),
+        .flags = GPBFieldRepeated | GPBFieldHasEnumDescriptor,
+        .dataType = GPBDataTypeEnum,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarType_Tuple class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarType_Tuple__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - VarDesc
+
+@implementation VarDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasName, name;
+@dynamic hasType, type;
+@dynamic hasPersistable, persistable;
+
+typedef struct VarDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];  // Also holds the bool `persistable` (see its field entry); the struct has no member for it.
+  NSString *name;
+  VarType *type;
+} VarDesc__storage_;
+
+// Returns the GPBDescriptor for VarDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Required `name` and `type`, plus optional bool `persistable`.
+      {
+        .name = "name",
+        .dataTypeSpecific.className = NULL,
+        .number = VarDesc_FieldNumber_Name,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(VarDesc__storage_, name),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeString,
+      },
+      {
+        .name = "type",
+        .dataTypeSpecific.className = GPBStringifySymbol(VarType),
+        .number = VarDesc_FieldNumber_Type,
+        .hasIndex = 1,
+        .offset = (uint32_t)offsetof(VarDesc__storage_, type),
+        .flags = GPBFieldRequired,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "persistable",
+        .dataTypeSpecific.className = NULL,
+        .number = VarDesc_FieldNumber_Persistable,
+        .hasIndex = 2,
+        .offset = 3,  // Stored in _has_storage_ to save space.
+        .flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .dataType = GPBDataTypeBool,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[VarDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(VarDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - BlockDesc
+
+@implementation BlockDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic hasIdx, idx;
+@dynamic hasParentIdx, parentIdx;
+@dynamic varsArray, varsArray_Count;
+@dynamic opsArray, opsArray_Count;
+@dynamic hasForwardBlockIdx, forwardBlockIdx;
+
+typedef struct BlockDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  int32_t idx;
+  int32_t parentIdx;
+  int32_t forwardBlockIdx;
+  NSMutableArray *varsArray;
+  NSMutableArray *opsArray;
+} BlockDesc__storage_;
+
+// Returns the GPBDescriptor for BlockDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescriptionWithDefault fields[] = {  // WithDefault variant: each entry pairs a defaultValue with the core description.
+      {
+        .defaultValue.valueInt32 = 0,
+        .core.name = "idx",
+        .core.dataTypeSpecific.className = NULL,
+        .core.number = BlockDesc_FieldNumber_Idx,
+        .core.hasIndex = 0,
+        .core.offset = (uint32_t)offsetof(BlockDesc__storage_, idx),
+        .core.flags = GPBFieldRequired,
+        .core.dataType = GPBDataTypeInt32,
+      },
+      {
+        .defaultValue.valueInt32 = 0,
+        .core.name = "parentIdx",
+        .core.dataTypeSpecific.className = NULL,
+        .core.number = BlockDesc_FieldNumber_ParentIdx,
+        .core.hasIndex = 1,
+        .core.offset = (uint32_t)offsetof(BlockDesc__storage_, parentIdx),
+        .core.flags = GPBFieldRequired,
+        .core.dataType = GPBDataTypeInt32,
+      },
+      {
+        .defaultValue.valueMessage = nil,
+        .core.name = "varsArray",
+        .core.dataTypeSpecific.className = GPBStringifySymbol(VarDesc),
+        .core.number = BlockDesc_FieldNumber_VarsArray,
+        .core.hasIndex = GPBNoHasBit,  // Repeated field: no has-bit is assigned.
+        .core.offset = (uint32_t)offsetof(BlockDesc__storage_, varsArray),
+        .core.flags = GPBFieldRepeated,
+        .core.dataType = GPBDataTypeMessage,
+      },
+      {
+        .defaultValue.valueMessage = nil,
+        .core.name = "opsArray",
+        .core.dataTypeSpecific.className = GPBStringifySymbol(OpDesc),
+        .core.number = BlockDesc_FieldNumber_OpsArray,
+        .core.hasIndex = GPBNoHasBit,  // Repeated field: no has-bit is assigned.
+        .core.offset = (uint32_t)offsetof(BlockDesc__storage_, opsArray),
+        .core.flags = GPBFieldRepeated,
+        .core.dataType = GPBDataTypeMessage,
+      },
+      {
+        .defaultValue.valueInt32 = -1,  // The only non-zero default in this table.
+        .core.name = "forwardBlockIdx",
+        .core.dataTypeSpecific.className = NULL,
+        .core.number = BlockDesc_FieldNumber_ForwardBlockIdx,
+        .core.hasIndex = 2,
+        .core.offset = (uint32_t)offsetof(BlockDesc__storage_, forwardBlockIdx),
+        .core.flags = GPBFieldOptional | GPBFieldHasDefaultValue,
+        .core.dataType = GPBDataTypeInt32,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[BlockDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescriptionWithDefault))
+                                   storageSize:sizeof(BlockDesc__storage_)
+                                         flags:GPBDescriptorInitializationFlag_FieldsWithDefault];  // Presumably signals that fields[] uses the WithDefault layout; confirm against GPB headers.
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+#pragma mark - ProgramDesc
+
+@implementation ProgramDesc  // Generated message class; the accessors below are @dynamic, so the compiler synthesizes none.
+
+@dynamic blocksArray, blocksArray_Count;
+@dynamic hasVersion, version;
+
+typedef struct ProgramDesc__storage_ {  // Layout handed to GPBDescriptor through offsetof()/sizeof() below.
+  uint32_t _has_storage_[1];
+  NSMutableArray *blocksArray;
+  Version *version;
+} ProgramDesc__storage_;
+
+// Returns the GPBDescriptor for ProgramDesc, building it from the field table on first call and caching it.
+// Generator's note: threadsafe because it is initially called in +initialize for each subclass.
++ (GPBDescriptor *)descriptor {
+  static GPBDescriptor *descriptor = nil;  // Cache: stays nil until the first call completes.
+  if (!descriptor) {
+    static GPBMessageFieldDescription fields[] = {  // Repeated BlockDesc messages plus an optional Version message.
+      {
+        .name = "blocksArray",
+        .dataTypeSpecific.className = GPBStringifySymbol(BlockDesc),
+        .number = ProgramDesc_FieldNumber_BlocksArray,
+        .hasIndex = GPBNoHasBit,  // Repeated field: no has-bit is assigned.
+        .offset = (uint32_t)offsetof(ProgramDesc__storage_, blocksArray),
+        .flags = GPBFieldRepeated,
+        .dataType = GPBDataTypeMessage,
+      },
+      {
+        .name = "version",
+        .dataTypeSpecific.className = GPBStringifySymbol(Version),
+        .number = ProgramDesc_FieldNumber_Version,
+        .hasIndex = 0,
+        .offset = (uint32_t)offsetof(ProgramDesc__storage_, version),
+        .flags = GPBFieldOptional,
+        .dataType = GPBDataTypeMessage,
+      },
+    };
+    GPBDescriptor *localDescriptor =
+        [GPBDescriptor allocDescriptorForClass:[ProgramDesc class]
+                                     rootClass:[FrameworkRoot class]
+                                          file:FrameworkRoot_FileDescriptor()
+                                        fields:fields
+                                    fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription))
+                                   storageSize:sizeof(ProgramDesc__storage_)
+                                         flags:0];
+    NSAssert(descriptor == nil, @"Startup recursed!");  // The cache must still be empty; otherwise this method was re-entered while building.
+    descriptor = localDescriptor;
+  }
+  return descriptor;
+}
+
+@end
+
+
+#pragma clang diagnostic pop
+
+// @@protoc_insertion_point(global_scope)
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/OpDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/OpDesc.swift
deleted file mode 100644
index 44fc09a29db0deec67e7682b303b1d0947b47a51..0000000000000000000000000000000000000000
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/OpDesc.swift
+++ /dev/null
@@ -1,81 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
- 
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
- 
- http://www.apache.org/licenses/LICENSE-2.0
- 
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
-
-import Foundation
-
-class OpDesc {
-  let inputs: [String : [String]]
-  var paraInputs: [String : [String]]
-  var outputs: [String : [String]]
-  let unusedOutputs: [String : [String]]
-  var attrs: [String : Attr] = [:]
-  var type: String
-  init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) {
-    type = protoOpDesc.type
-    let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in
-      var map: [String : [String]] = [:]
-      for opDescVar  in vars {
-        if (canAdd(opDescVar.parameter)) {
-          map[opDescVar.parameter] = opDescVar.arguments
-        }
-      }
-      return map
-    }
-    
-    inputs = creator(protoOpDesc.inputs) {
-      opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false
-    }
-    
-    paraInputs = creator(protoOpDesc.inputs) {
-      !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false)
-    }
-    
-    outputs = creator(protoOpDesc.outputs) {
-      opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false
-    }
-    
-    unusedOutputs = creator(protoOpDesc.outputs) {
-      !(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false)
-    }
-    
-    for attr in protoOpDesc.attrs {
-      if (attr.type != .block) {
-        attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
-      }
-    }
-  }
-}
-
-extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible {
-  var description: String {
-    var str = ""
-    str += "op type: \(type): \n"
-    str += "    op inputs: \n"
-    str += "        \(inputs) \n"
-    str += "    op para inputs: \n"
-    str += "        \(paraInputs) \n"
-    str += "    op para outputs: \n"
-    str += "        \(outputs) \n"
-    str += "    op attrs: \n"
-    str += "        \(attrs) \n"
-    
-    return str
-  }
-  
-  var debugDescription: String {
-    return description
-  }
-  
-  
-}
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/BlockDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/PMBlockDesc.swift
similarity index 71%
rename from metal/paddle-mobile/paddle-mobile/Src/Program/BlockDesc.swift
rename to metal/paddle-mobile/paddle-mobile/Src/Program/PMBlockDesc.swift
index 1a66a69128b8692ae634363ca05b659dd6e604af..b021b09008b1f3bef3ba01d5a51fe7b7803fedaa 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/BlockDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/PMBlockDesc.swift
@@ -14,28 +14,28 @@
 
 import Foundation
 
-public class BlockDesc {
+public class PMBlockDesc {
     let index: Int
     let parentIndex: Int
-    public let vars: [VarDesc]
-    let ops: [OpDesc]
-    init(block: PaddleMobile_Framework_Proto_BlockDesc) {
+    public let vars: [PMVarDesc]
+    let ops: [PMOpDesc]
+    init(block: BlockDesc) {
         index = Int(block.idx)
         parentIndex = Int(block.parentIdx)
-        var vars: [VarDesc] = []
-        for varOfBlock in block.vars {
-            vars.append(VarDesc.init(protoVarDesc: varOfBlock))
+        var vars: [PMVarDesc] = []
+        for varOfBlock in block.varsArray {
+            vars.append(PMVarDesc.init(protoVarDesc: varOfBlock as! VarDesc))
         }
         vars.sort { $0.name < $1.name }
         self.vars = vars
-        var ops: [OpDesc] = []
-        for op in block.ops {
-            ops.append(OpDesc.init(protoOpDesc: op))
+        var ops: [PMOpDesc] = []
+        for op in block.opsArray {
+            ops.append(PMOpDesc.init(protoOpDesc: op as! OpDesc))
         }
         self.ops = ops
     }
     
-    init(inVars: [VarDesc], inOps: [OpDesc]) {
+    init(inVars: [PMVarDesc], inOps: [PMOpDesc]) {
         vars = inVars
         ops = inOps
         index = 0
@@ -44,7 +44,7 @@ public class BlockDesc {
     
 }
 
-extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
+extension PMBlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
   public var description: String {
         var str = ""
         
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/PMOpDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/PMOpDesc.swift
new file mode 100644
index 0000000000000000000000000000000000000000..663677150eb0f0240b032a713424aac8ed66c86a
--- /dev/null
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/PMOpDesc.swift
@@ -0,0 +1,81 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+ 
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+ 
+ http://www.apache.org/licenses/LICENSE-2.0
+ 
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License. */
+
+import Foundation
+
+class PMOpDesc {
+  let inputs: [String : [String]]
+  var paraInputs: [String : [String]]
+  var outputs: [String : [String]]
+  let unusedOutputs: [String : [String]]
+  var attrs: [String : Attr] = [:]
+  var type: String
+    /// Splits the proto op's vars into declared/undeclared slots (as listed in
+    /// `opInfos` for this op type) and copies every non-block attribute.
+    init(protoOpDesc: OpDesc) {
+        type = protoOpDesc.type
+        // Keeps parameter -> arguments for each var whose parameter passes `keep`.
+        let pick = { (vars: [OpDesc_Var], keep: (String) -> Bool) -> [String : [String]] in
+            var picked: [String : [String]] = [:]
+            for slot in vars where keep(slot.parameter) {
+                picked[slot.parameter] = slot.argumentsArray as? [String]
+            }
+            return picked
+        }
+        let protoInputs = protoOpDesc.inputsArray as! [OpDesc_Var]
+        let protoOutputs = protoOpDesc.outputsArray as! [OpDesc_Var]
+        // A slot is "declared" when opInfos lists it for this op type; an op
+        // type missing from opInfos has no declared slots at all.
+        let isDeclaredInput = { (name: String) -> Bool in
+            return opInfos[protoOpDesc.type]?.inputs.contains(name) ?? false
+        }
+        let isDeclaredOutput = { (name: String) -> Bool in
+            return opInfos[protoOpDesc.type]?.outputs.contains(name) ?? false
+        }
+        inputs = pick(protoInputs, isDeclaredInput)
+        paraInputs = pick(protoInputs) { !isDeclaredInput($0) }
+        outputs = pick(protoOutputs, isDeclaredOutput)
+        unusedOutputs = pick(protoOutputs) { !isDeclaredOutput($0) }
+        // Block-typed attributes are skipped; everything else is converted.
+        for element in protoOpDesc.attrsArray {
+            let attr = element as! OpDesc_Attr
+            if (attr.type != .block) {
+                attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
+            }
+        }
+    }
+}
+
+extension PMOpDesc: CustomStringConvertible, CustomDebugStringConvertible {
+  /// Multi-line dump of the op: its type, then inputs, paraInputs, outputs
+  /// and attrs, each printed under an indented heading.
+  var description: String {
+    // The "op para outputs" heading is kept verbatim; it labels `outputs`.
+    let lines: [String] = [
+      "op type: \(type): \n",
+      "    op inputs: \n",
+      "        \(inputs) \n",
+      "    op para inputs: \n",
+      "        \(paraInputs) \n",
+      "    op para outputs: \n",
+      "        \(outputs) \n",
+      "    op attrs: \n",
+      "        \(attrs) \n",
+    ]
+    return lines.joined()
+  }
+  
+  /// Same text as `description`.
+  var debugDescription: String { return description }
+}
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/PMProgramDesc.swift
similarity index 75%
rename from metal/paddle-mobile/paddle-mobile/Src/Program/ProgramDesc.swift
rename to metal/paddle-mobile/paddle-mobile/Src/Program/PMProgramDesc.swift
index a95dcb771b511cc5534d15d028ad06451d72ecda..79b8875976dd42eb57ff981441103f001ddb7a6e 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/PMProgramDesc.swift
@@ -14,11 +14,11 @@
 
 import Foundation
 
-public class ProgramDesc {
-    public var blocks: [BlockDesc] = []
-    init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) {
-        for block in protoProgram.blocks {
-            self.blocks.append(BlockDesc.init(block: block))
+public class PMProgramDesc {
+    public var blocks: [PMBlockDesc] = []
+    init(protoProgram: ProgramDesc) {
+        for block in protoProgram.blocksArray {
+            self.blocks.append(PMBlockDesc.init(block: block as! BlockDesc))
         }
     }
     
@@ -26,7 +26,7 @@ public class ProgramDesc {
     }
 }
 
-extension ProgramDesc: CustomStringConvertible, CustomDebugStringConvertible {
+extension PMProgramDesc: CustomStringConvertible, CustomDebugStringConvertible {
     public var description: String {
         var str: String = ""
         for i in 0..<blocks.count {
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/VarDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/PMVarDesc.swift
similarity index 90%
rename from metal/paddle-mobile/paddle-mobile/Src/Program/VarDesc.swift
rename to metal/paddle-mobile/paddle-mobile/Src/Program/PMVarDesc.swift
index e3ae87201cb9aa46840b31254793d629aa1f5458..130e6f49fb61b1488538849e86ff793b53f31a86 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/VarDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/PMVarDesc.swift
@@ -56,13 +56,13 @@ public enum VarTypeType: Int {
     }
 }
 
-public class VarDesc {
+public class PMVarDesc {
     public let name: String
     public let persistable: Bool
     public let type: VarTypeType
     let tensorDesc: TensorDesc?
-    init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) {
-        type = VarTypeType.init(rawValue: protoVarDesc.type.type.rawValue) ?? .ErrorType
+    init(protoVarDesc: VarDesc) {
+        type = VarTypeType.init(rawValue: Int(protoVarDesc.type.type.rawValue)) ?? .ErrorType
         name = protoVarDesc.name
         persistable = protoVarDesc.persistable
         switch type {
@@ -71,14 +71,14 @@ public class VarDesc {
         case .LodTensor:
             tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.lodTensor.tensor)
         case .StepLodTensorArray:
-            tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.tensorArray.tensor);
+            tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.tensorArray_p.tensor);
         default:
             tensorDesc = .none
         }
     }
 }
 
-extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
+extension PMVarDesc: CustomStringConvertible, CustomDebugStringConvertible {
   public var description: String {
         var str = ""
         str += "var name \(name): \n"
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/Program.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/Program.swift
index 7940e70d56d72449b5304de4c3e8355966217de5..f74879556b2473198436d76a2486f74cb0d9b9fd 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/Program.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/Program.swift
@@ -16,14 +16,14 @@ import Foundation
 
 @objc public class Program: NSObject {
     public let paramPath: String
-    public let programDesc: ProgramDesc
+    public let programDesc: PMProgramDesc
     public let scope: Scope
-    init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) {
+    init(inProgramDesc: PMProgramDesc, inParamPath: String, inScope: Scope) {
         programDesc = inProgramDesc
         paramPath = inParamPath
         scope = inScope
     }
-    init(inProgramDesc: ProgramDesc, inScope: Scope) {
+    init(inProgramDesc: PMProgramDesc, inScope: Scope) {
         programDesc = inProgramDesc
         scope = inScope
         paramPath = ""
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramOptimize.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramOptimize.swift
index a62b4688aa5a806bbb823f22ba3329ee38de719f..dcb065de3d8c6e7ec6cf437cbc2a19305def08ae 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramOptimize.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramOptimize.swift
@@ -25,8 +25,8 @@ class Node {
   var inputs: [Node] = []
   var outputs: [Node] = []
   var type: String
-  var opDesc: OpDesc?
-  init(inOpDesc: OpDesc) {
+  var opDesc: PMOpDesc?
+  init(inOpDesc: PMOpDesc) {
     type = inOpDesc.type
     opDesc = inOpDesc
   }
@@ -192,7 +192,7 @@ class ProgramOptimize<P: PrecisionType> {
                                   ElementwiseAddPreluOp<P>.self
   ]
   
-  func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
+  func optimize(originProgramDesc: PMProgramDesc) -> PMProgramDesc {
     
     guard originProgramDesc.blocks.count == 1 else {
       fatalError(" not support yet")
@@ -287,13 +287,13 @@ class ProgramOptimize<P: PrecisionType> {
       }
     }
     
-    var ops: [OpDesc] = []
+    var ops: [PMOpDesc] = []
     for node in nodes {
       ops.append(node.opDesc!)
     }
     
-    var newProgramDesc = ProgramDesc.init()
-    let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops)
+    let newProgramDesc = PMProgramDesc.init()
+    let newBlock = PMBlockDesc.init(inVars: block.vars, inOps: ops)
     newProgramDesc.blocks.append(newBlock)
     return newProgramDesc
   }
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift
index 1a72f5ef717063136c4708c881befd789a57219c..7565fffc99bd15862304bdf7d7dd1bc31a6ffaf4 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift
@@ -52,9 +52,19 @@ class TensorDesc {
         }
     }
     
-    init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) {
-        dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) }
-        dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType
+    init(protoTensorDesc: VarType_TensorDesc) {
+        // Each proto dim is stored as its absolute value, so a negative
+        // entry in the proto ends up positive in `dims`.
+        var absoluteDims = [Int]()
+        for i in 0..<protoTensorDesc.dimsArray.count {
+            // abs() leaves positive values and zero as they are.
+            let dim = abs(Int(protoTensorDesc.dimsArray.value(at: i)))
+            absoluteDims.append(dim)
+        }
+        dims = absoluteDims
+        
+        // A raw value with no matching VarTypeType case becomes .ErrorType.
+        dataType = VarTypeType.init(rawValue: Int(protoTensorDesc.dataType.rawValue)) ?? .ErrorType
+    }
     
 }
diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift
index df4af3bcc91853e507321d46d3edfd04045f29ab..4a320a9eff27bf583ef1eccee3344f5571e0bbac 100644
--- a/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift
+++ b/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift
@@ -21,7 +21,6 @@
 //limitations under the License. 
 
 import Foundation
-import SwiftProtobuf
 
 // If the compiler emits an error on this type, it is because this file
 // was generated by a version of the `protoc` Swift plug-in that is
diff --git a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h
index 07a6478eb8206264cf739c80e9287a679585ae50..32b69c0b1438d0e18bae5e7c24a2731a473e1a67 100644
--- a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h
+++ b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #import <UIKit/UIKit.h>
+#import <paddle_mobile/Framework.pbobjc.h>
 
 //! Project version number for paddle_mobile.
 //FOUNDATION_EXPORT double paddle_mobileVersionNumber;
diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp
index 137ac73512b9d88716ab585ba315f26aa3b14ea8..9a408a8f2fbe3c600679ddb2e3eadb493f323165 100644
--- a/src/fpga/V1/api.cpp
+++ b/src/fpga/V1/api.cpp
@@ -151,6 +151,30 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
   filter_tensor->reset_data_ptr(new_data);
 }
 
+void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
+                           int stride) {
+  // Both conversion passes run on a private copy of the filter data; the
+  // tensor is then re-pointed at the result and no Resize is issued here.
+  auto shape = filter_tensor->dims();
+  auto count = shape[0], rows = shape[2], cols = shape[3];
+  auto source = filter_tensor->data<float>();
+  size_t byte_len = count * rows * cols * sizeof(float);
+  auto buffer = (float *)fpga_malloc(byte_len);  // NOLINT
+  fpga_copy(buffer, source, byte_len);
+
+  // Pass 1: NC conversion with dims[0] kernels of a single channel each.
+  int plane = rows * cols;
+  deconv_filter::deconv_NC_convert(&buffer, count, 1, plane);
+
+  // Pass 2: dims[1] is passed as the kernel count, dims[0] as the channels.
+  count = shape[1];
+  int channel = shape[0];
+  deconv_filter::DWDconv_format_filter(&buffer, count, channel, rows, cols,
+                                       scale_ptr, stride);
+
+  filter_tensor->reset_data_ptr(buffer);
+}
+
 void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
   filter_tensor->scale[0] = float(max_value / 127.0);  // NOLINT
   filter_tensor->scale[1] = float(127.0 / max_value);  // NOLINT
@@ -243,6 +267,17 @@ void format_dwconv_data(framework::Tensor *filter_tensor,
   format_bias_array(bias_ptr, channel);
   format_fp16_ofm(ofm_tensor);
 }
+void format_DWDeconv_data(framework::Tensor *filter_tensor,
+                          framework::Tensor *ofm_tensor, float **bs_ptr,
+                          int group, int sub_conv_n) {
+  // `group` is not used here. The scale pointer handed to the filter
+  // formatter is *bs_ptr advanced by sub_conv_n * channel floats.
+  const int channel = ofm_tensor->dims()[1];
+  float *scale_ptr = *bs_ptr + sub_conv_n * channel;
+  format_DWDconv_filter(filter_tensor, scale_ptr, sub_conv_n);
+  format_bias_array(bs_ptr, channel);
+  format_fp16_ofm(ofm_tensor);
+}
 void expand_conv_arg(ConvArgs *arg) {
   ConvArgs args = *arg;
 
@@ -311,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) {
   auto filter_pad_width_mul_channel =
       args.image.pad_width * args.image.channels;
   auto image_amount_per_row_multi_win_first =
-      image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height);
+      image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height);
   auto image_amount_per_row_multi_win =
-      image_amount_per_row * (4 * args.kernel.stride_h);
+      image_amount_per_row * (2 * args.kernel.stride_h);
 
   auto image_block_num = block_num;
   auto image_block_len =
@@ -340,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) {
       (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
           ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
           : 0;
-  auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
+  auto cmd = 0UL | USE_BIAS;
 
   auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
                       ((args.deconv_tx_param.sub_conv_num) << 16) |
@@ -378,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) {
 
 void expand_EW_arg(EWAddArgs *arg) {
   EWAddArgs args = *arg;
-  uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  // uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
+  uint64_t cmd = 0;
   uint64_t datalen = (uint64_t)args.image0.width *
                      (uint64_t)args.image0.height *
                      (uint64_t)args.image0.channels;
@@ -406,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) {
 
 void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
                     framework::Tensor *out, framework::Tensor *filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float *bs_ptr) {
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
   auto out_ptr = out->data<float>();
@@ -453,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
             filter->dims()[3]));
 
   for (int i = 0; i < n; i++) {
-    arg->conv_arg[i].relu_enabled = relu_enabled;
+    // arg->conv_arg[i].relu_enabled = relu_enabled;
+    arg->conv_arg[i].output.activation.activation_type = activation_enable;
+    arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
     arg->conv_arg[i].group_num = (uint32_t)group_num;
     arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
     arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;
@@ -525,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
 
 void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w,
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
                      float *bs_ptr) {
   auto input_ptr = input->data<float>();
   auto filter_ptr = filter->data<float>();
@@ -652,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
     }
 
     for (int j = 0; j < split_num; ++j) {
-      arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+      // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
+      arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
+          activation_enable;
+      arg->split_conv_args[i]
+          ->conv_arg[j]
+          .output.activation.leaky_relu_negative_slope =
+          leaky_relu_negative_slope;
       arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num;
 
       arg->split_conv_args[i]->conv_arg[j].kernel.width =
@@ -765,12 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
 
 void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
                      framework::Tensor *out, framework::Tensor *filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float *bias_ptr) {
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float *bias_ptr) {
   auto filter_ptr = filter->data<float>();
   auto input_ptr = input->data<float>();
   auto output_ptr = out->mutable_data<float>();
-  arg->relu_enabled = relu_enabled;
+  arg->sub_conv_num = 1;
+  // arg->relu_enabled = relu_enabled;
+  arg->output.activation.activation_type = activation_enable;
+  arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
   arg->bias_address = bias_ptr;
   arg->filter_address = filter_ptr;
   arg->kernel.height = (uint32_t)filter->dims()[2];
@@ -788,5 +842,114 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
   arg->output.scale_address = out->scale;
 }  // end dwconv arg fill
 
+void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
+                       framework::Tensor *out, framework::Tensor *filter,
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float *bias_ptr) {
+  auto filter_ptr = filter->data<float>();
+  auto input_ptr = input->data<float>();
+  auto output_ptr = out->mutable_data<float>();
+  // Builds one DWconvArgs per sub-convolution; there are stride_w of them.
+  auto deleter = [](void *p) { fpga_free(p); };  // for the shared_ptrs below
+  // Note: stride_h and padding_h are not read anywhere in this function.
+  arg->group_num = (uint32_t)filter->dims()[0];
+  arg->sub_conv_num = (uint32_t)stride_w;
+  arg->filter_num = (uint32_t)filter->dims()[0];
+
+  int sub_conv_num = stride_w;
+
+  int sub_pad =
+      deconv_filter::deconv_calc_sub_pad((int)filter->dims()[3],  // NOLINT
+                                         padding_w, stride_w);
+  auto sub_filter_width = (uint32_t)deconv_filter::deconv_get_sub_filter_axis(
+      (int)filter->dims()[3], stride_w);  // NOLINT
+
+  auto sub_output_width = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
+      (int)input->dims()[3], sub_pad, sub_filter_width);  // NOLINT
+  auto sub_output_height = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
+      (int)input->dims()[2], sub_pad, sub_filter_width);  // NOLINT
+
+  arg->sub_output_width = (uint32_t)sub_output_width;
+  arg->sub_output_height = (uint32_t)sub_output_height;
+  arg->omit_size = (uint32_t)deconv_filter::deconv_get_omit(
+      stride_w, (int)filter->dims()[3], padding_w);  // NOLINT
+
+  auto sub_channels = (int)input->dims()[1];  // NOLINT
+  uint32_t omit_size = arg->omit_size;
+  int real_out_width = sub_output_width * sub_conv_num - 2 * omit_size;
+  int real_out_height = sub_output_height * sub_conv_num - 2 * omit_size;
+  int sub_filter_num = sub_conv_num * (arg->filter_num);
+
+  framework::DDim dims_out_new = framework::make_ddim(
+      {1, arg->filter_num, real_out_height, real_out_width});
+  fpga::format_fp16_ofm(out, dims_out_new);
+  auto out_ptr = out->data<float>();
+
+  /*====For Addition
+  arg->output.address =
+      (half *)out_ptr +  // NOLINT
+      omit_size * sizeof(half) *
+          (align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT));
+          */
+  arg->output.address = out_ptr;
+  arg->output.scale_address = out->scale;
+
+  int filter_offset = sub_filter_width * sub_filter_width *
+                      align_to_x(sub_channels, FILTER_ELEMENT_ALIGNMENT) *
+                      arg->sub_conv_num;
+
+  for (int i = 0; i < sub_conv_num; ++i) {
+    arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
+
+    arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
+    // arg->dw_conv_args[i]->relu_enabled = relu_enabled;
+    arg->dw_conv_args[i]->output.activation.activation_type = activation_enable;
+    arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
+        leaky_relu_negative_slope;
+    arg->dw_conv_args[i]->bias_address = bias_ptr;
+    // Each sub-conv gets its own copy of its filter_offset-element slice.
+    arg->dw_conv_args[i]->filter_address =
+        fpga_malloc(filter_offset * sizeof(int16_t));
+    memcpy(arg->dw_conv_args[i]->filter_address,
+           (reinterpret_cast<half *>(filter_ptr) + i * filter_offset),
+           filter_offset * sizeof(int16_t));
+    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
+        reinterpret_cast<char *>(arg->dw_conv_args[i]->filter_address),
+        deleter));
+
+    arg->dw_conv_args[i]->kernel.height = (uint32_t)sub_filter_width;
+    arg->dw_conv_args[i]->kernel.width = (uint32_t)sub_filter_width;
+
+    arg->dw_conv_args[i]->kernel.stride_h = (uint32_t)1;
+    arg->dw_conv_args[i]->kernel.stride_w = (uint32_t)1;
+    arg->dw_conv_args[i]->image.address = input_ptr;
+    arg->dw_conv_args[i]->image.channels = (uint32_t)input->dims()[1];
+    arg->dw_conv_args[i]->image.height = (uint32_t)input->dims()[2];
+    arg->dw_conv_args[i]->image.width = (uint32_t)input->dims()[3];
+
+    arg->dw_conv_args[i]->image.pad_height = sub_pad;
+    arg->dw_conv_args[i]->image.pad_width = sub_pad;
+    arg->dw_conv_args[i]->image.scale_address = input->scale;
+
+    arg->dw_conv_args[i]->output.address =
+        fpga_malloc(sub_output_height *
+                    align_to_x(sub_output_width * sub_channels * sub_conv_num,
+                               IMAGE_ALIGNMENT) *
+                    sizeof(int16_t));
+    arg->dw_conv_args[i]->output.scale_address =
+        static_cast<float *>(fpga_malloc(2 * sizeof(float)));
+    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
+        reinterpret_cast<char *>(arg->dw_conv_args[i]->output.address),
+        deleter));
+    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
+        reinterpret_cast<char *>(arg->dw_conv_args[i]->output.scale_address),
+        deleter));
+  }
+
+  // arg->output.scale_address = out->scale;
+}  // end DWDeconv arg fill
+
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/V1/api.h b/src/fpga/V1/api.h
index b5c586e92aca2cc8a540ba54479ae7941f42e02c..05a30ddce4828bf8ac0f049ea0db4f18dc1dba79 100644
--- a/src/fpga/V1/api.h
+++ b/src/fpga/V1/api.h
@@ -47,16 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width,
 
 void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
                     framework::Tensor* out, framework::Tensor* filter,
-                    bool relu_enabled, int group_num, int stride_h,
-                    int stride_w, int padding_h, int padding_w, float* bs_ptr);
+                    ActivationType activation_enable,
+                    int16_t leaky_relu_negative_slope, int group_num,
+                    int stride_h, int stride_w, int padding_h, int padding_w,
+                    float* bs_ptr);
 void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input,
                      framework::Tensor* out, framework::Tensor* filter,
-                     bool relu_enabled, int group_num, int stride_h,
-                     int stride_w, int padding_h, int padding_w, float* bs_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int group_num,
+                     int stride_h, int stride_w, int padding_h, int padding_w,
+                     float* bs_ptr);
 void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input,
                      framework::Tensor* out, framework::Tensor* filter,
-                     bool relu_enabled, int stride_h, int stride_w,
-                     int padding_h, int padding_w, float* bias_ptr);
+                     ActivationType activation_enable,
+                     int16_t leaky_relu_negative_slope, int stride_h,
+                     int stride_w, int padding_h, int padding_w,
+                     float* bias_ptr);
+void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input,
+                       framework::Tensor* out, framework::Tensor* filter,
+                       ActivationType activation_enable,
+                       int16_t leaky_relu_negative_slope, int stride_h,
+                       int stride_w, int padding_h, int padding_w,
+                       float* bs_ptr);
 
 void format_deconv_filter(framework::Tensor* filter_tensor, float max_value,
                           int group_num, int stride);
@@ -69,6 +81,10 @@ void format_deconv_data(framework::Tensor* filter_tensor,
 void format_dwconv_data(framework::Tensor* filter_tensor,
                         framework::Tensor* ofm_tensor, float* scale_ptr,
                         float** bias_ptr);
+void format_DWDeconv_data(framework::Tensor* filter_tensor,
+                          framework::Tensor* ofm_tensor, float** bs_ptr,
+                          int group, int sub_conv_n);
+
 template <typename Dtype>
 void savefile(std::string filename, void* buffer, int dataSize, Dtype tmp) {
   float data;
diff --git a/src/fpga/V1/deconv_filter.cpp b/src/fpga/V1/deconv_filter.cpp
index 8fb3cd69fdfb10effb5769b656e19858e481f5f4..7c87452f5a7264ad069d8508cb1e9dc24f5cdc3d 100644
--- a/src/fpga/V1/deconv_filter.cpp
+++ b/src/fpga/V1/deconv_filter.cpp
@@ -19,16 +19,6 @@ limitations under the License. */
 #include "fpga/V1/filter.h"
 // #include "filter.h"
 #include "fpga/V1/api.h"
-// #include "fpga_api.h"
-
-// just for test
-//#include <string>
-//#include "deconv.h"
-//#include "deconv_api.h"
-// using namespace std;
-// using namespace paddle_mobile::fpga;
-// using namespace baidu::fpga::deconv::api;
-// namespace api = baidu::fpga::deconv::api;
 
 namespace paddle_mobile {
 namespace fpga {
@@ -42,7 +32,8 @@ void deconv_inverse_filter(float** data_in, int num, int channel, int width,
   float* tmp = *data_in;
   int data_size = num * channel * width * height;
   int hw_len = height * width;
-  auto tmp_data = (float*)fpga_malloc(data_size * sizeof(float));
+  auto tmp_data =
+      reinterpret_cast<float*>(fpga_malloc(data_size * sizeof(float)));
   for (int i = 0; i < num; ++i) {
     for (int j = 0; j < channel; ++j) {
       for (int k = 0; k < hw_len; ++k) {
@@ -97,9 +88,10 @@ int deconv_get_omit(int stride, int filter_width, int pad) {
   return (stride - idx);
 }
 
-void deconv_get_sub_filter(char** data_in, int height, int width,
-                           int sub_conv_n, int kernel_num, int channel) {
-  char* ptr_tmp = *data_in;
+template <typename T>
+void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
+                           int kernel_num, int channel) {
+  T* ptr_tmp = *data_in;
   int sub_num = kernel_num * sub_conv_n;
   int sub_h = height / sub_conv_n;
   int sub_w = width / sub_conv_n;
@@ -107,7 +99,8 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
   int sub_filter_size =
       kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
 
-  char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char));
+  T* ptr_sub_filter =
+      reinterpret_cast<T*>(fpga_malloc(sub_filter_size * sizeof(T)));
   for (int idx = 0; idx < sub_conv_n; ++idx) {
     for (int nn = 0; nn < sub_num; ++nn) {
       int ni = nn % kernel_num;
@@ -124,7 +117,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
 
           fpga_copy(
               ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
-              (*data_in) + kidx, channel * sizeof(char));
+              (*data_in) + kidx, channel * sizeof(T));
           // for (int cc =0; cc < channel; ++cc) {
           //     ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
           //     (*data_in)[kidx + cc];
@@ -140,7 +133,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
 void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
                        int hw) {
   float* tmp = *filter_in;
-  float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(
+  float* ptr_filter = reinterpret_cast<float*>(paddle_mobile::fpga::fpga_malloc(
       hw * kernel_num * channels * sizeof(float)));
 
   for (int c = 0; c < channels; ++c) {
@@ -188,7 +181,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
   result2);
   }*/
 
-  deconv_get_sub_filter(quantize_data, height, width, stride, num, channel);
+  deconv_get_sub_filter<char>(quantize_data, height, width, stride, num,
+                              channel);
   /*{
      char result2 = (char)0;
      string filename = "sub_filter_filter_data";
@@ -212,10 +206,12 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
                                 ((residual == 0) ? div_num : (div_num - 1)) +
                             align_to_x(residual, FILTER_NUM_ALIGNMENT);
 
-  char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*));
+  char** ptr_ptr_data =
+      reinterpret_cast<char**>(fpga_malloc(sub_conv_n * sizeof(char*)));
   int origin_offset = sub_chw * sub_num;
   for (int i = 0; i < sub_conv_n; ++i) {
-    (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char));
+    (ptr_ptr_data)[i] =
+        reinterpret_cast<char*>(fpga_malloc(origin_offset * sizeof(char)));
     fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
               origin_offset * sizeof(char));
 
@@ -233,8 +229,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
 
   int align_offset =
       align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
-  char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset *
-                                       sizeof(char));  // continuous space
+  char* ptr_space = reinterpret_cast<char*>(fpga_malloc(
+      sub_conv_n * align_offset * sizeof(char)));  // continuous space
   for (int i = 0; i < sub_conv_n; ++i) {
     char* ptr_tmp = (ptr_ptr_data)[i];
 
@@ -251,7 +247,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
     fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
     fpga_free(ptr_tmp);
   }
-  *data_in = (float*)ptr_space;
+  *data_in = reinterpret_cast<float*>(ptr_space);
 
   /*    {
         char result2 = (char)0;
@@ -262,6 +258,22 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
   fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
 }
 
+void DWDconv_format_filter(float** data_in, int num, int channel, int height,
+                           int width, float* scale_ptr, int stride) {
+  deconv_inverse_filter(data_in, num, channel, width, height);
+  // After quantize_to_fp16, data_in is reinterpreted as int16_t** below.
+  filter::quantize_to_fp16(data_in, channel, height, width, scale_ptr);
+  int16_t** quantize_data = (int16_t**)data_in;  // NOLINT
+  filter::convert_to_hwn(quantize_data, channel, height, width);
+  // stride is passed as sub_conv_n and num as kernel_num of the helper.
+  deconv_get_sub_filter<int16_t>(quantize_data, height, width, stride, num,
+                                 channel);
+  // align_element_n runs first; the flush size uses the aligned channel count.
+  filter::align_element_n(quantize_data, channel, height, width);
+  fpga_flush(*quantize_data, align_to_x(channel, FILTER_ELEMENT_ALIGNMENT) *
+                                 height * width * sizeof(int16_t));
+}
+
 }  // namespace deconv_filter
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/V1/deconv_filter.h b/src/fpga/V1/deconv_filter.h
index 5fa9781933712a8506c052258dbf2f7f7e05fe37..f1a50b95c52dadc49f4dd333791a22f63bf6d0a3 100644
--- a/src/fpga/V1/deconv_filter.h
+++ b/src/fpga/V1/deconv_filter.h
@@ -24,11 +24,15 @@ int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
 int deconv_get_sub_filter_axis(int filter_axis, int stride);
 int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
 int deconv_get_omit(int stride, int filter_width, int pad);
-void deconv_get_sub_filter(char** data_in, int height, int width,
-                           int sub_conv_n, int kernel_num, int channel);
+
+template <typename T>
+void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
+                           int kernel_num, int channel);
 void deconv_format_filter(float** data_in, int num, int channel, int height,
                           int width, int group_num, float max, int stride);
 void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
+void DWDconv_format_filter(float** data_in, int num, int channel, int height,
+                           int width, float* scale_ptr, int stride);
 
 }  // namespace deconv_filter
 }  // namespace fpga
diff --git a/src/fpga/V1/filter.cpp b/src/fpga/V1/filter.cpp
old mode 100755
new mode 100644
index 197448d515d67459b280bf33a14b8f8419970fc2..50341b75e129479e7f8d8ab4d9c200df574996cb
--- a/src/fpga/V1/filter.cpp
+++ b/src/fpga/V1/filter.cpp
@@ -346,6 +346,16 @@ void format_dwconv_filter(float **data_in, int num, int height, int width,
   fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) *
                                  height * width * sizeof(int16_t));
 }
+
+void format_DWDeconv_filter(float **data_in, int num, int height, int width,
+                            float *scale_ptr) {
+  quantize_to_fp16(data_in, num, height, width, scale_ptr);
+  int16_t **quantize_data = (int16_t **)data_in;  // NOLINT
+  convert_to_hwn(quantize_data, num, height, width);
+  align_element_n(quantize_data, num, height, width);
+  fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) *
+                                 height * width * sizeof(int16_t));
+}  // format_DWDeconv_filter
 }  // namespace filter
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp
index aeb5cdd65385b87a5da1e15e98b9914ca6be189c..5a81e2422979f08b2113bd9b46022fe4d77154cb 100644
--- a/src/fpga/V1/pe.cpp
+++ b/src/fpga/V1/pe.cpp
@@ -18,7 +18,6 @@ limitations under the License. */
 #include "fpga/V1/image.h"
 #include "fpga/common/config.h"
 #include "fpga/common/driver.h"
-
 #ifdef COST_TIME_PRINT
 #include <sys/time.h>
 #include <time.h>
@@ -64,6 +63,7 @@ using namespace std;     // NOLINT
 #define REG_TIMER_COUNTER 0x070
 
 #define REG_SCALE_PARAMETER 0x080
+#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090
 
 #define REG_FLASH_CMD 0x200
 #define REG_FLASH_DATA 0x208
@@ -163,6 +163,7 @@ using namespace std;     // NOLINT
 #define REG_DWCONV_FILTER_BASE_ADDR 0xe08
 #define REG_DWCONV_FILTER_SHAPE 0xe10
 #define REG_DWCONV_FILTER_N_ALIGN 0xe18
+#define REG_DWCONV_FILTER_SUBNUMBER 0xe20
 #define REG_DWCONV_CMD 0xe00
 
 int ComputeFpgaConv(const struct SplitConvArgs &args) {
@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
 int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "======Compute Basic Conv======";
-  DLOG << "   relu_enabled:" << args.relu_enabled
-       << "   sb_address:" << args.sb_address
+  // DLOG << "   relu_enabled:" << args.relu_enabled
+  DLOG << "   sb_address:" << args.sb_address
        << "   filter_address:" << args.filter_address
        << "   filter_num:" << args.filter_num
        << "   group_num:" << args.group_num;
@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+
+  uint64_t reg_ActivationArgs = 0;
+  // active function:{none,leakeyrelu,sigmoid,tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+
+  active_args.activation_type = args.output.activation.activation_type;
+
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+
+  DLOG << "   activation_type:" << active_args.activation_type
+       << "   leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << "   reg_ActivationArgs:" << reg_ActivationArgs;
+
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
     ret = -EIO;
@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
+
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
+
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(
       ((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32),
@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) {
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
 
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
+
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
 
   return ret;
@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
 
+  uint64_t reg_ActivationArgs = 0;
+  // active function:{none,leakeyrelu,sigmoid,tanh}
+  ActivationArgs active_args;
+  // active_args.activation_type = LEAKYRELU;
+  active_args.activation_type = args.output.activation.activation_type;
+
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+
+  DLOG << "   activation_type:" << active_args.activation_type
+       << "   leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << "   reg_ActivationArgs:" << reg_ActivationArgs;
+
   image_physical_address = vaddr_to_paddr_driver(args.image.address);
   output_physical_address = vaddr_to_paddr_driver(args.output.address);
   uint32_t output_height = (uint32_t)(
@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
     return ret;
   }
 
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
+
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
   reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
+
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
 
   return ret;
@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
 int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeFpgaEWAdd===========";
-  DLOG << "   relu_enabled:" << args.relu_enabled
-       << "   const0:" << fp16_2_fp32(int16_t(args.const0))
+  // DLOG << "   relu_enabled:" << args.relu_enabled
+  DLOG << "   const0:" << fp16_2_fp32(int16_t(args.const0))
        << "   const1:" << fp16_2_fp32(int16_t(args.const1));
   DLOG << "   image0_address:" << args.image0.address
        << "   image0_scale_address:" << args.image0.scale_address
@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
 #ifdef PADDLE_MOBILE_ZU5
   int ret = 0;
   uint64_t output_scale = 0;
+
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+  DLOG << "    activation_type:" << active_args.activation_type
+       << "    leaky_relu_negative_slope:"
+       << active_args.leaky_relu_negative_slope;
+  DLOG << "    reg_ActivationArgs:" << reg_ActivationArgs;
+
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
     ret = -EIO;
@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
     return ret;
   }
 
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
+
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);
   reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR);
@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  active_args.activation_type = NONE;
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
+
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif
@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) {
   uint8_t data_cell_in = 0;
   uint8_t data_cell_out = 0;
   int ret = 0;
+
+  uint64_t reg_ActivationArgs = 0;
+  ActivationArgs active_args;
+  active_args.activation_type = args.output.activation.activation_type;
+
+  active_args.leaky_relu_negative_slope =
+      args.output.activation.leaky_relu_negative_slope;
+
+  reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
+                       active_args.leaky_relu_negative_slope;
+
   datalen = (uint64_t)args.image.width * (uint64_t)args.image.height *
             (uint64_t)args.image.channels;
   datalen = align_to_x(datalen, 16);
@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) {
       (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
     return -EFAULT;
   }
-
   pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
   if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) {
     ret = -EIO;
@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) {
     pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
     return ret;
   }
-
+  reg_writeq(reg_ActivationArgs,
+             REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);  // active functoion
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
   reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR);
   reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
@@ -585,12 +666,27 @@ int PerformBypass(const struct BypassArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif
   return 0;
 }  // PerformBypass
 
+// Returns the raw REG_HARDWARE_STATUS value (0 unless PADDLE_MOBILE_ZU5).
+uint64_t FPGAVersion() {
+#ifdef FPGA_PRINT_MODE
+  DLOG << "=============FPGAVersion===========";
+#endif
+#ifdef PADDLE_MOBILE_ZU5
+  pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
+  const uint64_t fpga_ver = reg_readq(REG_HARDWARE_STATUS);
+  pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
+  return fpga_ver;
+#endif
+  return 0;
+}  // FPGAVersion
+
 int ComputeFPGAConcat(const struct ConcatArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeFpgaConcat===========";
@@ -655,6 +751,45 @@ void deconv_post_process(const struct DeconvArgs &args) {
   fpga_flush(args.output.address,
              num * align_deconv_row_len * deconv_h * sizeof(int16_t));
 }
+void DWDeconv_post_process(const struct DWDeconvArgs &args) {
+  int sub_conv_n = args.sub_conv_num;
+  int sub_height = args.sub_output_height;
+  int sub_width = args.sub_output_width;
+  int omit_size = args.omit_size;
+  int channel = args.filter_num;
+  int num = 1;  // the nn loop below therefore runs once
+  int origin_h = sub_height * sub_conv_n;
+  int origin_w = sub_width * sub_conv_n;
+  int align_origin_w = align_to_x(origin_w * channel, IMAGE_ALIGNMENT);
+  int deconv_h = origin_h - 2 * omit_size;
+  int deconv_w = origin_w - 2 * omit_size;
+  int deconv_row_len = deconv_w * channel;
+  int align_deconv_row_len = align_to_x(deconv_row_len, IMAGE_ALIGNMENT);
+  // fpga_invalidate every sub-conv output before rows are copied out of it.
+  for (int idx = 0; idx < sub_conv_n; ++idx) {
+    paddle_mobile::fpga::fpga_invalidate(
+        args.dw_conv_args[idx]->output.address,
+        align_origin_w * origin_h * sizeof(int16_t));
+  }
+  // Copy rows into args.output, skipping omit_size rows/columns per border.
+  int deconv_idx = 0;
+  for (int nn = 0; nn < num; ++nn) {
+    for (int hh = 0; hh < origin_h; ++hh) {
+      int hx = (hh % sub_conv_n);  // row hh is read from sub-conv n - 1 - hx
+      auto sub_t = (int16_t *)(args.dw_conv_args[sub_conv_n - hx - 1]  // NOLINT
+                                   ->output.address);
+      int hi = (hh / sub_conv_n);  // row index used inside that sub-output
+      if ((hh < omit_size) || (hh >= (origin_h - omit_size))) continue;
+      int sidx = (nn * origin_h * align_origin_w + hi * align_origin_w +
+                  omit_size * channel);
+      fpga_copy((int16_t *)(args.output.address) + deconv_idx,    // NOLINT
+                sub_t + sidx, sizeof(int16_t) * deconv_row_len);  // NOLINT
+      deconv_idx += align_deconv_row_len;  // advance by one aligned output row
+    }
+  }
+  fpga_flush(args.output.address,
+             num * align_deconv_row_len * deconv_h * sizeof(int16_t));
+}
 
 int ComputeFpgaDeconv(const struct DeconvArgs &args) {
 #ifdef FPGA_PRINT_MODE
@@ -755,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) {
 int ComputeDWConv(const struct DWconvArgs &args) {
 #ifdef FPGA_PRINT_MODE
   DLOG << "=============ComputeDWConv===========";
-  DLOG << "   mode:" << args.relu_enabled;
+  // DLOG << "   mode:" << args.relu_enabled;
   DLOG << "   image_address:" << args.image.address
        << "   image_scale_address:" << args.image.scale_address
        << "   image_channels:" << args.image.channels
@@ -778,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) {
   uint64_t output_scale = 0;
   uint64_t timer_cnt = 0;
   int ret = 0;
-  uint64_t cmd = args.relu_enabled;
+  // uint64_t cmd = args.relu_enabled;
+  uint64_t cmd = 0;
   uint64_t image_physical_address = 0;
   uint64_t output_physical_address = 0;
   uint64_t filter_physical_address = 0;
@@ -792,17 +928,21 @@ int ComputeDWConv(const struct DWconvArgs &args) {
       align_to_x((uint64_t)args.image.channels, IMAGE_ALIGNMENT);
   uint64_t filter_amount_per_row_align =
       filter_N_align * (uint64_t)args.kernel.width;
-  uint64_t filter_amount_align = filter_N_align * (uint64_t)args.kernel.width *
-                                 (uint64_t)args.kernel.height;
+  uint64_t sub_filter_amount_align = filter_N_align *
+                                     (uint64_t)args.kernel.width *
+                                     (uint64_t)args.kernel.height;
+  uint64_t filter_amount_align =
+      sub_filter_amount_align * (uint64_t)args.sub_conv_num;
 
   uint32_t output_height = (uint32_t)(
       (args.image.height + args.image.pad_height * 2 - args.kernel.height) /
           args.kernel.stride_h +
       1);
   uint32_t output_width = (uint32_t)(
-      (args.image.width + args.image.pad_width * 2 - args.kernel.width) /
-          args.kernel.stride_w +
-      1);
+      ((args.image.width + args.image.pad_width * 2 - args.kernel.width) /
+           args.kernel.stride_w +
+       1) *
+      args.sub_conv_num);
 
   uint64_t image_amount_per_row =
       align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels,
@@ -845,12 +985,15 @@ int ComputeDWConv(const struct DWconvArgs &args) {
 
   /*restart scale*/
   reg_writeq(output_scale, REG_SCALE_PARAMETER);
+
   reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
   reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
   reg_writeq((bias_physical_address << 32 | filter_physical_address),
              REG_DWCONV_FILTER_BASE_ADDR);
   reg_writeq(filter_amount_per_row_align | (filter_amount_align << 32),
              REG_DWCONV_FILTER_SHAPE);
+  reg_writeq(sub_filter_amount_align | (((uint64_t)args.sub_conv_num) << 32),
+             REG_DWCONV_FILTER_SUBNUMBER);
   reg_writeq(filter_N_align, REG_DWCONV_FILTER_N_ALIGN);
 
   reg_writeq(
@@ -904,10 +1047,88 @@ int ComputeDWConv(const struct DWconvArgs &args) {
   output_scale = reg_readq(REG_SCALE_PARAMETER);
   output_scale = (output_scale << 32) | (output_scale >> 32);
   fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
+  DLOG << "output_scale:" << output_scale;
   pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
   return ret;
 #endif
   return 0;
 }
+// Runs every sub DW-conv, copies the scale pair whose first value is largest
+// into args.output.scale_address, then calls DWDeconv_post_process.
+int ComputeDWDeconv(const struct DWDeconvArgs &args) {
+#ifdef FPGA_PRINT_MODE
+  DLOG << "=============ComputeDWDeconv===========";
+  DLOG << "   filter_num:" << args.filter_num
+       << "   group_num:" << args.group_num << "omit_size:" << args.omit_size
+       << "sub_output_width: " << args.sub_output_width
+       << "sub_output_height: " << args.sub_output_height
+       << "   sub_conv_num:" << args.sub_conv_num;
+  DLOG << "args.output.address: " << args.output.address
+       << "args.output.scale_address: " << args.output.scale_address;
+#endif
+
+  int sub_conv_num = args.sub_conv_num;
+#ifdef COST_TIME_PRINT
+  timeval start, end;
+  long dif_sec, dif_usec;  // NOLINT
+#endif
+
+  for (int i = 0; i < sub_conv_num; i++) {
+#ifdef COST_TIME_PRINT
+    gettimeofday(&start, NULL);
+#endif
+    ComputeDWConv(*args.dw_conv_args[i]);
+#ifdef COST_TIME_PRINT
+    gettimeofday(&end, NULL);
+    dif_sec = end.tv_sec - start.tv_sec;
+    dif_usec = end.tv_usec - start.tv_usec;
+    std::cout << "deconv basic_conv: " << i << " times:  "
+              << "    cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
+              << std::endl;
+#endif
+  }
+
+  if (sub_conv_num > 1) {
+    // max_scale must follow the running maximum, else the last entry wins.
+    float max_scale = -1.0f;
+#ifdef COST_TIME_PRINT
+    gettimeofday(&start, NULL);
+#endif
+    for (int i = 0; i < sub_conv_num; i++) {
+      paddle_mobile::fpga::fpga_invalidate(
+          args.dw_conv_args[i]->output.scale_address, 2 * sizeof(float));
+      float ptr_scale = (args.dw_conv_args[i]->output.scale_address)[0];
+      if (ptr_scale > max_scale) {
+        max_scale = ptr_scale;
+        args.output.scale_address[0] = ptr_scale;
+        args.output.scale_address[1] =
+            (args.dw_conv_args[i]->output.scale_address)[1];
+      }
+    }
+#ifdef COST_TIME_PRINT
+    gettimeofday(&end, NULL);
+    dif_sec = end.tv_sec - start.tv_sec;
+    dif_usec = end.tv_usec - start.tv_usec;
+    std::cout << "deconv scale  "
+              << "    cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
+              << std::endl;
+#endif
+  }
+
+#ifdef COST_TIME_PRINT
+  gettimeofday(&start, NULL);
+#endif
+  DWDeconv_post_process(args);
+#ifdef COST_TIME_PRINT
+  gettimeofday(&end, NULL);
+  dif_sec = end.tv_sec - start.tv_sec;
+  dif_usec = end.tv_usec - start.tv_usec;
+  std::cout << "deconv_post_process  "
+            << "    cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
+            << std::endl;
+#endif
+  return 0;
+}  // ComputeDWDeconv
+
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/fpga/common/driver.cpp b/src/fpga/common/driver.cpp
index 18a310b09cad4a741eb83453a09f3c94d4f0db05..b1d3559dbbb238ae24cc6224e2d253dab744dce1 100644
--- a/src/fpga/common/driver.cpp
+++ b/src/fpga/common/driver.cpp
@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
   unsigned int nr = (unsigned int)_nr;
   int ret = 0;
   uint64_t a_size = FPGA_PAGE_SIZE * nr;
-  DLOG << a_size;
 
   pthread_mutex_lock(&memory->mutex);
 
@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) {
 
 void fpga_copy_driver(void *dest, const void *src, size_t num) {
   uint64_t i;
-
-  DLOG << "dest:" << dest << " src:" << src << " size:" << num;
-
   for (i = 0; i < num; i++) {
     *((int8_t *)dest + i) = *((int8_t *)src + i);  // NOLINT
   }
diff --git a/src/fpga/common/driver.h b/src/fpga/common/driver.h
index 4fa83b776e7b3df5df5e536de91093fd18ca67a1..d35627cd46b3f233255a98d1e1fbca27469f715c 100644
--- a/src/fpga/common/driver.h
+++ b/src/fpga/common/driver.h
@@ -29,7 +29,7 @@ namespace driver {
 
 #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
 
-#define FPGA_REG_PHY_ADDR 0xa0000000
+#define FPGA_REG_PHY_ADDR 0x80000000
 #define FPGA_REG_SIZE 0x1000
 #define FPGA_MEM_PHY_ADDR 0x40000000
 #define FPGA_MEM_SIZE 0x80000000
diff --git a/src/fpga/common/fpga_common.cpp b/src/fpga/common/fpga_common.cpp
index 0a1787aa3f211a247d95cd7124879ce14af980a9..bf90a3a11926b1f90ed8a659db908a061f79b0e9 100644
--- a/src/fpga/common/fpga_common.cpp
+++ b/src/fpga/common/fpga_common.cpp
@@ -76,7 +76,7 @@ int32_t convertmantissa(int32_t i) {
 }
 
 float fp16_2_fp32(int16_t fp16_num) {
-  int16_t se_fp16 = fp16_num >> 10;
+  int16_t se_fp16 = (fp16_num >> 10) & 0x3f;
   int16_t m_fp16 = fp16_num & 0x3ff;
   int32_t e_fp32 = 0;
   int16_t offset = 0;
@@ -94,7 +94,7 @@ float fp16_2_fp32(int16_t fp16_num) {
     e_fp32 = 0x80000000;
     offset = 0;
   } else if (se_fp16 < 63) {
-    e_fp32 = 0x80000000 + (se_fp16 - 32) << 23;
+    e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);
     offset = 1024;
   } else {  // se_fp16 == 63
     e_fp32 = 0xC7800000;
diff --git a/src/fpga/common/fpga_common.h b/src/fpga/common/fpga_common.h
old mode 100755
new mode 100644
index c9519071fba94ad1e2b526d9e4d5cd96a1bcdbac..60753e5cde1e39a1dbf4a1016667db748fc6b9f9
--- a/src/fpga/common/fpga_common.h
+++ b/src/fpga/common/fpga_common.h
@@ -45,6 +45,7 @@ enum ActivationType {
   LEAKYRELU = 1,
   SIGMOID = 2,
   TANH = 3,
+  SOFTMAX = 4,
 };
 
 struct ActivationArgs {
@@ -132,7 +133,7 @@ struct DeconvTxParm {
 #endif
 
 struct ConvArgs {
-  bool relu_enabled;
+  // bool relu_enabled;
   void* sb_address;  // scale and bias
   void* filter_address;
   float* filter_scale_address;
@@ -198,7 +199,7 @@ struct PoolingArgs {
 };
 
 struct EWAddArgs {
-  bool relu_enabled;
+  // bool relu_enabled;
   uint32_t const0;  // output0 = const0 x input0 + const1 x input1;
   uint32_t const1;
   struct ImageInputArgs image0;
@@ -229,13 +230,27 @@ struct DeconvArgs {
   std::vector<std::shared_ptr<SplitConvArgs>> split_conv_args;
 };
 struct DWconvArgs {
-  bool relu_enabled;
+  uint32_t sub_conv_num;
+  // bool relu_enabled;
   void* bias_address;
   void* filter_address;
   struct KernelArgs kernel;
   struct ImageInputArgs image;
   struct ImageOutputArgs output;
 };
+
+struct DWDeconvArgs {
+  uint32_t sub_conv_num;  // how many dw_conv_args entries ComputeDWDeconv runs
+  uint32_t group_num;  // only logged in the code visible here - TODO confirm
+  uint32_t filter_num;  // read as the channel count by DWDeconv_post_process
+  uint32_t omit_size;  // rows/columns skipped at each border when merging
+  uint32_t sub_output_width;   // times sub_conv_num = pre-crop merged width
+  uint32_t sub_output_height;  // times sub_conv_num = pre-crop merged height
+  struct ImageOutputArgs output;  // destination of merged rows and scale
+  std::vector<std::shared_ptr<DWconvArgs>> dw_conv_args;  // one per sub-conv
+  std::vector<std::shared_ptr<char>> vector_dw_conv_space;  // use: TODO confirm
+};
+
 // static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x;
 // }
 static inline uint32_t align_to_x(int64_t num, int64_t x) {
diff --git a/src/fpga/common/pe.h b/src/fpga/common/pe.h
index 9f2800428e431ea302d6cd33685e8ff1dcdc2751..cf0574bc04b05d538766ecba895e97944e1233f8 100644
--- a/src/fpga/common/pe.h
+++ b/src/fpga/common/pe.h
@@ -18,6 +18,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace fpga {
 
+uint64_t FPGAVersion();
 int PerformBypass(const struct BypassArgs& args);
 int ComputeBasicConv(const struct ConvArgs& args);
 int ComputeFpgaPool(const struct PoolingArgs& args);
@@ -28,5 +29,7 @@ int ComputeFPGAConcat(const struct ConcatArgs& args);
 int ComputeFPGASplit(const struct SplitArgs& args);
 int ComputeFpgaDeconv(const struct DeconvArgs& args);
 int ComputeDWConv(const struct DWconvArgs& args);
+int ComputeDWDeconv(const struct DWDeconvArgs& args);
+
 }  // namespace fpga
 }  // namespace paddle_mobile
diff --git a/src/operators/activation_op.cpp b/src/operators/activation_op.cpp
index bcff87c9276721c19a970eb328fc0a183ed6c003..76c9e1a014bc0e51b032d8516ba9448fa25b2aa5 100644
--- a/src/operators/activation_op.cpp
+++ b/src/operators/activation_op.cpp
@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6);
 
 #ifdef SIGMOID_OP
 DEFINE_ACTIVATION_INFERSHAPE(Sigmoid);
+namespace ops = paddle_mobile::operators;
+#ifdef PADDLE_MOBILE_FPGA
+REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp);
+#endif
 #endif  // SIGMOID_OP
 
 #ifdef TANH_OP
diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp
index 30ff3155a47c813f303dc59191edd8b60e6d8ce3..3e41efdf76ed5b14d408a1278c7dba0bd1f30a1f 100644
--- a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp
@@ -22,7 +22,10 @@ namespace operators {
 
 template <>
 bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // bool relu_enabled = false;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
 
   auto bias = param->Bias();
@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
 
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
 
   return true;
diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
index 7f720323253fff53f7d1bb92f8bfeec77bf0da14..b7b99be78acae80c46b9d1bd1f3cb72d5f4a7cfb 100644
--- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp
@@ -23,7 +23,10 @@ namespace operators {
 template <>
 bool ConvAddBNReluKernel<FPGA, float>::Init(
     FusionConvAddBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // bool relu_enabled = true;
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
   if (groups == channel) {
     fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
     fpga::DWconvArgs dwconv_arg = {0};
-    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled,
-                          strides[0], strides[1], paddings[0], paddings[1],
-                          new_bias_ptr);
+    fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, strides[0], strides[1],
+                          paddings[0], paddings[1], new_bias_ptr);
     param->SetFpgaArgs(dwconv_arg);
   } else {
     fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
     fpga::SplitConvArgs conv_arg = {0};
-    fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                         param->Groups(), strides[0], strides[1], paddings[0],
-                         paddings[1], bs_ptr);
+    fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                         leaky_relu_negative_slope, param->Groups(), strides[0],
+                         strides[1], paddings[0], paddings[1], bs_ptr);
     param->SetFpgaArgs(conv_arg);
   }
   return true;
diff --git a/src/operators/kernel/fpga/V1/conv_add_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp
old mode 100755
new mode 100644
index e566dc9b165811a3e8a9f78d040cc8c571fd93a9..153be5a4f888c2a39a7b05b9a7fbb72e305acb8d
--- a/src/operators/kernel/fpga/V1/conv_add_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp
@@ -21,7 +21,10 @@ namespace operators {
 
 template <>
 bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // Formerly `relu_enabled = false`; "no activation" is now NONE.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
 
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
diff --git a/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp
old mode 100755
new mode 100644
index 6b2a2d77c0df29b4c319061776491b0583157d6f..eef35bf74b6b28e3ec0c49d6b7ace0a350f3f194
--- a/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp
@@ -21,7 +21,10 @@ namespace operators {
 
 template <>
 bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // Formerly `relu_enabled = true`; ReLU is now LEAKYRELU with slope 0.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
 
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
diff --git a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp
index 492d418b9023a3c4c802da099a5da5ebf5568649..c4c2bf184d536ace31e52defb59e97c154386464 100644
--- a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp
@@ -22,7 +22,10 @@ namespace operators {
 
 template <>
 bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // Formerly `relu_enabled = false`; "no activation" is now NONE.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();
@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
 
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
diff --git a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp
index 337b25ffa5d3ba00cd60935f8643213cb5ea70d3..463c90d1bb0dcd48a7b41aff73b830d14f989c73 100644
--- a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp
@@ -22,7 +22,10 @@ namespace operators {
 
 template <>
 bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // Formerly `relu_enabled = true`; ReLU is now LEAKYRELU with slope 0.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   auto filter = const_cast<Tensor *>(param->Filter());
   auto out = param->Output();
@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
 
   fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
-                       param->Groups(), param->Strides()[0],
-                       param->Strides()[1], param->Paddings()[0],
-                       param->Paddings()[1], bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
+                       leaky_relu_negative_slope, param->Groups(),
+                       param->Strides()[0], param->Strides()[1],
+                       param->Paddings()[0], param->Paddings()[1], bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
diff --git a/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp
index 83adddabf0213a441779815d312161d1737d1296..97a4d5516b52939a3a1d90a22c8050679810d405 100644
--- a/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp
@@ -23,7 +23,10 @@ namespace operators {
 
 template <>
 bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // Formerly `relu_enabled = false`; "no activation" is now NONE.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -49,13 +52,24 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
                         "filter width should be equal to filter height ");
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
-  fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
-  fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
-  param->SetFpgaArgs(deconv_arg);
+  if (param->Groups() == channel) {
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
 
   return true;
 }
@@ -63,7 +77,11 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
 template <>
 void DeconvAddKernel<FPGA, float>::Compute(
     const FusionDeconvAddParam<FPGA> &param) {
-  fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+  } else {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  }
 }
 
 }  // namespace operators
diff --git a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp
index 9a96ca6e53644e6b5a8a99a8eed2f5e92449e681..f0b29943d7731d716a19cff1e3cfc904d7610c0b 100644
--- a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp
@@ -24,7 +24,10 @@ namespace operators {
 template <>
 bool DeconvAddReluKernel<FPGA, float>::Init(
     FusionDeconvAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // Formerly `relu_enabled = true`; ReLU is now LEAKYRELU with slope 0.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto input = const_cast<Tensor *>(param->Input());
   const Tensor *bias = param->Bias();
   auto bias_ptr = bias->data<float>();
@@ -50,20 +53,36 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
                         "filter width should be equal to filter height ");
   PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
                         "filter axis should be the multiple of stride axis ");
-  fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
-  fpga::DeconvArgs deconv_arg = {0};
-  fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
-                        param->Groups(), param->Strides()[0],
-                        param->Strides()[1], param->Paddings()[0],
-                        param->Paddings()[1], bs_ptr);
-  param->SetFpgaArgs(deconv_arg);
+  if (param->Groups() == channel) {
+    fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
+                               sub_conv_n);
+    fpga::DWDeconvArgs DWDeconv_arg = {0};
+    fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
+                            activation_enable, leaky_relu_negative_slope,
+                            param->Strides()[0], param->Strides()[1],
+                            param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(DWDeconv_arg);
+  } else {
+    fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
+    fpga::DeconvArgs deconv_arg = {0};
+    fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
+                          leaky_relu_negative_slope, param->Groups(),
+                          param->Strides()[0], param->Strides()[1],
+                          param->Paddings()[0], param->Paddings()[1], bs_ptr);
+    param->SetFpgaArgs(deconv_arg);
+  }
   return true;
 }
 
 template <>
 void DeconvAddReluKernel<FPGA, float>::Compute(
     const FusionDeconvAddReluParam<FPGA> &param) {
-  fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  // Dispatch on Groups() == Output()->dims()[1]: DW deconv vs. plain deconv.
+  if (param.Groups() == param.Output()->dims()[1]) {
+    fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
+  } else {
+    fpga::ComputeFpgaDeconv(param.FpgaArgs());
+  }
 }
 
 }  // namespace operators
diff --git a/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp
index be773412f099410b02f24b1d38d2a44d6ca77689..27eee7e5ba7045473ff035f45236d04e080a692e 100644
--- a/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp
@@ -20,7 +20,10 @@ namespace operators {
 
 template <>
 bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // Formerly `relu_enabled = false`; "no activation" is now NONE.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();
@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
   auto out_ptr = out->mutable_data<float>();
 
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // Activation now lives in output.activation (was ewaddArgs.relu_enabled).
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
diff --git a/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp
index 541bb6126509dc7da59fa6bed5c46aff3442928b..fbbe679d4b6a6d4b0ca0a25ebb7aacf93a133943 100644
--- a/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp
@@ -21,7 +21,10 @@ namespace operators {
 template <>
 bool ElementwiseAddReluKernel<FPGA, float>::Init(
     ElementwiseAddReluParam<FPGA> *param) {
-  bool relu_enabled = true;
+  // Formerly `relu_enabled = true`; ReLU is now LEAKYRELU with slope 0.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::LEAKYRELU;
+  int16_t leaky_relu_negative_slope = 0;
   auto *input_x = const_cast<LoDTensor *>(param->InputX());
   auto *input_y = const_cast<LoDTensor *>(param->InputY());
   auto *out = param->Out();
@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
   auto out_ptr = out->mutable_data<float>();
 
   fpga::EWAddArgs ewaddArgs = {0};
-  ewaddArgs.relu_enabled = relu_enabled;
+  // Activation now lives in output.activation (was ewaddArgs.relu_enabled).
+  ewaddArgs.output.activation.activation_type = activation_enable;
+  ewaddArgs.output.activation.leaky_relu_negative_slope =
+      leaky_relu_negative_slope;
   ewaddArgs.const0 = 0x3c00;  // =1
   ewaddArgs.const1 = 0x3c00;  // =1
   ewaddArgs.image0.address = input_x_ptr;
diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
index e6e4591168b90cbe19b207cd9e77eaf5cd07de80..c00bdf57a259e24669c33f011d7b77eb20d4b308 100644
--- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp
@@ -19,12 +19,34 @@ namespace operators {
 
 template <>
 bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
+  Tensor *output = param->Out();
+  // fpga::format_fp16_ofm(output);
   return true;
 }
 
 template <>
 void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
   param.Out()->ShareDataWith(*(param.InputX()));
+  /*auto input =
+          reinterpret_cast<Tensor *>(const_cast<Tensor *>(param.InputX()));
+  fpga::format_image(input);
+  auto input_ptr = input->data<float>();
+  Tensor *output = param.Out();
+  auto output_ptr = output->data<float>();
+
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
+
+  args.input_data_type = fpga::DATA_TYPE_FP16;
+  args.output_data_type = fpga::DATA_TYPE_FP32;
+  args.input_layout_type = fpga::LAYOUT_CHW;
+  args.output_layout_type = fpga::LAYOUT_HWC;
+  args.image.address = reinterpret_cast<void *>(input_ptr);
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] :
+  1; args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3]
+  : 1; args.image.pad_height = 0; args.image.pad_width = 0; args.output.address
+  = output_ptr; args.output.scale_address = output->scale;
+  fpga::PerformBypass(args);*/
 }
 
 template class FetchKernel<FPGA, float>;
diff --git a/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp
index 9258fb90e1e6bf9a597a387843ce781858628139..fadeae324ff8f5160bc5ff410c2e02b09539a01e 100644
--- a/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp
@@ -20,7 +20,10 @@ namespace operators {
 
 template <>
 bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
-  bool relu_enabled = false;
+  // Formerly `relu_enabled = false`; "no activation" is now NONE.
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::NONE;
+  int16_t leaky_relu_negative_slope = 0;
   auto input_x = const_cast<LoDTensor *>(param->InputX());
   auto filter = const_cast<Tensor *>(param->InputY());
   const Tensor *input_z = param->InputZ();
@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
   fpga::format_fp16_ofm(out);
 
   fpga::SplitConvArgs conv_arg = {0};
-  fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
-                       0, 0, bs_ptr);
+  fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
+                       leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
   param->SetFpgaArgs(conv_arg);
   return true;
 }
diff --git a/src/operators/kernel/fpga/V1/reshape_kernel.cpp b/src/operators/kernel/fpga/V1/reshape_kernel.cpp
index f5495e6d005f7f7c14ebd3d290ea9be02b9f0951..5e01bb74bab6996ca59632ae31f37ecfeafc918c 100644
--- a/src/operators/kernel/fpga/V1/reshape_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/reshape_kernel.cpp
@@ -22,6 +22,12 @@ namespace operators {
 template <>
 bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) {
   param->Out()->ShareDataWith(*param->InputX());
+  const int in_n = param->InputX()->dims()[0];
+  const int in_c = param->InputX()->dims()[1];
+  const int in_h = param->InputX()->dims()[2];
+  const int in_w = param->InputX()->dims()[3];
+  auto out = param->Out();
+  out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w}));
   return true;
 }
 
diff --git a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..6c836e2776891f283677287eae54019f0dbef39b
--- /dev/null
+++ b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp
@@ -0,0 +1,56 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#ifdef SIGMOID_OP
+
+#include "operators/kernel/activation_kernel.h"
+
+namespace paddle_mobile {
+namespace operators {
+
+template <>
+bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
+  paddle_mobile::fpga::ActivationType activation_enable =
+      paddle_mobile::fpga::SIGMOID;
+  int16_t leaky_relu_negative_slope = 0;
+  auto input = const_cast<Tensor *>(param->InputX());
+  auto input_ptr = input->data<float>();
+  auto out = param->Out();
+  fpga::format_fp16_ofm(out);
+
+  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
+  args.input_data_type = fpga::DATA_TYPE_FP16;
+  args.output_data_type = fpga::DATA_TYPE_FP16;
+  args.image.address = input_ptr;
+  args.image.height =
+      (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
+  args.image.width =
+      (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
+  args.image.channels = (uint32_t)input->dims()[1];
+  args.output.address = out->data<float>();
+  args.output.scale_address = out->scale;
+  args.output.activation.activation_type = activation_enable;
+  args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
+  param->SetFpgaArgs(args);
+  return true;
+}
+template <>
+void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
+  fpga::PerformBypass(param.FpgaArgs());
+}
+
+}  // namespace operators
+}  // namespace paddle_mobile
+
+#endif
diff --git a/src/operators/kernel/fpga/V1/softmax_kernel.cpp b/src/operators/kernel/fpga/V1/softmax_kernel.cpp
index e5ada795b120c1438688089078be20e03f078cbb..2698fdece49409aec017112e8613a706c248cf48 100644
--- a/src/operators/kernel/fpga/V1/softmax_kernel.cpp
+++ b/src/operators/kernel/fpga/V1/softmax_kernel.cpp
@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   auto input_ptr = input->data<float>();
   auto out = param->Out();
   fpga::format_fp32_ofm(out);
-
   auto float_input = new Tensor;
   if (input->dims().size() == 2) {
     float_input->mutable_data<float>({1, input->dims()[1]});
@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   } else {
     DLOG << "wrong dimension of softmax input";
   }
-
   fpga::format_fp32_ofm(float_input);
   fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
   args.input_layout_type = fpga::LAYOUT_HWC;
@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
   args.output.scale_address = float_input->scale;
   param->SetFloatInput(float_input);
   param->SetFpgaArgs(args);
+
   return true;
 }
 
diff --git a/src/operators/op_param.h b/src/operators/op_param.h
index 959bfd7f743401a453ab0169ca773285e2904d4e..e3da6724a8c33501f50bc463ee25a88166f4351b 100644
--- a/src/operators/op_param.h
+++ b/src/operators/op_param.h
@@ -1078,6 +1078,15 @@ class SigmoidParam : public OpParam {
  private:
   RType *input_x_;
   RType *out_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  fpga::BypassArgs fpga_bypass_args;
+
+ public:
+  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
+  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
+#endif
 };
 #endif
 
@@ -1200,6 +1209,20 @@ class FetchParam : public OpParam {
  private:
   RType *input_x_;
   Tensor *out_;
+#ifdef PADDLE_MOBILE_FPGA
+
+ private:
+  std::shared_ptr<RType> float_input_x_;
+  fpga::BypassArgs fpga_bypass_args;
+
+ public:
+  RType *FloatInput() const {
+    return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
+  }
+  void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
+  const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
+  void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
+#endif
 };
 
 #ifdef FILL_CONSTANT_OP
@@ -2357,10 +2380,17 @@ class ConvTransposeParam : public OpParam {
 
  private:
   fpga::DeconvArgs fpga_conv_args;
+  fpga::DWDeconvArgs fpga_DWDeconv_args;
 
  public:
   const fpga::DeconvArgs &FpgaArgs() const { return fpga_conv_args; }
+  const fpga::DWDeconvArgs &FpgaDWDconvArgs() const {
+    return fpga_DWDeconv_args;
+  }
   void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; }
+  void SetFpgaArgs(const fpga::DWDeconvArgs &args) {
+    fpga_DWDeconv_args = args;
+  }
 #endif
 };