diff --git a/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj b/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj index 5596c6f0a24471b42e7aedf48db84384b1042e5c..dbacb00f0d857655ef6048cff24ad6cab5cb91f4 100644 --- a/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj +++ b/metal/MobileNetDemo/MobileNetDemo.xcodeproj/project.pbxproj @@ -243,13 +243,13 @@ ); inputPaths = ( "${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh", - "${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework", + "${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework", ); name = "[CP] Embed Pods Frameworks"; outputFileListPaths = ( ); outputPaths = ( - "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework", + "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; @@ -436,7 +436,7 @@ baseConfigurationReference = 4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; - CODE_SIGN_STYLE = Automatic; + CODE_SIGN_STYLE = Manual; DEVELOPMENT_TEAM = A798K58VVL; INFOPLIST_FILE = MobileNetDemo/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 9.0; @@ -446,6 +446,7 @@ ); PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ForAllDev; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; @@ -456,7 +457,7 @@ baseConfigurationReference = E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */; buildSettings = { ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; - CODE_SIGN_STYLE = Automatic; + CODE_SIGN_STYLE = Manual; DEVELOPMENT_TEAM = A798K58VVL; INFOPLIST_FILE = MobileNetDemo/Info.plist; IPHONEOS_DEPLOYMENT_TARGET = 9.0; @@ -466,6 +467,7 @@ ); PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo; PRODUCT_NAME = "$(TARGET_NAME)"; + PROVISIONING_PROFILE_SPECIFIER = ForAllDev; SWIFT_VERSION = 4.0; TARGETED_DEVICE_FAMILY = "1,2"; }; diff --git a/metal/Podfile b/metal/Podfile index 0262c9beaf3e3d973de4cb2a3d7af041cbff0627..f07622c920f286102e29e9a09bdee52cbcebf116 100644 --- a/metal/Podfile +++ b/metal/Podfile @@ -5,21 +5,25 @@ workspace 'paddle-mobile.xcworkspace' target 'paddle-mobile-demo' do project 'paddle-mobile-demo/paddle-mobile-demo.xcodeproj' - pod 'SwiftProtobuf', '~> 1.0' + # pod 'SwiftProtobuf', '~> 1.0' + pod 'Protobuf', '~> 3.0.0' end target 'paddle-mobile' do project 'paddle-mobile/paddle-mobile.xcodeproj' - pod 'SwiftProtobuf', '~> 1.0' + # pod 'SwiftProtobuf', '~> 1.0' + pod 'Protobuf', '~> 3.0.0' end target 'paddle-mobile-unit-test' do project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj' - pod 'SwiftProtobuf', '~> 1.0' + # pod 'SwiftProtobuf', '~> 1.0' + pod 'Protobuf', '~> 3.0.0' end target 'MobileNetDemo' do project 'MobileNetDemo/MobileNetDemo.xcodeproj' - pod 'SwiftProtobuf', '~> 1.0' + # pod 'SwiftProtobuf', '~> 1.0' + pod 'Protobuf', '~> 3.0.0' end diff --git a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj index 75e48f2d894c54ac385df4d71c014de370af0053..9e7bab8b8afa48656645d953049df8fb51cf5918 100644 --- a/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile-demo/paddle-mobile-demo.xcodeproj/project.pbxproj @@ -411,11 +411,11 @@ ); inputPaths = ( "${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh", - "${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework", + "${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework", ); name = "[CP] Embed Pods Frameworks"; outputPaths = ( - "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework", + "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; diff --git a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj index 50d58bb45bb5c0e8e5ffbbe8f10ce3e41b770f7c..6de1a7f37225222c629841b89549bcdadda12753 100644 --- a/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj/project.pbxproj @@ -234,11 +234,11 @@ ); inputPaths = ( "${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh", - "${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework", + "${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework", ); name = "[CP] Embed Pods Frameworks"; outputPaths = ( - "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework", + "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; diff --git a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj index 4cc97d0e9eea2d79b0e9360aa4c9ce817709fced..17eeb75bfffcd5bb2b0d484b0fe2c9048049bebd 100644 --- a/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj +++ b/metal/paddle-mobile/paddle-mobile.xcodeproj/project.pbxproj @@ -7,6 +7,8 @@ objects = { /* Begin PBXBuildFile section */ + 456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */ = {isa = PBXBuildFile; fileRef = 456BB7B221F5B356001474E2 /* Framework.pbobjc.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc"; }; }; + 456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */ = {isa = PBXBuildFile; fileRef = 456BB7B321F5B356001474E2 /* Framework.pbobjc.h */; settings = {ATTRIBUTES = (Public, ); }; }; 4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; }; 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; }; 4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; }; @@ -47,15 +49,14 @@ FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA620E11CBC0081E9F8 /* Operator.swift */; }; FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */; }; FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA820E11CBC0081E9F8 /* ReluOp.swift */; }; - FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BAF20E11CC20081E9F8 /* framework.pb.swift */; }; FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB020E11CC20081E9F8 /* Scope.swift */; }; FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB120E11CC20081E9F8 /* TensorDesc.swift */; }; - FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */; }; - FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* VarDesc.swift */; }; + FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */; }; + FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */; }; FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB420E11CC20081E9F8 /* Program.swift */; }; - FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* OpDesc.swift */; }; + FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */; }; FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB620E11CC20081E9F8 /* Attribute.swift */; }; - FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* BlockDesc.swift */; }; + FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */; }; FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */; }; FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */; }; FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; }; @@ -93,6 +94,7 @@ FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; }; FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; }; FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */; }; + FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */; }; FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; }; FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; }; FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; }; @@ -137,6 +139,8 @@ /* End PBXBuildFile section */ /* Begin PBXFileReference section */ + 456BB7B221F5B356001474E2 /* Framework.pbobjc.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = Framework.pbobjc.m; sourceTree = ""; }; + 456BB7B321F5B356001474E2 /* Framework.pbobjc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Framework.pbobjc.h; sourceTree = ""; }; 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = ""; }; 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpKernel.swift; sourceTree = ""; }; 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = ""; }; @@ -182,15 +186,14 @@ FC039BA620E11CBC0081E9F8 /* Operator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Operator.swift; sourceTree = ""; }; FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BatchNormOp.swift; sourceTree = ""; }; FC039BA820E11CBC0081E9F8 /* ReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReluOp.swift; sourceTree = ""; }; - FC039BAF20E11CC20081E9F8 /* framework.pb.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = framework.pb.swift; sourceTree = ""; }; FC039BB020E11CC20081E9F8 /* Scope.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Scope.swift; sourceTree = ""; }; FC039BB120E11CC20081E9F8 /* TensorDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TensorDesc.swift; sourceTree = ""; }; - FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ProgramDesc.swift; sourceTree = ""; }; - FC039BB320E11CC20081E9F8 /* VarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VarDesc.swift; sourceTree = ""; }; + FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMProgramDesc.swift; sourceTree = ""; }; + FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMVarDesc.swift; sourceTree = ""; }; FC039BB420E11CC20081E9F8 /* Program.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Program.swift; sourceTree = ""; }; - FC039BB520E11CC20081E9F8 /* OpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OpDesc.swift; sourceTree = ""; }; + FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMOpDesc.swift; sourceTree = ""; }; FC039BB620E11CC20081E9F8 /* Attribute.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Attribute.swift; sourceTree = ""; }; - FC039BB720E11CC20081E9F8 /* BlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BlockDesc.swift; sourceTree = ""; }; + FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMBlockDesc.swift; sourceTree = ""; }; FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReluKernel.swift; sourceTree = ""; }; FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvKernel.swift; sourceTree = ""; }; FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = ""; }; @@ -229,6 +232,7 @@ FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = ""; }; FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = ""; }; FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchKernel.swift; sourceTree = ""; }; + FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BufferToTexture.metal; sourceTree = ""; }; FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = ""; }; FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = ""; }; FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = ""; }; @@ -403,15 +407,16 @@ FC039BAE20E11CC20081E9F8 /* Program */ = { isa = PBXGroup; children = ( - FC039BAF20E11CC20081E9F8 /* framework.pb.swift */, + 456BB7B321F5B356001474E2 /* Framework.pbobjc.h */, + 456BB7B221F5B356001474E2 /* Framework.pbobjc.m */, FC039BB020E11CC20081E9F8 /* Scope.swift */, FC039BB120E11CC20081E9F8 /* TensorDesc.swift */, - FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */, - FC039BB320E11CC20081E9F8 /* VarDesc.swift */, + FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */, + FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */, FC039BB420E11CC20081E9F8 /* Program.swift */, - FC039BB520E11CC20081E9F8 /* OpDesc.swift */, + FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */, FC039BB620E11CC20081E9F8 /* Attribute.swift */, - FC039BB720E11CC20081E9F8 /* BlockDesc.swift */, + FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */, FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */, ); path = Program; @@ -496,6 +501,7 @@ FCEB6837212F00B100D2448E /* metal */ = { isa = PBXGroup; children = ( + FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */, 4AF928812135673D005B6C3A /* ConcatKernel.metal */, 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */, 4AF9288321357BE3005B6C3A /* Elementwise.metal */, @@ -545,6 +551,7 @@ isa = PBXHeadersBuildPhase; buildActionMask = 2147483647; files = ( + 456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */, FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */, ); runOnlyForDeploymentPostprocessing = 0; @@ -582,7 +589,7 @@ TargetAttributes = { FC039B6920E11C3C0081E9F8 = { CreatedOnToolsVersion = 9.3.1; - LastSwiftMigration = 0940; + LastSwiftMigration = 1000; }; }; }; @@ -659,7 +666,7 @@ FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */, FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */, - FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */, + FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */, FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */, FC9D037920E229E4000F735A /* OpParam.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, @@ -670,6 +677,7 @@ FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */, FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, 4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */, + FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */, 4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */, @@ -684,7 +692,6 @@ FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, - FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */, FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */, FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */, FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */, @@ -698,6 +705,7 @@ 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */, 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */, FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */, + 456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */, FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */, 4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */, FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */, @@ -711,7 +719,7 @@ FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */, FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */, FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */, - FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */, + FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */, FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */, 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */, FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */, @@ -749,14 +757,14 @@ FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */, - FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, + FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */, FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */, 4AA1EA90214664CD00D0F791 /* Split.metal in Sources */, FCD04E6820F315020007374F /* PoolKernel.swift in Sources */, FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */, FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */, FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */, - FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */, + FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */, FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */, 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */, FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */, diff --git a/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift b/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift index b94104099635382b3b2e4ec902cc3eed55533d26..1d4f0ec14fa6442be708e729ce841969a12f5582 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Framework/Loader.swift @@ -13,7 +13,7 @@ limitations under the License. */ import Foundation -import SwiftProtobuf +//import SwiftProtobuf public class Loader { class ParaLoader { @@ -145,13 +145,17 @@ public class Loader { public init(){} func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program { do { - let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init( - serializedData: modelData) + /// swift protobuf serialized Data to instance class + // let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init( + // serializedData: modelData) + + /// oc protobuf serialized Data to instance class + let protoProgram = try ProgramDesc.init(data: (modelData as NSData) as Data) - let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram) + let originProgramDesc = PMProgramDesc.init(protoProgram: protoProgram) let programDesc = ProgramOptimize

.init().optimize(originProgramDesc: originProgramDesc) -// let programDesc = ProgramDesc.init(protoProgram: protoProgram) +// let programDesc = PMProgramDesc.init(protoProgram: protoProgram) print(programDesc) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift index 06b078a78699599f692c9976070bb74563c907b2..fcedbd36f7f50b348aab97de18c9fee414f182cf 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpCreator.swift @@ -27,7 +27,7 @@ class OpCreator { } } - func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable { + func creat(device: MTLDevice, opDesc: PMOpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable { guard let opCreator = opCreators[opDesc.type] else { throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet") } @@ -39,7 +39,7 @@ class OpCreator { } } - let opCreators: [String : (MTLDevice, OpDesc, Scope, InitContext) throws -> Runable & InferShaperable] = + let opCreators: [String : (MTLDevice, PMOpDesc, Scope, InitContext) throws -> Runable & InferShaperable] = [gConvType : ConvOp

.creat, gBatchNormType : BatchNormOp

.creat, gReluType : ReluOp

.creat, diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift index b0ac4c231a6d1ad8473eddb1109df1eabb0c59a7..01c22166642a1e16717f2cad3d434d2fb1ed0f76 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/OpParam.swift @@ -27,7 +27,7 @@ protocol OpParam { func outputDesc() -> String //associatedtype ParamPrecisionType: PrecisionType - init(opDesc: OpDesc, inScope: Scope) throws + init(opDesc: PMOpDesc, inScope: Scope) throws static func getFirstTensor(key: String, map: [String : [String]], from: Scope) throws -> VarType static func inputX(inputs: [String : [String]], from: Scope) throws -> VarType static func inputBiase(inputs: [String : [String]], from: Scope) throws -> VarType diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift index d4dd117fec638819f2ef32ac6e8853e711f4603b..532d1b661d4cb0e9823e09a9fc82d13af4f40f76 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Base/Operator.swift @@ -72,11 +72,11 @@ public class InitContext { protocol Creator where Self: OperatorProtocol{ associatedtype OpType: OperatorProtocol & Runable & InferShaperable - static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType + static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType } extension Creator where Self: OperatorProtocol { - static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType { + static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType { do { return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope, initContext: initContext) } catch let error { @@ -100,11 +100,11 @@ protocol OperatorProtocol { var attrs: [String : Attr] { get } var para: ParamType { get } var kernel: KerType { get } - init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws + init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws } extension OperatorProtocol { - static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> Self { + static func provide(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> Self { do { return try Self.init(device: device, opDesc: opDesc, inScope: inScope, initContext: initContext) } catch let error { @@ -114,7 +114,7 @@ extension OperatorProtocol { } class Operator : OperatorProtocol where KernelType.ParamType == ParameterType { - required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws { + required init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws { type = opDesc.type scope = inScope inputs = opDesc.inputs diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift index ccf27a9d4021cf8607bf80555430ee90469b2741..a877620416cb1b12be1ac1ef2a86f198fe75fc60 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/BatchNormOp.swift @@ -17,7 +17,7 @@ import Metal class BatchNormParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope) if input.transpose != [0, 2, 3, 1] { diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift index 6a749e772bff70c78237304f3567a42e2492f20c..a19dd1039073812b024a55c60bfad8c3c1387e71 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/BilinearInterpOp.swift @@ -17,7 +17,7 @@ import Metal class BilinearInterpParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope) output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift index 50a9f5ad88c58f8896857524c6bd4451b34850ad..4679885ab6e5c946d9b335f8b59f8537e37ea967 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/BoxcoderOp.swift @@ -16,7 +16,7 @@ import Foundation class BoxcoderParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope) priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift index cb757edaa0852108864b2f9bef2b945b0575ffba..c2c22d55af6fc33ca69cbc028f149d54285459e7 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConcatOp.swift @@ -16,7 +16,7 @@ import Foundation class ConcatParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { guard let xlist = opDesc.inputs["X"] else { fatalError() diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift index f6fa4dd0a7820cf4dcfe8899a7355332a06f841c..552d72f436bf6de89f52bae186f72a0a778b1f4c 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddAddPreluOp.swift @@ -17,7 +17,7 @@ import Metal class ConvAddAddPreluParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift index 4f4b23cf5809446b5ed321940c09466497fdcf05..6aacd4208e0a46ba6c88f9e2073c6ef3d4753952 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddBatchNormReluOp.swift @@ -17,7 +17,7 @@ import Foundation class ConvAddBatchNormReluParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift index bc3d0de85dbbf9c20096dafc4e08cff1a0115de4..923c2c210ddba99dcebec77ae91299cd28ed638e 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddOp.swift @@ -16,7 +16,7 @@ import Foundation class ConvAddParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift index 1157c0538392d8f211214ab7bb1a3a1a2afac8e6..1c0bbba8d9dba61560ce4be97369fbb406fe238a 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvAddPreluOp.swift @@ -16,7 +16,7 @@ import Foundation class ConvAddPreluParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift index 104ff302872c3549c07f27437ea3c8f01ffbafb5..423e55e391ad9a110fb71af09a16373a322d3d5f 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvBNReluOp.swift @@ -16,7 +16,7 @@ import Foundation class ConvBNReluParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift index 9532ee720cf053e76f4ac2094ed4de1fab0792f5..c66813b166fefd8fe5f139c94d73cf55ff83d682 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvOp.swift @@ -16,7 +16,7 @@ import Foundation class ConvParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) input = try ConvParam.input(inputs: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift index d4bd4140e5097f77fd5bc295a940a8df800a7258..c035f403a62875da14df291bad01766731caf380 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ConvTransposeOp.swift @@ -16,7 +16,7 @@ import Foundation class ConvTransposeParam: ConvParam

{ //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { try super.init(opDesc: opDesc, inScope: inScope) } catch let error { diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift index 6588e59194f8550ad63a0adfc2a8bc46e877a437..5fa69d4f44e48603dec9213be78d08b11b433edd 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddOp.swift @@ -17,7 +17,7 @@ import Metal class ElementwiseAddParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift index 72426c1900d0b8a1ff87cc6d9507c5b66228147b..6a49d7bfa2fe4f060eedc84d47a8c1f8d64ee4d0 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ElementwiseAddPreluOp.swift @@ -17,7 +17,7 @@ import Metal class ElementwiseAddPreluParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift index 5b3179191b172ae6ec777e392c7832d42cf26d43..46defcb58332a02cbc365a087708e792a66c6e5c 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/FeedOp.swift @@ -23,7 +23,7 @@ class FeedParam: OpParam{ } let scope: Scope - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { scope = inScope do { output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift index 477384cdac8bdc7dcfdd2c1bbea087d697ca5028..a5d04a4b03a182a4e843a31628bd2892de597093 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/FetchOp.swift @@ -19,7 +19,7 @@ class FetchParam: OpParam{ var output: FetchHolder let input: Texture let scope: Scope - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { scope = inScope do { input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift index dabb7792a2fc2314a797cd8ea412c7bf689c91cd..8500798adc75f9fac9e960857e9b0de319157c95 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/FlattenOp.swift @@ -16,7 +16,7 @@ import Foundation class FlattenParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope) output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/metal/BufferToTexture.metal b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/metal/BufferToTexture.metal new file mode 100644 index 0000000000000000000000000000000000000000..3c07872616bb7c2f130d92247feeeeaa60ece21e --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/Kernels/metal/BufferToTexture.metal @@ -0,0 +1,36 @@ +// +// RGBToYCrCb_Y.metal +// paddle-mobile-demo +// +// Created by liuRuiLong on 2018/12/28. +// Copyright © 2018 orange. All rights reserved. +// + +#include +using namespace metal; + +kernel void buffer_to_texture_kernel( + const device float *input [[buffer(0)]], + texture2d outTexture [[texture(0)]], + uint2 gid [[thread_position_in_grid]]){ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + + float y = input[outTexture.get_width() * gid.y + gid.x]; + outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid); +} + +kernel void buffer_to_texture_kernel_half(const device float *input [[buffer(0)]], + texture2d outTexture [[texture(0)]], + uint2 gid [[thread_position_in_grid]]){ + if (gid.x >= outTexture.get_width() || + gid.y >= outTexture.get_height()) { + return; + } + + float y = input[outTexture.get_width() * gid.y + gid.x]; + outTexture.write(half4(y, 0.0f, 0.0f, 0.0f), gid); +} + diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift index 3a10dfdfcb13ccfed2d040107da663337bcf2a12..6d2e46b64986300556898596ea881a254709f472 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/MulticlassNMSOp.swift @@ -16,7 +16,7 @@ import Foundation class MulticlassNMSParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope) bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift index 4f5ba3a52d745cbe3db3dc4628b89852e840f7b6..e57c8f48e362af8cae8fedbb5a0292775f0ce923 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/PoolOp.swift @@ -16,7 +16,7 @@ import Foundation class PoolParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope) output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift index 06732b87aa3eefc44b0beabe3c07b18057ba42f9..b7150c2fea85b7a6da6ae883e95c751484db6af6 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/PreluOp.swift @@ -16,7 +16,7 @@ import Foundation class PreluParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope) output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift index fe6dea8f437ed63bed76b475fbd03dd64f966ef1..bff7c9870a3dc70e820b02ad775ca8a19527c26d 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/PriorBoxOp.swift @@ -16,7 +16,7 @@ import Foundation class PriorBoxParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs) } catch _ { diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift index 488e3408df67ea08d75f7cc080395e44d9651699..ef109081061c601fb17a23e943dcd01af618b724 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReluOp.swift @@ -17,7 +17,7 @@ import Foundation class ReluParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope) output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift index 71019014e5be259ecfa7d9ef287d75729d791501..e40eae02d0c11c0bd372514466b28cef27dea96b 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ReshapeOp.swift @@ -17,7 +17,7 @@ import Metal class ReshapeParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope) output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift index e915f561c86a64c24d34a7345b2eb8d49041188d..980bb734a796c067012855f8a0d0c4ccef33afdb 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ResizeBilinearOp.swift @@ -16,7 +16,7 @@ import Foundation class ResizeBilinearParam: OpParam { typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope) // if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) { diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift index 76d63ed86dc96a6d90a4cde29909992cbfa35ceb..c13c3864e4f73bdad1b83e19ca9f66051eea266d 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/ShapeOp.swift @@ -16,7 +16,7 @@ import Foundation class ShapeParam: OpParam { // typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope) output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift index d1c7d46fef08ba8c6792758fd4d661aff089b327..2b2455eaa60142f890c7ee5e14244c77854a0ccd 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/SoftmaxOp.swift @@ -17,7 +17,7 @@ import Metal class SoftmaxParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift index e3fc8814e21e1081721b3fc522b2f7ccf414d725..4d9933f39275d522cec71ca08a591182433d7bae 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/SplitOp.swift @@ -16,7 +16,7 @@ import Foundation class SplitParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope) output = Texture.init(device: input.metalTexture!.device, inDim: input.dim) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift b/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift index 187442e6a2e78686b826e1d6ad36ad2f00a106fd..064955fcac20937ae3ac8a12f51ef52ab5a00ba9 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Operators/TransposeOp.swift @@ -17,7 +17,7 @@ import Metal class TransposeParam: OpParam { //typealias ParamPrecisionType = P - required init(opDesc: OpDesc, inScope: Scope) throws { + required init(opDesc: PMOpDesc, inScope: Scope) throws { do { input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope) output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift index c26fd2132e6134dbbd05af08835229a31c231b9d..cc8afc994d12eb8a1de7f06ba97011b16f56c4b5 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/Attribute.swift @@ -35,7 +35,11 @@ extension Array: Attr { extension String: Attr { } -func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr { +extension NSMutableArray :Attr { +} + + +func attrWithProtoDesc(attrDesc: OpDesc_Attr) -> Attr { switch attrDesc.type { case .boolean: return attrDesc.b @@ -47,14 +51,33 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At return attrDesc.l case .float: return attrDesc.f + /// convert GPB class to swift class case .booleans: - return attrDesc.bools + var dimsArray = [Bool]() + let dimsCount = attrDesc.boolsArray.count + for i in 0.. +//#else +// #import "GPBProtocolBuffers.h" +//#endif +#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS +#import +#else +#import +#endif + +#if GOOGLE_PROTOBUF_OBJC_GEN_VERSION != 30001 +#error This file was generated by a different version of protoc which is incompatible with your Protocol Buffer library sources. +#endif + +// @@protoc_insertion_point(imports) + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + +CF_EXTERN_C_BEGIN + +@class BlockDesc; +@class OpDesc; +@class OpDesc_Attr; +@class OpDesc_Var; +@class OpProto_Attr; +@class OpProto_Var; +@class VarDesc; +@class VarType; +@class VarType_ChannelDesc; +@class VarType_LoDTensorArrayDesc; +@class VarType_LoDTensorDesc; +@class VarType_ReaderDesc; +@class VarType_TensorDesc; +@class VarType_Tuple; +@class Version; + +NS_ASSUME_NONNULL_BEGIN + +#pragma mark - Enum AttrType + +typedef GPB_ENUM(AttrType) { + AttrType_Int = 0, + AttrType_Float = 1, + AttrType_String = 2, + AttrType_Ints = 3, + AttrType_Floats = 4, + AttrType_Strings = 5, + AttrType_Boolean = 6, + AttrType_Booleans = 7, + AttrType_Block = 8, + AttrType_Long = 9, + AttrType_Blocks = 10, +}; + +GPBEnumDescriptor *AttrType_EnumDescriptor(void); + +/// Checks to see if the given value is defined by the enum or was not known at +/// the time this source was generated. +BOOL AttrType_IsValidValue(int32_t value); + +#pragma mark - Enum VarType_Type + +typedef GPB_ENUM(VarType_Type) { + /// Pod Types + VarType_Type_Bool = 0, + VarType_Type_Int16 = 1, + VarType_Type_Int32 = 2, + VarType_Type_Int64 = 3, + VarType_Type_Fp16 = 4, + VarType_Type_Fp32 = 5, + VarType_Type_Fp64 = 6, + + /// Tensor is used in C++. + VarType_Type_SizeT = 19, + VarType_Type_Uint8 = 20, + VarType_Type_Int8 = 21, + + /// Other types that may need additional descriptions + VarType_Type_LodTensor = 7, + VarType_Type_SelectedRows = 8, + VarType_Type_FeedMinibatch = 9, + VarType_Type_FetchList = 10, + VarType_Type_StepScopes = 11, + VarType_Type_LodRankTable = 12, + VarType_Type_LodTensorArray = 13, + VarType_Type_PlaceList = 14, + VarType_Type_Reader = 15, + VarType_Type_Channel = 16, + + /// Any runtime decided variable type is raw + /// raw variables should manage their own allocations + /// in operators like nccl_op + VarType_Type_Raw = 17, + VarType_Type_Tuple = 18, +}; + +GPBEnumDescriptor *VarType_Type_EnumDescriptor(void); + +/// Checks to see if the given value is defined by the enum or was not known at +/// the time this source was generated. +BOOL VarType_Type_IsValidValue(int32_t value); + +#pragma mark - FrameworkRoot + +/// Exposes the extension registry for this file. +/// +/// The base class provides: +/// @code +/// + (GPBExtensionRegistry *)extensionRegistry; +/// @endcode +/// which is a @c GPBExtensionRegistry that includes all the extensions defined by +/// this file and all files that it depends on. +@interface FrameworkRoot : GPBRootObject +@end + +#pragma mark - Version + +typedef GPB_ENUM(Version_FieldNumber) { + Version_FieldNumber_Version = 1, +}; + +/// Any incompatible changes to ProgramDesc and its dependencies should +/// raise the version defined version.h. +/// +/// Serailization and Deserialization codes should be modified in a way +/// that supports old versions following the version and compatibility policy. +@interface Version : GPBMessage + +@property(nonatomic, readwrite) int64_t version; + +@property(nonatomic, readwrite) BOOL hasVersion; +@end + +#pragma mark - OpDesc + +typedef GPB_ENUM(OpDesc_FieldNumber) { + OpDesc_FieldNumber_InputsArray = 1, + OpDesc_FieldNumber_OutputsArray = 2, + OpDesc_FieldNumber_Type = 3, + OpDesc_FieldNumber_AttrsArray = 4, + OpDesc_FieldNumber_IsTarget = 5, +}; + +/// OpDesc describes an instance of a C++ framework::OperatorBase +/// derived class type. +@interface OpDesc : GPBMessage + +@property(nonatomic, readwrite, copy, null_resettable) NSString *type; +/// Test to see if @c type has been set. +@property(nonatomic, readwrite) BOOL hasType; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *inputsArray; +/// The number of items in @c inputsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger inputsArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *outputsArray; +/// The number of items in @c outputsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger outputsArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *attrsArray; +/// The number of items in @c attrsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger attrsArray_Count; + +@property(nonatomic, readwrite) BOOL isTarget; + +@property(nonatomic, readwrite) BOOL hasIsTarget; +@end + +#pragma mark - OpDesc_Attr + +typedef GPB_ENUM(OpDesc_Attr_FieldNumber) { + OpDesc_Attr_FieldNumber_Name = 1, + OpDesc_Attr_FieldNumber_Type = 2, + OpDesc_Attr_FieldNumber_I = 3, + OpDesc_Attr_FieldNumber_F = 4, + OpDesc_Attr_FieldNumber_S = 5, + OpDesc_Attr_FieldNumber_IntsArray = 6, + OpDesc_Attr_FieldNumber_FloatsArray = 7, + OpDesc_Attr_FieldNumber_StringsArray = 8, + OpDesc_Attr_FieldNumber_B = 10, + OpDesc_Attr_FieldNumber_BoolsArray = 11, + OpDesc_Attr_FieldNumber_BlockIdx = 12, + OpDesc_Attr_FieldNumber_L = 13, + OpDesc_Attr_FieldNumber_BlocksIdxArray = 14, +}; + +@interface OpDesc_Attr : GPBMessage + +@property(nonatomic, readwrite, copy, null_resettable) NSString *name; +/// Test to see if @c name has been set. +@property(nonatomic, readwrite) BOOL hasName; + +@property(nonatomic, readwrite) AttrType type; + +@property(nonatomic, readwrite) BOOL hasType; +@property(nonatomic, readwrite) int32_t i; + +@property(nonatomic, readwrite) BOOL hasI; +@property(nonatomic, readwrite) float f; + +@property(nonatomic, readwrite) BOOL hasF; +@property(nonatomic, readwrite, copy, null_resettable) NSString *s; +/// Test to see if @c s has been set. +@property(nonatomic, readwrite) BOOL hasS; + +@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *intsArray; +/// The number of items in @c intsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger intsArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) GPBFloatArray *floatsArray; +/// The number of items in @c floatsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger floatsArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *stringsArray; +/// The number of items in @c stringsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger stringsArray_Count; + +@property(nonatomic, readwrite) BOOL b; + +@property(nonatomic, readwrite) BOOL hasB; +@property(nonatomic, readwrite, strong, null_resettable) GPBBoolArray *boolsArray; +/// The number of items in @c boolsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger boolsArray_Count; + +@property(nonatomic, readwrite) int32_t blockIdx; + +@property(nonatomic, readwrite) BOOL hasBlockIdx; +@property(nonatomic, readwrite) int64_t l; + +@property(nonatomic, readwrite) BOOL hasL; +@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *blocksIdxArray; +/// The number of items in @c blocksIdxArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger blocksIdxArray_Count; + +@end + +#pragma mark - OpDesc_Var + +typedef GPB_ENUM(OpDesc_Var_FieldNumber) { + OpDesc_Var_FieldNumber_Parameter = 1, + OpDesc_Var_FieldNumber_ArgumentsArray = 2, +}; + +@interface OpDesc_Var : GPBMessage + +@property(nonatomic, readwrite, copy, null_resettable) NSString *parameter; +/// Test to see if @c parameter has been set. +@property(nonatomic, readwrite) BOOL hasParameter; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *argumentsArray; +/// The number of items in @c argumentsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger argumentsArray_Count; + +@end + +#pragma mark - OpProto + +typedef GPB_ENUM(OpProto_FieldNumber) { + OpProto_FieldNumber_Type = 1, + OpProto_FieldNumber_InputsArray = 2, + OpProto_FieldNumber_OutputsArray = 3, + OpProto_FieldNumber_AttrsArray = 4, + OpProto_FieldNumber_Comment = 5, +}; + +/// OpProto describes a C++ framework::OperatorBase derived class. +@interface OpProto : GPBMessage + +@property(nonatomic, readwrite, copy, null_resettable) NSString *type; +/// Test to see if @c type has been set. +@property(nonatomic, readwrite) BOOL hasType; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *inputsArray; +/// The number of items in @c inputsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger inputsArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *outputsArray; +/// The number of items in @c outputsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger outputsArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *attrsArray; +/// The number of items in @c attrsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger attrsArray_Count; + +@property(nonatomic, readwrite, copy, null_resettable) NSString *comment; +/// Test to see if @c comment has been set. +@property(nonatomic, readwrite) BOOL hasComment; + +@end + +#pragma mark - OpProto_Var + +typedef GPB_ENUM(OpProto_Var_FieldNumber) { + OpProto_Var_FieldNumber_Name = 1, + OpProto_Var_FieldNumber_Comment = 2, + OpProto_Var_FieldNumber_Duplicable = 3, + OpProto_Var_FieldNumber_Intermediate = 4, + OpProto_Var_FieldNumber_Dispensable = 5, + OpProto_Var_FieldNumber_Reuse = 6, +}; + +/// VarProto describes the C++ type framework::Variable. +@interface OpProto_Var : GPBMessage + +@property(nonatomic, readwrite, copy, null_resettable) NSString *name; +/// Test to see if @c name has been set. +@property(nonatomic, readwrite) BOOL hasName; + +@property(nonatomic, readwrite, copy, null_resettable) NSString *comment; +/// Test to see if @c comment has been set. +@property(nonatomic, readwrite) BOOL hasComment; + +@property(nonatomic, readwrite) BOOL duplicable; + +@property(nonatomic, readwrite) BOOL hasDuplicable; +@property(nonatomic, readwrite) BOOL intermediate; + +@property(nonatomic, readwrite) BOOL hasIntermediate; +@property(nonatomic, readwrite) BOOL dispensable; + +@property(nonatomic, readwrite) BOOL hasDispensable; +@property(nonatomic, readwrite, copy, null_resettable) NSString *reuse; +/// Test to see if @c reuse has been set. +@property(nonatomic, readwrite) BOOL hasReuse; + +@end + +#pragma mark - OpProto_Attr + +typedef GPB_ENUM(OpProto_Attr_FieldNumber) { + OpProto_Attr_FieldNumber_Name = 1, + OpProto_Attr_FieldNumber_Type = 2, + OpProto_Attr_FieldNumber_Comment = 3, + OpProto_Attr_FieldNumber_Generated = 4, +}; + +/// AttrProto describes the C++ type Attribute. +@interface OpProto_Attr : GPBMessage + +@property(nonatomic, readwrite, copy, null_resettable) NSString *name; +/// Test to see if @c name has been set. +@property(nonatomic, readwrite) BOOL hasName; + +@property(nonatomic, readwrite) AttrType type; + +@property(nonatomic, readwrite) BOOL hasType; +@property(nonatomic, readwrite, copy, null_resettable) NSString *comment; +/// Test to see if @c comment has been set. +@property(nonatomic, readwrite) BOOL hasComment; + +/// If that attribute is generated, it means the Paddle third +/// language binding has responsibility to fill that +/// attribute. End-User should not set that attribute. +@property(nonatomic, readwrite) BOOL generated; + +@property(nonatomic, readwrite) BOOL hasGenerated; +@end + +#pragma mark - VarType + +typedef GPB_ENUM(VarType_FieldNumber) { + VarType_FieldNumber_Type = 1, + VarType_FieldNumber_SelectedRows = 2, + VarType_FieldNumber_LodTensor = 3, + VarType_FieldNumber_TensorArray_p = 4, + VarType_FieldNumber_Reader = 5, + VarType_FieldNumber_Channel = 6, + VarType_FieldNumber_Tuple = 7, +}; + +@interface VarType : GPBMessage + +@property(nonatomic, readwrite) VarType_Type type; + +@property(nonatomic, readwrite) BOOL hasType; +@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *selectedRows; +/// Test to see if @c selectedRows has been set. +@property(nonatomic, readwrite) BOOL hasSelectedRows; + +@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorDesc *lodTensor; +/// Test to see if @c lodTensor has been set. +@property(nonatomic, readwrite) BOOL hasLodTensor; + +@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorArrayDesc *tensorArray_p; +/// Test to see if @c tensorArray_p has been set. +@property(nonatomic, readwrite) BOOL hasTensorArray_p; + +@property(nonatomic, readwrite, strong, null_resettable) VarType_ReaderDesc *reader; +/// Test to see if @c reader has been set. +@property(nonatomic, readwrite) BOOL hasReader; + +@property(nonatomic, readwrite, strong, null_resettable) VarType_ChannelDesc *channel; +/// Test to see if @c channel has been set. +@property(nonatomic, readwrite) BOOL hasChannel; + +@property(nonatomic, readwrite, strong, null_resettable) VarType_Tuple *tuple; +/// Test to see if @c tuple has been set. +@property(nonatomic, readwrite) BOOL hasTuple; + +@end + +#pragma mark - VarType_TensorDesc + +typedef GPB_ENUM(VarType_TensorDesc_FieldNumber) { + VarType_TensorDesc_FieldNumber_DataType = 1, + VarType_TensorDesc_FieldNumber_DimsArray = 2, +}; + +@interface VarType_TensorDesc : GPBMessage + +/// Should only be PODType. Is enforced in C++ +@property(nonatomic, readwrite) VarType_Type dataType; + +@property(nonatomic, readwrite) BOOL hasDataType; +/// [UNK, 640, 480] is saved as [-1, 640, 480] +@property(nonatomic, readwrite, strong, null_resettable) GPBInt64Array *dimsArray; +/// The number of items in @c dimsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger dimsArray_Count; + +@end + +#pragma mark - VarType_LoDTensorDesc + +typedef GPB_ENUM(VarType_LoDTensorDesc_FieldNumber) { + VarType_LoDTensorDesc_FieldNumber_Tensor = 1, + VarType_LoDTensorDesc_FieldNumber_LodLevel = 2, +}; + +@interface VarType_LoDTensorDesc : GPBMessage + +@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor; +/// Test to see if @c tensor has been set. +@property(nonatomic, readwrite) BOOL hasTensor; + +@property(nonatomic, readwrite) int32_t lodLevel; + +@property(nonatomic, readwrite) BOOL hasLodLevel; +@end + +#pragma mark - VarType_LoDTensorArrayDesc + +typedef GPB_ENUM(VarType_LoDTensorArrayDesc_FieldNumber) { + VarType_LoDTensorArrayDesc_FieldNumber_Tensor = 1, + VarType_LoDTensorArrayDesc_FieldNumber_LodLevel = 2, +}; + +@interface VarType_LoDTensorArrayDesc : GPBMessage + +@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor; +/// Test to see if @c tensor has been set. +@property(nonatomic, readwrite) BOOL hasTensor; + +@property(nonatomic, readwrite) int32_t lodLevel; + +@property(nonatomic, readwrite) BOOL hasLodLevel; +@end + +#pragma mark - VarType_ReaderDesc + +typedef GPB_ENUM(VarType_ReaderDesc_FieldNumber) { + VarType_ReaderDesc_FieldNumber_LodTensorArray = 1, +}; + +@interface VarType_ReaderDesc : GPBMessage + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *lodTensorArray; +/// The number of items in @c lodTensorArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger lodTensorArray_Count; + +@end + +#pragma mark - VarType_ChannelDesc + +typedef GPB_ENUM(VarType_ChannelDesc_FieldNumber) { + VarType_ChannelDesc_FieldNumber_DataType = 1, + VarType_ChannelDesc_FieldNumber_Capacity = 2, +}; + +@interface VarType_ChannelDesc : GPBMessage + +@property(nonatomic, readwrite) VarType_Type dataType; + +@property(nonatomic, readwrite) BOOL hasDataType; +@property(nonatomic, readwrite) int64_t capacity; + +@property(nonatomic, readwrite) BOOL hasCapacity; +@end + +#pragma mark - VarType_Tuple + +typedef GPB_ENUM(VarType_Tuple_FieldNumber) { + VarType_Tuple_FieldNumber_ElementTypeArray = 1, +}; + +@interface VarType_Tuple : GPBMessage + +// |elementTypeArray| contains |VarType_Type| +@property(nonatomic, readwrite, strong, null_resettable) GPBEnumArray *elementTypeArray; +/// The number of items in @c elementTypeArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger elementTypeArray_Count; + +@end + +#pragma mark - VarDesc + +typedef GPB_ENUM(VarDesc_FieldNumber) { + VarDesc_FieldNumber_Name = 1, + VarDesc_FieldNumber_Type = 2, + VarDesc_FieldNumber_Persistable = 3, +}; + +@interface VarDesc : GPBMessage + +@property(nonatomic, readwrite, copy, null_resettable) NSString *name; +/// Test to see if @c name has been set. +@property(nonatomic, readwrite) BOOL hasName; + +@property(nonatomic, readwrite, strong, null_resettable) VarType *type; +/// Test to see if @c type has been set. +@property(nonatomic, readwrite) BOOL hasType; + +@property(nonatomic, readwrite) BOOL persistable; + +@property(nonatomic, readwrite) BOOL hasPersistable; +@end + +#pragma mark - BlockDesc + +typedef GPB_ENUM(BlockDesc_FieldNumber) { + BlockDesc_FieldNumber_Idx = 1, + BlockDesc_FieldNumber_ParentIdx = 2, + BlockDesc_FieldNumber_VarsArray = 3, + BlockDesc_FieldNumber_OpsArray = 4, + BlockDesc_FieldNumber_ForwardBlockIdx = 5, +}; + +@interface BlockDesc : GPBMessage + +@property(nonatomic, readwrite) int32_t idx; + +@property(nonatomic, readwrite) BOOL hasIdx; +@property(nonatomic, readwrite) int32_t parentIdx; + +@property(nonatomic, readwrite) BOOL hasParentIdx; +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *varsArray; +/// The number of items in @c varsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger varsArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *opsArray; +/// The number of items in @c opsArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger opsArray_Count; + +@property(nonatomic, readwrite) int32_t forwardBlockIdx; + +@property(nonatomic, readwrite) BOOL hasForwardBlockIdx; +@end + +#pragma mark - ProgramDesc + +typedef GPB_ENUM(ProgramDesc_FieldNumber) { + ProgramDesc_FieldNumber_BlocksArray = 1, + ProgramDesc_FieldNumber_Version = 2, +}; + +/// Please refer to +/// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md +/// for more details. +/// TODO(panyx0718): A model can have multiple programs. Need a +/// way to distinguish them. Maybe ID or name? +@interface ProgramDesc : GPBMessage + +@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray *blocksArray; +/// The number of items in @c blocksArray without causing the array to be created. +@property(nonatomic, readonly) NSUInteger blocksArray_Count; + +@property(nonatomic, readwrite, strong, null_resettable) Version *version; +/// Test to see if @c version has been set. +@property(nonatomic, readwrite) BOOL hasVersion; + +@end + +NS_ASSUME_NONNULL_END + +CF_EXTERN_C_END + +#pragma clang diagnostic pop + +// @@protoc_insertion_point(global_scope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.m b/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.m new file mode 100755 index 0000000000000000000000000000000000000000..00dad2662c232fdc6183179b4068916cc71a596c --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/Framework.pbobjc.m @@ -0,0 +1,1417 @@ +// Generated by the protocol buffer compiler. DO NOT EDIT! +// source: Framework.proto + +// This CPP symbol can be defined to use imports that match up to the framework +// imports needed when using CocoaPods. +#if !defined(GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS) + #define GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS 0 +#endif + +//#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS +// #import +//#else +// #import "GPBProtocolBuffers_RuntimeSupport.h" +//#endif + +#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS +#import +#else +#import +#endif + + #import "Framework.pbobjc.h" +// @@protoc_insertion_point(imports) + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wdeprecated-declarations" + +#pragma mark - FrameworkRoot + +@implementation FrameworkRoot + +@end + +#pragma mark - FrameworkRoot_FileDescriptor + +static GPBFileDescriptor *FrameworkRoot_FileDescriptor(void) { + // This is called by +initialize so there is no need to worry + // about thread safety of the singleton. + static GPBFileDescriptor *descriptor = NULL; + if (!descriptor) { + GPBDebugCheckRuntimeVersion(); + descriptor = [[GPBFileDescriptor alloc] initWithPackage:@"paddle_mobile.framework.proto" + syntax:GPBFileSyntaxProto2]; + } + return descriptor; +} + +#pragma mark - Enum AttrType + +GPBEnumDescriptor *AttrType_EnumDescriptor(void) { + static GPBEnumDescriptor *descriptor = NULL; + if (!descriptor) { + static const char *valueNames = + "Int\000Float\000String\000Ints\000Floats\000Strings\000Boo" + "lean\000Booleans\000Block\000Long\000Blocks\000"; + static const int32_t values[] = { + AttrType_Int, + AttrType_Float, + AttrType_String, + AttrType_Ints, + AttrType_Floats, + AttrType_Strings, + AttrType_Boolean, + AttrType_Booleans, + AttrType_Block, + AttrType_Long, + AttrType_Blocks, + }; + GPBEnumDescriptor *worker = + [GPBEnumDescriptor allocDescriptorForName:GPBNSStringifySymbol(AttrType) + valueNames:valueNames + values:values + count:(uint32_t)(sizeof(values) / sizeof(int32_t)) + enumVerifier:AttrType_IsValidValue]; + if (!OSAtomicCompareAndSwapPtrBarrier(nil, worker, (void * volatile *)&descriptor)) { + [worker release]; + } + } + return descriptor; +} + +BOOL AttrType_IsValidValue(int32_t value__) { + switch (value__) { + case AttrType_Int: + case AttrType_Float: + case AttrType_String: + case AttrType_Ints: + case AttrType_Floats: + case AttrType_Strings: + case AttrType_Boolean: + case AttrType_Booleans: + case AttrType_Block: + case AttrType_Long: + case AttrType_Blocks: + return YES; + default: + return NO; + } +} + +#pragma mark - Version + +@implementation Version + +@dynamic hasVersion, version; + +typedef struct Version__storage_ { + uint32_t _has_storage_[1]; + int64_t version; +} Version__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "version", + .dataTypeSpecific.className = NULL, + .number = Version_FieldNumber_Version, + .hasIndex = 0, + .offset = (uint32_t)offsetof(Version__storage_, version), + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeInt64, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[Version class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(Version__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - OpDesc + +@implementation OpDesc + +@dynamic hasType, type; +@dynamic inputsArray, inputsArray_Count; +@dynamic outputsArray, outputsArray_Count; +@dynamic attrsArray, attrsArray_Count; +@dynamic hasIsTarget, isTarget; + +typedef struct OpDesc__storage_ { + uint32_t _has_storage_[1]; + NSMutableArray *inputsArray; + NSMutableArray *outputsArray; + NSString *type; + NSMutableArray *attrsArray; +} OpDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "inputsArray", + .dataTypeSpecific.className = GPBStringifySymbol(OpDesc_Var), + .number = OpDesc_FieldNumber_InputsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc__storage_, inputsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + { + .name = "outputsArray", + .dataTypeSpecific.className = GPBStringifySymbol(OpDesc_Var), + .number = OpDesc_FieldNumber_OutputsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc__storage_, outputsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + { + .name = "type", + .dataTypeSpecific.className = NULL, + .number = OpDesc_FieldNumber_Type, + .hasIndex = 0, + .offset = (uint32_t)offsetof(OpDesc__storage_, type), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "attrsArray", + .dataTypeSpecific.className = GPBStringifySymbol(OpDesc_Attr), + .number = OpDesc_FieldNumber_AttrsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc__storage_, attrsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + { + .name = "isTarget", + .dataTypeSpecific.className = NULL, + .number = OpDesc_FieldNumber_IsTarget, + .hasIndex = 1, + .offset = 2, // Stored in _has_storage_ to save space. + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeBool, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[OpDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(OpDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - OpDesc_Attr + +@implementation OpDesc_Attr + +@dynamic hasName, name; +@dynamic hasType, type; +@dynamic hasI, i; +@dynamic hasF, f; +@dynamic hasS, s; +@dynamic intsArray, intsArray_Count; +@dynamic floatsArray, floatsArray_Count; +@dynamic stringsArray, stringsArray_Count; +@dynamic hasB, b; +@dynamic boolsArray, boolsArray_Count; +@dynamic hasBlockIdx, blockIdx; +@dynamic hasL, l; +@dynamic blocksIdxArray, blocksIdxArray_Count; + +typedef struct OpDesc_Attr__storage_ { + uint32_t _has_storage_[1]; + AttrType type; + int32_t i; + float f; + int32_t blockIdx; + NSString *name; + NSString *s; + GPBInt32Array *intsArray; + GPBFloatArray *floatsArray; + NSMutableArray *stringsArray; + GPBBoolArray *boolsArray; + GPBInt32Array *blocksIdxArray; + int64_t l; +} OpDesc_Attr__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "name", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_Name, + .hasIndex = 0, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, name), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "type", + .dataTypeSpecific.enumDescFunc = AttrType_EnumDescriptor, + .number = OpDesc_Attr_FieldNumber_Type, + .hasIndex = 1, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, type), + .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor, + .dataType = GPBDataTypeEnum, + }, + { + .name = "i", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_I, + .hasIndex = 2, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, i), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeInt32, + }, + { + .name = "f", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_F, + .hasIndex = 3, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, f), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeFloat, + }, + { + .name = "s", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_S, + .hasIndex = 4, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, s), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeString, + }, + { + .name = "intsArray", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_IntsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, intsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeInt32, + }, + { + .name = "floatsArray", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_FloatsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, floatsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeFloat, + }, + { + .name = "stringsArray", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_StringsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, stringsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeString, + }, + { + .name = "b", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_B, + .hasIndex = 5, + .offset = 6, // Stored in _has_storage_ to save space. + .flags = GPBFieldOptional, + .dataType = GPBDataTypeBool, + }, + { + .name = "boolsArray", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_BoolsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, boolsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeBool, + }, + { + .name = "blockIdx", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_BlockIdx, + .hasIndex = 7, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, blockIdx), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeInt32, + }, + { + .name = "l", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_L, + .hasIndex = 8, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, l), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeInt64, + }, + { + .name = "blocksIdxArray", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Attr_FieldNumber_BlocksIdxArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc_Attr__storage_, blocksIdxArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeInt32, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[OpDesc_Attr class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(OpDesc_Attr__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - OpDesc_Var + +@implementation OpDesc_Var + +@dynamic hasParameter, parameter; +@dynamic argumentsArray, argumentsArray_Count; + +typedef struct OpDesc_Var__storage_ { + uint32_t _has_storage_[1]; + NSString *parameter; + NSMutableArray *argumentsArray; +} OpDesc_Var__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "parameter", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Var_FieldNumber_Parameter, + .hasIndex = 0, + .offset = (uint32_t)offsetof(OpDesc_Var__storage_, parameter), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "argumentsArray", + .dataTypeSpecific.className = NULL, + .number = OpDesc_Var_FieldNumber_ArgumentsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpDesc_Var__storage_, argumentsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeString, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[OpDesc_Var class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(OpDesc_Var__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - OpProto + +@implementation OpProto + +@dynamic hasType, type; +@dynamic inputsArray, inputsArray_Count; +@dynamic outputsArray, outputsArray_Count; +@dynamic attrsArray, attrsArray_Count; +@dynamic hasComment, comment; + +typedef struct OpProto__storage_ { + uint32_t _has_storage_[1]; + NSString *type; + NSMutableArray *inputsArray; + NSMutableArray *outputsArray; + NSMutableArray *attrsArray; + NSString *comment; +} OpProto__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "type", + .dataTypeSpecific.className = NULL, + .number = OpProto_FieldNumber_Type, + .hasIndex = 0, + .offset = (uint32_t)offsetof(OpProto__storage_, type), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "inputsArray", + .dataTypeSpecific.className = GPBStringifySymbol(OpProto_Var), + .number = OpProto_FieldNumber_InputsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpProto__storage_, inputsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + { + .name = "outputsArray", + .dataTypeSpecific.className = GPBStringifySymbol(OpProto_Var), + .number = OpProto_FieldNumber_OutputsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpProto__storage_, outputsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + { + .name = "attrsArray", + .dataTypeSpecific.className = GPBStringifySymbol(OpProto_Attr), + .number = OpProto_FieldNumber_AttrsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(OpProto__storage_, attrsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + { + .name = "comment", + .dataTypeSpecific.className = NULL, + .number = OpProto_FieldNumber_Comment, + .hasIndex = 1, + .offset = (uint32_t)offsetof(OpProto__storage_, comment), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[OpProto class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(OpProto__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - OpProto_Var + +@implementation OpProto_Var + +@dynamic hasName, name; +@dynamic hasComment, comment; +@dynamic hasDuplicable, duplicable; +@dynamic hasIntermediate, intermediate; +@dynamic hasDispensable, dispensable; +@dynamic hasReuse, reuse; + +typedef struct OpProto_Var__storage_ { + uint32_t _has_storage_[1]; + NSString *name; + NSString *comment; + NSString *reuse; +} OpProto_Var__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "name", + .dataTypeSpecific.className = NULL, + .number = OpProto_Var_FieldNumber_Name, + .hasIndex = 0, + .offset = (uint32_t)offsetof(OpProto_Var__storage_, name), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "comment", + .dataTypeSpecific.className = NULL, + .number = OpProto_Var_FieldNumber_Comment, + .hasIndex = 1, + .offset = (uint32_t)offsetof(OpProto_Var__storage_, comment), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "duplicable", + .dataTypeSpecific.className = NULL, + .number = OpProto_Var_FieldNumber_Duplicable, + .hasIndex = 2, + .offset = 3, // Stored in _has_storage_ to save space. + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeBool, + }, + { + .name = "intermediate", + .dataTypeSpecific.className = NULL, + .number = OpProto_Var_FieldNumber_Intermediate, + .hasIndex = 4, + .offset = 5, // Stored in _has_storage_ to save space. + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeBool, + }, + { + .name = "dispensable", + .dataTypeSpecific.className = NULL, + .number = OpProto_Var_FieldNumber_Dispensable, + .hasIndex = 6, + .offset = 7, // Stored in _has_storage_ to save space. + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeBool, + }, + { + .name = "reuse", + .dataTypeSpecific.className = NULL, + .number = OpProto_Var_FieldNumber_Reuse, + .hasIndex = 8, + .offset = (uint32_t)offsetof(OpProto_Var__storage_, reuse), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeString, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[OpProto_Var class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(OpProto_Var__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - OpProto_Attr + +@implementation OpProto_Attr + +@dynamic hasName, name; +@dynamic hasType, type; +@dynamic hasComment, comment; +@dynamic hasGenerated, generated; + +typedef struct OpProto_Attr__storage_ { + uint32_t _has_storage_[1]; + AttrType type; + NSString *name; + NSString *comment; +} OpProto_Attr__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "name", + .dataTypeSpecific.className = NULL, + .number = OpProto_Attr_FieldNumber_Name, + .hasIndex = 0, + .offset = (uint32_t)offsetof(OpProto_Attr__storage_, name), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "type", + .dataTypeSpecific.enumDescFunc = AttrType_EnumDescriptor, + .number = OpProto_Attr_FieldNumber_Type, + .hasIndex = 1, + .offset = (uint32_t)offsetof(OpProto_Attr__storage_, type), + .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor, + .dataType = GPBDataTypeEnum, + }, + { + .name = "comment", + .dataTypeSpecific.className = NULL, + .number = OpProto_Attr_FieldNumber_Comment, + .hasIndex = 2, + .offset = (uint32_t)offsetof(OpProto_Attr__storage_, comment), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "generated", + .dataTypeSpecific.className = NULL, + .number = OpProto_Attr_FieldNumber_Generated, + .hasIndex = 3, + .offset = 4, // Stored in _has_storage_ to save space. + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeBool, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[OpProto_Attr class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(OpProto_Attr__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - VarType + +@implementation VarType + +@dynamic hasType, type; +@dynamic hasSelectedRows, selectedRows; +@dynamic hasLodTensor, lodTensor; +@dynamic hasTensorArray_p, tensorArray_p; +@dynamic hasReader, reader; +@dynamic hasChannel, channel; +@dynamic hasTuple, tuple; + +typedef struct VarType__storage_ { + uint32_t _has_storage_[1]; + VarType_Type type; + VarType_TensorDesc *selectedRows; + VarType_LoDTensorDesc *lodTensor; + VarType_LoDTensorArrayDesc *tensorArray_p; + VarType_ReaderDesc *reader; + VarType_ChannelDesc *channel; + VarType_Tuple *tuple; +} VarType__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "type", + .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor, + .number = VarType_FieldNumber_Type, + .hasIndex = 0, + .offset = (uint32_t)offsetof(VarType__storage_, type), + .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor, + .dataType = GPBDataTypeEnum, + }, + { + .name = "selectedRows", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_TensorDesc), + .number = VarType_FieldNumber_SelectedRows, + .hasIndex = 1, + .offset = (uint32_t)offsetof(VarType__storage_, selectedRows), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeMessage, + }, + { + .name = "lodTensor", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_LoDTensorDesc), + .number = VarType_FieldNumber_LodTensor, + .hasIndex = 2, + .offset = (uint32_t)offsetof(VarType__storage_, lodTensor), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeMessage, + }, + { + .name = "tensorArray_p", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_LoDTensorArrayDesc), + .number = VarType_FieldNumber_TensorArray_p, + .hasIndex = 3, + .offset = (uint32_t)offsetof(VarType__storage_, tensorArray_p), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeMessage, + }, + { + .name = "reader", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_ReaderDesc), + .number = VarType_FieldNumber_Reader, + .hasIndex = 4, + .offset = (uint32_t)offsetof(VarType__storage_, reader), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeMessage, + }, + { + .name = "channel", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_ChannelDesc), + .number = VarType_FieldNumber_Channel, + .hasIndex = 5, + .offset = (uint32_t)offsetof(VarType__storage_, channel), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeMessage, + }, + { + .name = "tuple", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_Tuple), + .number = VarType_FieldNumber_Tuple, + .hasIndex = 6, + .offset = (uint32_t)offsetof(VarType__storage_, tuple), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeMessage, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarType class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarType__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - Enum VarType_Type + +GPBEnumDescriptor *VarType_Type_EnumDescriptor(void) { + static GPBEnumDescriptor *descriptor = NULL; + if (!descriptor) { + static const char *valueNames = + "Bool\000Int16\000Int32\000Int64\000Fp16\000Fp32\000Fp64\000Si" + "zeT\000Uint8\000Int8\000LodTensor\000SelectedRows\000Fe" + "edMinibatch\000FetchList\000StepScopes\000LodRank" + "Table\000LodTensorArray\000PlaceList\000Reader\000Ch" + "annel\000Raw\000Tuple\000"; + static const int32_t values[] = { + VarType_Type_Bool, + VarType_Type_Int16, + VarType_Type_Int32, + VarType_Type_Int64, + VarType_Type_Fp16, + VarType_Type_Fp32, + VarType_Type_Fp64, + VarType_Type_SizeT, + VarType_Type_Uint8, + VarType_Type_Int8, + VarType_Type_LodTensor, + VarType_Type_SelectedRows, + VarType_Type_FeedMinibatch, + VarType_Type_FetchList, + VarType_Type_StepScopes, + VarType_Type_LodRankTable, + VarType_Type_LodTensorArray, + VarType_Type_PlaceList, + VarType_Type_Reader, + VarType_Type_Channel, + VarType_Type_Raw, + VarType_Type_Tuple, + }; + GPBEnumDescriptor *worker = + [GPBEnumDescriptor allocDescriptorForName:GPBNSStringifySymbol(VarType_Type) + valueNames:valueNames + values:values + count:(uint32_t)(sizeof(values) / sizeof(int32_t)) + enumVerifier:VarType_Type_IsValidValue]; + if (!OSAtomicCompareAndSwapPtrBarrier(nil, worker, (void * volatile *)&descriptor)) { + [worker release]; + } + } + return descriptor; +} + +BOOL VarType_Type_IsValidValue(int32_t value__) { + switch (value__) { + case VarType_Type_Bool: + case VarType_Type_Int16: + case VarType_Type_Int32: + case VarType_Type_Int64: + case VarType_Type_Fp16: + case VarType_Type_Fp32: + case VarType_Type_Fp64: + case VarType_Type_SizeT: + case VarType_Type_Uint8: + case VarType_Type_Int8: + case VarType_Type_LodTensor: + case VarType_Type_SelectedRows: + case VarType_Type_FeedMinibatch: + case VarType_Type_FetchList: + case VarType_Type_StepScopes: + case VarType_Type_LodRankTable: + case VarType_Type_LodTensorArray: + case VarType_Type_PlaceList: + case VarType_Type_Reader: + case VarType_Type_Channel: + case VarType_Type_Raw: + case VarType_Type_Tuple: + return YES; + default: + return NO; + } +} + +#pragma mark - VarType_TensorDesc + +@implementation VarType_TensorDesc + +@dynamic hasDataType, dataType; +@dynamic dimsArray, dimsArray_Count; + +typedef struct VarType_TensorDesc__storage_ { + uint32_t _has_storage_[1]; + VarType_Type dataType; + GPBInt64Array *dimsArray; +} VarType_TensorDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "dataType", + .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor, + .number = VarType_TensorDesc_FieldNumber_DataType, + .hasIndex = 0, + .offset = (uint32_t)offsetof(VarType_TensorDesc__storage_, dataType), + .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor, + .dataType = GPBDataTypeEnum, + }, + { + .name = "dimsArray", + .dataTypeSpecific.className = NULL, + .number = VarType_TensorDesc_FieldNumber_DimsArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(VarType_TensorDesc__storage_, dimsArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeInt64, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarType_TensorDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarType_TensorDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - VarType_LoDTensorDesc + +@implementation VarType_LoDTensorDesc + +@dynamic hasTensor, tensor; +@dynamic hasLodLevel, lodLevel; + +typedef struct VarType_LoDTensorDesc__storage_ { + uint32_t _has_storage_[1]; + int32_t lodLevel; + VarType_TensorDesc *tensor; +} VarType_LoDTensorDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "tensor", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_TensorDesc), + .number = VarType_LoDTensorDesc_FieldNumber_Tensor, + .hasIndex = 0, + .offset = (uint32_t)offsetof(VarType_LoDTensorDesc__storage_, tensor), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeMessage, + }, + { + .name = "lodLevel", + .dataTypeSpecific.className = NULL, + .number = VarType_LoDTensorDesc_FieldNumber_LodLevel, + .hasIndex = 1, + .offset = (uint32_t)offsetof(VarType_LoDTensorDesc__storage_, lodLevel), + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeInt32, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarType_LoDTensorDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarType_LoDTensorDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - VarType_LoDTensorArrayDesc + +@implementation VarType_LoDTensorArrayDesc + +@dynamic hasTensor, tensor; +@dynamic hasLodLevel, lodLevel; + +typedef struct VarType_LoDTensorArrayDesc__storage_ { + uint32_t _has_storage_[1]; + int32_t lodLevel; + VarType_TensorDesc *tensor; +} VarType_LoDTensorArrayDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "tensor", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_TensorDesc), + .number = VarType_LoDTensorArrayDesc_FieldNumber_Tensor, + .hasIndex = 0, + .offset = (uint32_t)offsetof(VarType_LoDTensorArrayDesc__storage_, tensor), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeMessage, + }, + { + .name = "lodLevel", + .dataTypeSpecific.className = NULL, + .number = VarType_LoDTensorArrayDesc_FieldNumber_LodLevel, + .hasIndex = 1, + .offset = (uint32_t)offsetof(VarType_LoDTensorArrayDesc__storage_, lodLevel), + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeInt32, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarType_LoDTensorArrayDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarType_LoDTensorArrayDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - VarType_ReaderDesc + +@implementation VarType_ReaderDesc + +@dynamic lodTensorArray, lodTensorArray_Count; + +typedef struct VarType_ReaderDesc__storage_ { + uint32_t _has_storage_[1]; + NSMutableArray *lodTensorArray; +} VarType_ReaderDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "lodTensorArray", + .dataTypeSpecific.className = GPBStringifySymbol(VarType_LoDTensorDesc), + .number = VarType_ReaderDesc_FieldNumber_LodTensorArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(VarType_ReaderDesc__storage_, lodTensorArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarType_ReaderDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarType_ReaderDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - VarType_ChannelDesc + +@implementation VarType_ChannelDesc + +@dynamic hasDataType, dataType; +@dynamic hasCapacity, capacity; + +typedef struct VarType_ChannelDesc__storage_ { + uint32_t _has_storage_[1]; + VarType_Type dataType; + int64_t capacity; +} VarType_ChannelDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "dataType", + .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor, + .number = VarType_ChannelDesc_FieldNumber_DataType, + .hasIndex = 0, + .offset = (uint32_t)offsetof(VarType_ChannelDesc__storage_, dataType), + .flags = GPBFieldRequired | GPBFieldHasEnumDescriptor, + .dataType = GPBDataTypeEnum, + }, + { + .name = "capacity", + .dataTypeSpecific.className = NULL, + .number = VarType_ChannelDesc_FieldNumber_Capacity, + .hasIndex = 1, + .offset = (uint32_t)offsetof(VarType_ChannelDesc__storage_, capacity), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeInt64, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarType_ChannelDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarType_ChannelDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - VarType_Tuple + +@implementation VarType_Tuple + +@dynamic elementTypeArray, elementTypeArray_Count; + +typedef struct VarType_Tuple__storage_ { + uint32_t _has_storage_[1]; + GPBEnumArray *elementTypeArray; +} VarType_Tuple__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "elementTypeArray", + .dataTypeSpecific.enumDescFunc = VarType_Type_EnumDescriptor, + .number = VarType_Tuple_FieldNumber_ElementTypeArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(VarType_Tuple__storage_, elementTypeArray), + .flags = GPBFieldRepeated | GPBFieldHasEnumDescriptor, + .dataType = GPBDataTypeEnum, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarType_Tuple class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarType_Tuple__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - VarDesc + +@implementation VarDesc + +@dynamic hasName, name; +@dynamic hasType, type; +@dynamic hasPersistable, persistable; + +typedef struct VarDesc__storage_ { + uint32_t _has_storage_[1]; + NSString *name; + VarType *type; +} VarDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "name", + .dataTypeSpecific.className = NULL, + .number = VarDesc_FieldNumber_Name, + .hasIndex = 0, + .offset = (uint32_t)offsetof(VarDesc__storage_, name), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeString, + }, + { + .name = "type", + .dataTypeSpecific.className = GPBStringifySymbol(VarType), + .number = VarDesc_FieldNumber_Type, + .hasIndex = 1, + .offset = (uint32_t)offsetof(VarDesc__storage_, type), + .flags = GPBFieldRequired, + .dataType = GPBDataTypeMessage, + }, + { + .name = "persistable", + .dataTypeSpecific.className = NULL, + .number = VarDesc_FieldNumber_Persistable, + .hasIndex = 2, + .offset = 3, // Stored in _has_storage_ to save space. + .flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .dataType = GPBDataTypeBool, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[VarDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(VarDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - BlockDesc + +@implementation BlockDesc + +@dynamic hasIdx, idx; +@dynamic hasParentIdx, parentIdx; +@dynamic varsArray, varsArray_Count; +@dynamic opsArray, opsArray_Count; +@dynamic hasForwardBlockIdx, forwardBlockIdx; + +typedef struct BlockDesc__storage_ { + uint32_t _has_storage_[1]; + int32_t idx; + int32_t parentIdx; + int32_t forwardBlockIdx; + NSMutableArray *varsArray; + NSMutableArray *opsArray; +} BlockDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescriptionWithDefault fields[] = { + { + .defaultValue.valueInt32 = 0, + .core.name = "idx", + .core.dataTypeSpecific.className = NULL, + .core.number = BlockDesc_FieldNumber_Idx, + .core.hasIndex = 0, + .core.offset = (uint32_t)offsetof(BlockDesc__storage_, idx), + .core.flags = GPBFieldRequired, + .core.dataType = GPBDataTypeInt32, + }, + { + .defaultValue.valueInt32 = 0, + .core.name = "parentIdx", + .core.dataTypeSpecific.className = NULL, + .core.number = BlockDesc_FieldNumber_ParentIdx, + .core.hasIndex = 1, + .core.offset = (uint32_t)offsetof(BlockDesc__storage_, parentIdx), + .core.flags = GPBFieldRequired, + .core.dataType = GPBDataTypeInt32, + }, + { + .defaultValue.valueMessage = nil, + .core.name = "varsArray", + .core.dataTypeSpecific.className = GPBStringifySymbol(VarDesc), + .core.number = BlockDesc_FieldNumber_VarsArray, + .core.hasIndex = GPBNoHasBit, + .core.offset = (uint32_t)offsetof(BlockDesc__storage_, varsArray), + .core.flags = GPBFieldRepeated, + .core.dataType = GPBDataTypeMessage, + }, + { + .defaultValue.valueMessage = nil, + .core.name = "opsArray", + .core.dataTypeSpecific.className = GPBStringifySymbol(OpDesc), + .core.number = BlockDesc_FieldNumber_OpsArray, + .core.hasIndex = GPBNoHasBit, + .core.offset = (uint32_t)offsetof(BlockDesc__storage_, opsArray), + .core.flags = GPBFieldRepeated, + .core.dataType = GPBDataTypeMessage, + }, + { + .defaultValue.valueInt32 = -1, + .core.name = "forwardBlockIdx", + .core.dataTypeSpecific.className = NULL, + .core.number = BlockDesc_FieldNumber_ForwardBlockIdx, + .core.hasIndex = 2, + .core.offset = (uint32_t)offsetof(BlockDesc__storage_, forwardBlockIdx), + .core.flags = GPBFieldOptional | GPBFieldHasDefaultValue, + .core.dataType = GPBDataTypeInt32, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[BlockDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescriptionWithDefault)) + storageSize:sizeof(BlockDesc__storage_) + flags:GPBDescriptorInitializationFlag_FieldsWithDefault]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + +#pragma mark - ProgramDesc + +@implementation ProgramDesc + +@dynamic blocksArray, blocksArray_Count; +@dynamic hasVersion, version; + +typedef struct ProgramDesc__storage_ { + uint32_t _has_storage_[1]; + NSMutableArray *blocksArray; + Version *version; +} ProgramDesc__storage_; + +// This method is threadsafe because it is initially called +// in +initialize for each subclass. ++ (GPBDescriptor *)descriptor { + static GPBDescriptor *descriptor = nil; + if (!descriptor) { + static GPBMessageFieldDescription fields[] = { + { + .name = "blocksArray", + .dataTypeSpecific.className = GPBStringifySymbol(BlockDesc), + .number = ProgramDesc_FieldNumber_BlocksArray, + .hasIndex = GPBNoHasBit, + .offset = (uint32_t)offsetof(ProgramDesc__storage_, blocksArray), + .flags = GPBFieldRepeated, + .dataType = GPBDataTypeMessage, + }, + { + .name = "version", + .dataTypeSpecific.className = GPBStringifySymbol(Version), + .number = ProgramDesc_FieldNumber_Version, + .hasIndex = 0, + .offset = (uint32_t)offsetof(ProgramDesc__storage_, version), + .flags = GPBFieldOptional, + .dataType = GPBDataTypeMessage, + }, + }; + GPBDescriptor *localDescriptor = + [GPBDescriptor allocDescriptorForClass:[ProgramDesc class] + rootClass:[FrameworkRoot class] + file:FrameworkRoot_FileDescriptor() + fields:fields + fieldCount:(uint32_t)(sizeof(fields) / sizeof(GPBMessageFieldDescription)) + storageSize:sizeof(ProgramDesc__storage_) + flags:0]; + NSAssert(descriptor == nil, @"Startup recursed!"); + descriptor = localDescriptor; + } + return descriptor; +} + +@end + + +#pragma clang diagnostic pop + +// @@protoc_insertion_point(global_scope) diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/OpDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/OpDesc.swift deleted file mode 100644 index 44fc09a29db0deec67e7682b303b1d0947b47a51..0000000000000000000000000000000000000000 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/OpDesc.swift +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -import Foundation - -class OpDesc { - let inputs: [String : [String]] - var paraInputs: [String : [String]] - var outputs: [String : [String]] - let unusedOutputs: [String : [String]] - var attrs: [String : Attr] = [:] - var type: String - init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) { - type = protoOpDesc.type - let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in - var map: [String : [String]] = [:] - for opDescVar in vars { - if (canAdd(opDescVar.parameter)) { - map[opDescVar.parameter] = opDescVar.arguments - } - } - return map - } - - inputs = creator(protoOpDesc.inputs) { - opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false - } - - paraInputs = creator(protoOpDesc.inputs) { - !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false) - } - - outputs = creator(protoOpDesc.outputs) { - opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false - } - - unusedOutputs = creator(protoOpDesc.outputs) { - !(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false) - } - - for attr in protoOpDesc.attrs { - if (attr.type != .block) { - attrs[attr.name] = attrWithProtoDesc(attrDesc: attr) - } - } - } -} - -extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible { - var description: String { - var str = "" - str += "op type: \(type): \n" - str += " op inputs: \n" - str += " \(inputs) \n" - str += " op para inputs: \n" - str += " \(paraInputs) \n" - str += " op para outputs: \n" - str += " \(outputs) \n" - str += " op attrs: \n" - str += " \(attrs) \n" - - return str - } - - var debugDescription: String { - return description - } - - -} diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/BlockDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/PMBlockDesc.swift similarity index 71% rename from metal/paddle-mobile/paddle-mobile/Src/Program/BlockDesc.swift rename to metal/paddle-mobile/paddle-mobile/Src/Program/PMBlockDesc.swift index 1a66a69128b8692ae634363ca05b659dd6e604af..b021b09008b1f3bef3ba01d5a51fe7b7803fedaa 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/BlockDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/PMBlockDesc.swift @@ -14,28 +14,28 @@ import Foundation -public class BlockDesc { +public class PMBlockDesc { let index: Int let parentIndex: Int - public let vars: [VarDesc] - let ops: [OpDesc] - init(block: PaddleMobile_Framework_Proto_BlockDesc) { + public let vars: [PMVarDesc] + let ops: [PMOpDesc] + init(block: BlockDesc) { index = Int(block.idx) parentIndex = Int(block.parentIdx) - var vars: [VarDesc] = [] - for varOfBlock in block.vars { - vars.append(VarDesc.init(protoVarDesc: varOfBlock)) + var vars: [PMVarDesc] = [] + for varOfBlock in block.varsArray { + vars.append(PMVarDesc.init(protoVarDesc: varOfBlock as! VarDesc)) } vars.sort { $0.name < $1.name } self.vars = vars - var ops: [OpDesc] = [] - for op in block.ops { - ops.append(OpDesc.init(protoOpDesc: op)) + var ops: [PMOpDesc] = [] + for op in block.opsArray { + ops.append(PMOpDesc.init(protoOpDesc: op as! OpDesc)) } self.ops = ops } - init(inVars: [VarDesc], inOps: [OpDesc]) { + init(inVars: [PMVarDesc], inOps: [PMOpDesc]) { vars = inVars ops = inOps index = 0 @@ -44,7 +44,7 @@ public class BlockDesc { } -extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { +extension PMBlockDesc: CustomStringConvertible, CustomDebugStringConvertible { public var description: String { var str = "" diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/PMOpDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/PMOpDesc.swift new file mode 100644 index 0000000000000000000000000000000000000000..663677150eb0f0240b032a713424aac8ed66c86a --- /dev/null +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/PMOpDesc.swift @@ -0,0 +1,81 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +import Foundation + +class PMOpDesc { + let inputs: [String : [String]] + var paraInputs: [String : [String]] + var outputs: [String : [String]] + let unusedOutputs: [String : [String]] + var attrs: [String : Attr] = [:] + var type: String + init(protoOpDesc: OpDesc) { + type = protoOpDesc.type + let creator = { (vars: [OpDesc_Var], canAdd: (String) -> Bool) -> [String : [String]] in + var map: [String : [String]] = [:] + for opDescVar in vars { + if (canAdd(opDescVar.parameter)) { + map[opDescVar.parameter] = opDescVar.argumentsArray as? [String] + } + } + return map + } + + inputs = creator(protoOpDesc.inputsArray as! [OpDesc_Var]) { + opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false + } + + paraInputs = creator(protoOpDesc.inputsArray as! [OpDesc_Var]) { + !(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false) + } + + outputs = creator(protoOpDesc.outputsArray as! [OpDesc_Var]) { + opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false + } + + unusedOutputs = creator(protoOpDesc.outputsArray as! [OpDesc_Var]) { + !(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false) + } + + for attr in protoOpDesc.attrsArray { + if ((attr as! OpDesc_Attr).type != .block) { + attrs[(attr as! OpDesc_Attr).name] = attrWithProtoDesc(attrDesc: attr as! OpDesc_Attr) + } + } + } +} + +extension PMOpDesc: CustomStringConvertible, CustomDebugStringConvertible { + var description: String { + var str = "" + str += "op type: \(type): \n" + str += " op inputs: \n" + str += " \(inputs) \n" + str += " op para inputs: \n" + str += " \(paraInputs) \n" + str += " op para outputs: \n" + str += " \(outputs) \n" + str += " op attrs: \n" + str += " \(attrs) \n" + + return str + } + + var debugDescription: String { + return description + } + + +} diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/PMProgramDesc.swift similarity index 75% rename from metal/paddle-mobile/paddle-mobile/Src/Program/ProgramDesc.swift rename to metal/paddle-mobile/paddle-mobile/Src/Program/PMProgramDesc.swift index a95dcb771b511cc5534d15d028ad06451d72ecda..79b8875976dd42eb57ff981441103f001ddb7a6e 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/ProgramDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/PMProgramDesc.swift @@ -14,11 +14,11 @@ import Foundation -public class ProgramDesc { - public var blocks: [BlockDesc] = [] - init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) { - for block in protoProgram.blocks { - self.blocks.append(BlockDesc.init(block: block)) +public class PMProgramDesc { + public var blocks: [PMBlockDesc] = [] + init(protoProgram: ProgramDesc) { + for block in protoProgram.blocksArray { + self.blocks.append(PMBlockDesc.init(block: block as! BlockDesc)) } } @@ -26,7 +26,7 @@ public class ProgramDesc { } } -extension ProgramDesc: CustomStringConvertible, CustomDebugStringConvertible { +extension PMProgramDesc: CustomStringConvertible, CustomDebugStringConvertible { public var description: String { var str: String = "" for i in 0.. { ElementwiseAddPreluOp

.self ] - func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { + func optimize(originProgramDesc: PMProgramDesc) -> PMProgramDesc { guard originProgramDesc.blocks.count == 1 else { fatalError(" not support yet") @@ -287,13 +287,13 @@ class ProgramOptimize { } } - var ops: [OpDesc] = [] + var ops: [PMOpDesc] = [] for node in nodes { ops.append(node.opDesc!) } - var newProgramDesc = ProgramDesc.init() - let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) + let newProgramDesc = PMProgramDesc.init() + let newBlock = PMBlockDesc.init(inVars: block.vars, inOps: ops) newProgramDesc.blocks.append(newBlock) return newProgramDesc } diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift index 1a72f5ef717063136c4708c881befd789a57219c..7565fffc99bd15862304bdf7d7dd1bc31a6ffaf4 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/TensorDesc.swift @@ -52,9 +52,19 @@ class TensorDesc { } } - init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) { - dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) } - dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType + init(protoTensorDesc: VarType_TensorDesc) { + // dims = protoTensorDesc.dimsArray.map{ Int64($0)! > 0 ? Int64($0) : abs(Int64($0)) } + + var dimsArray = [Int]() + + let dimsCount = protoTensorDesc.dimsArray.count + for i in 0.. 0 ?Int(protoTensorDesc.dimsArray.value(at: i)) :abs(Int(protoTensorDesc.dimsArray.value(at: i))) + dimsArray.append(dim) + } + dims = dimsArray + + dataType = VarTypeType.init(rawValue: Int(protoTensorDesc.dataType.rawValue)) ?? .ErrorType } } diff --git a/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift b/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift index df4af3bcc91853e507321d46d3edfd04045f29ab..4a320a9eff27bf583ef1eccee3344f5571e0bbac 100644 --- a/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift +++ b/metal/paddle-mobile/paddle-mobile/Src/Program/framework.pb.swift @@ -21,7 +21,6 @@ //limitations under the License. import Foundation -import SwiftProtobuf // If the compiler emits an error on this type, it is because this file // was generated by a version of the `protoc` Swift plug-in that is diff --git a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h index 07a6478eb8206264cf739c80e9287a679585ae50..32b69c0b1438d0e18bae5e7c24a2731a473e1a67 100644 --- a/metal/paddle-mobile/paddle-mobile/paddle_mobile.h +++ b/metal/paddle-mobile/paddle-mobile/paddle_mobile.h @@ -15,6 +15,7 @@ #pragma once #import +#import //! Project version number for paddle_mobile. //FOUNDATION_EXPORT double paddle_mobileVersionNumber; diff --git a/src/fpga/V1/api.cpp b/src/fpga/V1/api.cpp index 137ac73512b9d88716ab585ba315f26aa3b14ea8..9a408a8f2fbe3c600679ddb2e3eadb493f323165 100644 --- a/src/fpga/V1/api.cpp +++ b/src/fpga/V1/api.cpp @@ -151,6 +151,30 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) { filter_tensor->reset_data_ptr(new_data); } +void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr, + int stride) { + auto dims = filter_tensor->dims(); + auto num = dims[0], height = dims[2], width = dims[3]; + auto data_ptr = filter_tensor->data(); + size_t memory_size = num * height * width * sizeof(float); + auto new_data = (float *)fpga_malloc(memory_size); // NOLINT + fpga_copy(new_data, data_ptr, memory_size); + + int hw = height * width; + deconv_filter::deconv_NC_convert(&new_data, num, 1, hw); + + num = dims[1]; + int channel = dims[0]; + + deconv_filter::DWDconv_format_filter(&new_data, num, channel, height, width, + scale_ptr, stride); + + // framework::DDim dims_new = + // framework::make_ddim({num, 1, height, width}); + // filter_tensor->Resize(dims_new); + filter_tensor->reset_data_ptr(new_data); +} + void format_fc_filter(framework::Tensor *filter_tensor, float max_value) { filter_tensor->scale[0] = float(max_value / 127.0); // NOLINT filter_tensor->scale[1] = float(127.0 / max_value); // NOLINT @@ -243,6 +267,17 @@ void format_dwconv_data(framework::Tensor *filter_tensor, format_bias_array(bias_ptr, channel); format_fp16_ofm(ofm_tensor); } +void format_DWDeconv_data(framework::Tensor *filter_tensor, + framework::Tensor *ofm_tensor, float **bs_ptr, + int group, int sub_conv_n) { + int channel = ofm_tensor->dims()[1]; + // dw-deconv + format_DWDconv_filter( + filter_tensor, + (reinterpret_cast(*bs_ptr) + sub_conv_n * channel), sub_conv_n); + format_bias_array(bs_ptr, channel); + format_fp16_ofm(ofm_tensor); +} void expand_conv_arg(ConvArgs *arg) { ConvArgs args = *arg; @@ -311,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) { auto filter_pad_width_mul_channel = args.image.pad_width * args.image.channels; auto image_amount_per_row_multi_win_first = - image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height); + image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height); auto image_amount_per_row_multi_win = - image_amount_per_row * (4 * args.kernel.stride_h); + image_amount_per_row * (2 * args.kernel.stride_h); auto image_block_num = block_num; auto image_block_len = @@ -340,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) { (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2) ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2) : 0; - auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS; + // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS; + auto cmd = 0UL | USE_BIAS; auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) | ((args.deconv_tx_param.sub_conv_num) << 16) | @@ -378,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) { void expand_EW_arg(EWAddArgs *arg) { EWAddArgs args = *arg; - uint64_t cmd = args.relu_enabled ? USE_RELU : 0; + // uint64_t cmd = args.relu_enabled ? USE_RELU : 0; + uint64_t cmd = 0; uint64_t datalen = (uint64_t)args.image0.width * (uint64_t)args.image0.height * (uint64_t)args.image0.channels; @@ -406,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) { void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, float *bs_ptr) { + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, + float *bs_ptr) { auto input_ptr = input->data(); auto filter_ptr = filter->data(); auto out_ptr = out->data(); @@ -453,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, filter->dims()[3])); for (int i = 0; i < n; i++) { - arg->conv_arg[i].relu_enabled = relu_enabled; + // arg->conv_arg[i].relu_enabled = relu_enabled; + arg->conv_arg[i].output.activation.activation_type = activation_enable; + arg->conv_arg[i].output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; arg->conv_arg[i].group_num = (uint32_t)group_num; arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h; arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w; @@ -525,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, float *bs_ptr) { auto input_ptr = input->data(); auto filter_ptr = filter->data(); @@ -652,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, } for (int j = 0; j < split_num; ++j) { - arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled; + // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled; + arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type = + activation_enable; + arg->split_conv_args[i] + ->conv_arg[j] + .output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num; arg->split_conv_args[i]->conv_arg[j].kernel.width = @@ -765,12 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, framework::Tensor *out, framework::Tensor *filter, - bool relu_enabled, int stride_h, int stride_w, - int padding_h, int padding_w, float *bias_ptr) { + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float *bias_ptr) { auto filter_ptr = filter->data(); auto input_ptr = input->data(); auto output_ptr = out->mutable_data(); - arg->relu_enabled = relu_enabled; + arg->sub_conv_num = 1; + // arg->relu_enabled = relu_enabled; + arg->output.activation.activation_type = activation_enable; + arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope; arg->bias_address = bias_ptr; arg->filter_address = filter_ptr; arg->kernel.height = (uint32_t)filter->dims()[2]; @@ -788,5 +842,114 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, arg->output.scale_address = out->scale; } // end dwconv arg fill +void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input, + framework::Tensor *out, framework::Tensor *filter, + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float *bias_ptr) { + auto filter_ptr = filter->data(); + auto input_ptr = input->data(); + auto output_ptr = out->mutable_data(); + + auto deleter = [](void *p) { fpga_free(p); }; + + arg->group_num = (uint32_t)filter->dims()[0]; + arg->sub_conv_num = (uint32_t)stride_w; + arg->filter_num = (uint32_t)filter->dims()[0]; + + int sub_conv_num = stride_w; + + int sub_pad = + deconv_filter::deconv_calc_sub_pad((int)filter->dims()[3], // NOLINT + padding_w, stride_w); + auto sub_filter_width = (uint32_t)deconv_filter::deconv_get_sub_filter_axis( + (int)filter->dims()[3], stride_w); // NOLINT + + auto sub_output_width = (uint32_t)deconv_filter::deconv_get_sub_out_axis( + (int)input->dims()[3], sub_pad, sub_filter_width); // NOLINT + auto sub_output_height = (uint32_t)deconv_filter::deconv_get_sub_out_axis( + (int)input->dims()[2], sub_pad, sub_filter_width); // NOLINT + + arg->sub_output_width = (uint32_t)sub_output_width; + arg->sub_output_height = (uint32_t)sub_output_height; + arg->omit_size = (uint32_t)deconv_filter::deconv_get_omit( + stride_w, (int)filter->dims()[3], padding_w); // NOLINT + + auto sub_channels = (int)input->dims()[1]; // NOLINT + uint32_t omit_size = arg->omit_size; + int real_out_width = sub_output_width * sub_conv_num - 2 * omit_size; + int real_out_height = sub_output_height * sub_conv_num - 2 * omit_size; + int sub_filter_num = sub_conv_num * (arg->filter_num); + + framework::DDim dims_out_new = framework::make_ddim( + {1, arg->filter_num, real_out_height, real_out_width}); + fpga::format_fp16_ofm(out, dims_out_new); + auto out_ptr = out->data(); + + /*====For Addition + arg->output.address = + (half *)out_ptr + // NOLINT + omit_size * sizeof(half) * + (align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT)); + */ + arg->output.address = out_ptr; + arg->output.scale_address = out->scale; + + int filter_offset = sub_filter_width * sub_filter_width * + align_to_x(sub_channels, FILTER_ELEMENT_ALIGNMENT) * + arg->sub_conv_num; + + for (int i = 0; i < sub_conv_num; ++i) { + arg->dw_conv_args.push_back(std::make_shared()); + + arg->dw_conv_args[i]->sub_conv_num = sub_conv_num; + // arg->dw_conv_args[i]->relu_enabled = relu_enabled; + arg->dw_conv_args[i]->output.activation.activation_type = activation_enable; + arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; + arg->dw_conv_args[i]->bias_address = bias_ptr; + + arg->dw_conv_args[i]->filter_address = + fpga_malloc(filter_offset * sizeof(int16_t)); + memcpy(arg->dw_conv_args[i]->filter_address, + (reinterpret_cast(filter_ptr) + i * filter_offset), + filter_offset * sizeof(int16_t)); + arg->vector_dw_conv_space.push_back(std::shared_ptr( + reinterpret_cast(arg->dw_conv_args[i]->filter_address), + deleter)); + + arg->dw_conv_args[i]->kernel.height = (uint32_t)sub_filter_width; + arg->dw_conv_args[i]->kernel.width = (uint32_t)sub_filter_width; + + arg->dw_conv_args[i]->kernel.stride_h = (uint32_t)1; + arg->dw_conv_args[i]->kernel.stride_w = (uint32_t)1; + arg->dw_conv_args[i]->image.address = input_ptr; + arg->dw_conv_args[i]->image.channels = (uint32_t)input->dims()[1]; + arg->dw_conv_args[i]->image.height = (uint32_t)input->dims()[2]; + arg->dw_conv_args[i]->image.width = (uint32_t)input->dims()[3]; + + arg->dw_conv_args[i]->image.pad_height = sub_pad; + arg->dw_conv_args[i]->image.pad_width = sub_pad; + arg->dw_conv_args[i]->image.scale_address = input->scale; + + arg->dw_conv_args[i]->output.address = + fpga_malloc(sub_output_height * + align_to_x(sub_output_width * sub_channels * sub_conv_num, + IMAGE_ALIGNMENT) * + sizeof(int16_t)); + arg->dw_conv_args[i]->output.scale_address = + static_cast(fpga_malloc(2 * sizeof(float))); + arg->vector_dw_conv_space.push_back(std::shared_ptr( + reinterpret_cast(arg->dw_conv_args[i]->output.address), + deleter)); + arg->vector_dw_conv_space.push_back(std::shared_ptr( + reinterpret_cast(arg->dw_conv_args[i]->output.scale_address), + deleter)); + } + + // arg->output.scale_address = out->scale; +} // end dwconv arg fill + } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/V1/api.h b/src/fpga/V1/api.h index b5c586e92aca2cc8a540ba54479ae7941f42e02c..05a30ddce4828bf8ac0f049ea0db4f18dc1dba79 100644 --- a/src/fpga/V1/api.h +++ b/src/fpga/V1/api.h @@ -47,16 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width, void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input, framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, float* bs_ptr); + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, + float* bs_ptr); void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input, framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int group_num, int stride_h, - int stride_w, int padding_h, int padding_w, float* bs_ptr); + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int group_num, + int stride_h, int stride_w, int padding_h, int padding_w, + float* bs_ptr); void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input, framework::Tensor* out, framework::Tensor* filter, - bool relu_enabled, int stride_h, int stride_w, - int padding_h, int padding_w, float* bias_ptr); + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float* bias_ptr); +void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input, + framework::Tensor* out, framework::Tensor* filter, + ActivationType activation_enable, + int16_t leaky_relu_negative_slope, int stride_h, + int stride_w, int padding_h, int padding_w, + float* bs_ptr); void format_deconv_filter(framework::Tensor* filter_tensor, float max_value, int group_num, int stride); @@ -69,6 +81,10 @@ void format_deconv_data(framework::Tensor* filter_tensor, void format_dwconv_data(framework::Tensor* filter_tensor, framework::Tensor* ofm_tensor, float* scale_ptr, float** bias_ptr); +void format_DWDeconv_data(framework::Tensor* filter_tensor, + framework::Tensor* ofm_tensor, float** bs_ptr, + int group, int sub_conv_n); + template void savefile(std::string filename, void* buffer, int dataSize, Dtype tmp) { float data; diff --git a/src/fpga/V1/deconv_filter.cpp b/src/fpga/V1/deconv_filter.cpp index 8fb3cd69fdfb10effb5769b656e19858e481f5f4..7c87452f5a7264ad069d8508cb1e9dc24f5cdc3d 100644 --- a/src/fpga/V1/deconv_filter.cpp +++ b/src/fpga/V1/deconv_filter.cpp @@ -19,16 +19,6 @@ limitations under the License. */ #include "fpga/V1/filter.h" // #include "filter.h" #include "fpga/V1/api.h" -// #include "fpga_api.h" - -// just for test -//#include -//#include "deconv.h" -//#include "deconv_api.h" -// using namespace std; -// using namespace paddle_mobile::fpga; -// using namespace baidu::fpga::deconv::api; -// namespace api = baidu::fpga::deconv::api; namespace paddle_mobile { namespace fpga { @@ -42,7 +32,8 @@ void deconv_inverse_filter(float** data_in, int num, int channel, int width, float* tmp = *data_in; int data_size = num * channel * width * height; int hw_len = height * width; - auto tmp_data = (float*)fpga_malloc(data_size * sizeof(float)); + auto tmp_data = + reinterpret_cast(fpga_malloc(data_size * sizeof(float))); for (int i = 0; i < num; ++i) { for (int j = 0; j < channel; ++j) { for (int k = 0; k < hw_len; ++k) { @@ -97,9 +88,10 @@ int deconv_get_omit(int stride, int filter_width, int pad) { return (stride - idx); } -void deconv_get_sub_filter(char** data_in, int height, int width, - int sub_conv_n, int kernel_num, int channel) { - char* ptr_tmp = *data_in; +template +void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n, + int kernel_num, int channel) { + T* ptr_tmp = *data_in; int sub_num = kernel_num * sub_conv_n; int sub_h = height / sub_conv_n; int sub_w = width / sub_conv_n; @@ -107,7 +99,8 @@ void deconv_get_sub_filter(char** data_in, int height, int width, int sub_filter_size = kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n; - char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char)); + T* ptr_sub_filter = + reinterpret_cast(fpga_malloc(sub_filter_size * sizeof(T))); for (int idx = 0; idx < sub_conv_n; ++idx) { for (int nn = 0; nn < sub_num; ++nn) { int ni = nn % kernel_num; @@ -124,7 +117,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width, fpga_copy( ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx, - (*data_in) + kidx, channel * sizeof(char)); + (*data_in) + kidx, channel * sizeof(T)); // for (int cc =0; cc < channel; ++cc) { // ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = // (*data_in)[kidx + cc]; @@ -140,7 +133,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width, void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw) { float* tmp = *filter_in; - float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc( + float* ptr_filter = reinterpret_cast(paddle_mobile::fpga::fpga_malloc( hw * kernel_num * channels * sizeof(float))); for (int c = 0; c < channels; ++c) { @@ -188,7 +181,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, result2); }*/ - deconv_get_sub_filter(quantize_data, height, width, stride, num, channel); + deconv_get_sub_filter(quantize_data, height, width, stride, num, + channel); /*{ char result2 = (char)0; string filename = "sub_filter_filter_data"; @@ -212,10 +206,12 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ((residual == 0) ? div_num : (div_num - 1)) + align_to_x(residual, FILTER_NUM_ALIGNMENT); - char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*)); + char** ptr_ptr_data = + reinterpret_cast(fpga_malloc(sub_conv_n * sizeof(char*))); int origin_offset = sub_chw * sub_num; for (int i = 0; i < sub_conv_n; ++i) { - (ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char)); + (ptr_ptr_data)[i] = + reinterpret_cast(fpga_malloc(origin_offset * sizeof(char))); fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i, origin_offset * sizeof(char)); @@ -233,8 +229,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, int align_offset = align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment; - char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset * - sizeof(char)); // continuous space + char* ptr_space = reinterpret_cast(fpga_malloc( + sub_conv_n * align_offset * sizeof(char))); // continuous space for (int i = 0; i < sub_conv_n; ++i) { char* ptr_tmp = (ptr_ptr_data)[i]; @@ -251,7 +247,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset); fpga_free(ptr_tmp); } - *data_in = (float*)ptr_space; + *data_in = reinterpret_cast(ptr_space); /* { char result2 = (char)0; @@ -262,6 +258,22 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char)); } +void DWDconv_format_filter(float** data_in, int num, int channel, int height, + int width, float* scale_ptr, int stride) { + deconv_inverse_filter(data_in, num, channel, width, height); + + filter::quantize_to_fp16(data_in, channel, height, width, scale_ptr); + int16_t** quantize_data = (int16_t**)data_in; // NOLINT + filter::convert_to_hwn(quantize_data, channel, height, width); + + deconv_get_sub_filter(quantize_data, height, width, stride, num, + channel); + + filter::align_element_n(quantize_data, channel, height, width); + fpga_flush(*quantize_data, align_to_x(channel, FILTER_ELEMENT_ALIGNMENT) * + height * width * sizeof(int16_t)); +} + } // namespace deconv_filter } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/V1/deconv_filter.h b/src/fpga/V1/deconv_filter.h index 5fa9781933712a8506c052258dbf2f7f7e05fe37..f1a50b95c52dadc49f4dd333791a22f63bf6d0a3 100644 --- a/src/fpga/V1/deconv_filter.h +++ b/src/fpga/V1/deconv_filter.h @@ -24,11 +24,15 @@ int deconv_calc_sub_pad(int filter_axis, int pad, int stride); int deconv_get_sub_filter_axis(int filter_axis, int stride); int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis); int deconv_get_omit(int stride, int filter_width, int pad); -void deconv_get_sub_filter(char** data_in, int height, int width, - int sub_conv_n, int kernel_num, int channel); + +template +void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n, + int kernel_num, int channel); void deconv_format_filter(float** data_in, int num, int channel, int height, int width, int group_num, float max, int stride); void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw); +void DWDconv_format_filter(float** data_in, int num, int channel, int height, + int width, float* scale_ptr, int stride); } // namespace deconv_filter } // namespace fpga diff --git a/src/fpga/V1/filter.cpp b/src/fpga/V1/filter.cpp old mode 100755 new mode 100644 index 197448d515d67459b280bf33a14b8f8419970fc2..50341b75e129479e7f8d8ab4d9c200df574996cb --- a/src/fpga/V1/filter.cpp +++ b/src/fpga/V1/filter.cpp @@ -346,6 +346,16 @@ void format_dwconv_filter(float **data_in, int num, int height, int width, fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) * height * width * sizeof(int16_t)); } + +void format_DWDeconv_filter(float **data_in, int num, int height, int width, + float *scale_ptr) { + quantize_to_fp16(data_in, num, height, width, scale_ptr); + int16_t **quantize_data = (int16_t **)data_in; // NOLINT + convert_to_hwn(quantize_data, num, height, width); + align_element_n(quantize_data, num, height, width); + fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) * + height * width * sizeof(int16_t)); +} } // namespace filter } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/V1/pe.cpp b/src/fpga/V1/pe.cpp index aeb5cdd65385b87a5da1e15e98b9914ca6be189c..5a81e2422979f08b2113bd9b46022fe4d77154cb 100644 --- a/src/fpga/V1/pe.cpp +++ b/src/fpga/V1/pe.cpp @@ -18,7 +18,6 @@ limitations under the License. */ #include "fpga/V1/image.h" #include "fpga/common/config.h" #include "fpga/common/driver.h" - #ifdef COST_TIME_PRINT #include #include @@ -64,6 +63,7 @@ using namespace std; // NOLINT #define REG_TIMER_COUNTER 0x070 #define REG_SCALE_PARAMETER 0x080 +#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090 #define REG_FLASH_CMD 0x200 #define REG_FLASH_DATA 0x208 @@ -163,6 +163,7 @@ using namespace std; // NOLINT #define REG_DWCONV_FILTER_BASE_ADDR 0xe08 #define REG_DWCONV_FILTER_SHAPE 0xe10 #define REG_DWCONV_FILTER_N_ALIGN 0xe18 +#define REG_DWCONV_FILTER_SUBNUMBER 0xe20 #define REG_DWCONV_CMD 0xe00 int ComputeFpgaConv(const struct SplitConvArgs &args) { @@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) { int ComputeBasicConv(const struct ConvArgs &args) { #ifdef FPGA_PRINT_MODE DLOG << "======Compute Basic Conv======"; - DLOG << " relu_enabled:" << args.relu_enabled - << " sb_address:" << args.sb_address + // DLOG << " relu_enabled:" << args.relu_enabled + DLOG << " sb_address:" << args.sb_address << " filter_address:" << args.filter_address << " filter_num:" << args.filter_num << " group_num:" << args.group_num; @@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) { #ifdef PADDLE_MOBILE_ZU5 int ret = 0; uint64_t output_scale = 0; + + uint64_t reg_ActivationArgs = 0; + // active function:{none,leakeyrelu,sigmoid,tanh} + ActivationArgs active_args; + // active_args.activation_type = LEAKYRELU; + + active_args.activation_type = args.output.activation.activation_type; + + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + + DLOG << " activation_type:" << active_args.activation_type + << " leaky_relu_negative_slope:" + << active_args.leaky_relu_negative_slope; + DLOG << " reg_ActivationArgs:" << reg_ActivationArgs; + pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) { ret = -EIO; @@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) { pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; } + + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion + reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq( ((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32), @@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) { output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + active_args.activation_type = NONE; + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); + pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; @@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { uint64_t image_physical_address = 0; uint64_t output_physical_address = 0; + uint64_t reg_ActivationArgs = 0; + // active function:{none,leakeyrelu,sigmoid,tanh} + ActivationArgs active_args; + // active_args.activation_type = LEAKYRELU; + active_args.activation_type = args.output.activation.activation_type; + + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + + DLOG << " activation_type:" << active_args.activation_type + << " leaky_relu_negative_slope:" + << active_args.leaky_relu_negative_slope; + DLOG << " reg_ActivationArgs:" << reg_ActivationArgs; + image_physical_address = vaddr_to_paddr_driver(args.image.address); output_physical_address = vaddr_to_paddr_driver(args.output.address); uint32_t output_height = (uint32_t)( @@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { return ret; } + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion + reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR); reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR); @@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + + active_args.activation_type = NONE; + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); + pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; @@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { int ComputeFpgaEWAdd(const struct EWAddArgs &args) { #ifdef FPGA_PRINT_MODE DLOG << "=============ComputeFpgaEWAdd==========="; - DLOG << " relu_enabled:" << args.relu_enabled - << " const0:" << fp16_2_fp32(int16_t(args.const0)) + // DLOG << " relu_enabled:" << args.relu_enabled + DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0)) << " const1:" << fp16_2_fp32(int16_t(args.const1)); DLOG << " image0_address:" << args.image0.address << " image0_scale_address:" << args.image0.scale_address @@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { #ifdef PADDLE_MOBILE_ZU5 int ret = 0; uint64_t output_scale = 0; + + uint64_t reg_ActivationArgs = 0; + ActivationArgs active_args; + active_args.activation_type = args.output.activation.activation_type; + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + DLOG << " activation_type:" << active_args.activation_type + << " leaky_relu_negative_slope:" + << active_args.leaky_relu_negative_slope; + DLOG << " reg_ActivationArgs:" << reg_ActivationArgs; + pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) { ret = -EIO; @@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { return ret; } + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion + reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR); reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR); @@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + active_args.activation_type = NONE; + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); + pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; #endif @@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) { uint8_t data_cell_in = 0; uint8_t data_cell_out = 0; int ret = 0; + + uint64_t reg_ActivationArgs = 0; + ActivationArgs active_args; + active_args.activation_type = args.output.activation.activation_type; + + active_args.leaky_relu_negative_slope = + args.output.activation.leaky_relu_negative_slope; + + reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) | + active_args.leaky_relu_negative_slope; + datalen = (uint64_t)args.image.width * (uint64_t)args.image.height * (uint64_t)args.image.channels; datalen = align_to_x(datalen, 16); @@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) { (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) { return -EFAULT; } - pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) { ret = -EIO; @@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) { pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; } - + reg_writeq(reg_ActivationArgs, + REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR); reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR); @@ -585,12 +666,27 @@ int PerformBypass(const struct BypassArgs &args) { output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; #endif return 0; } // PerformBypass +uint64_t FPGAVersion() { +#ifdef FPGA_PRINT_MODE + DLOG << "=============ComputeFpgaBypass==========="; +#endif +#ifdef PADDLE_MOBILE_ZU5 + uint64_t fpga_ver = 0; + pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); + fpga_ver = reg_readq(REG_HARDWARE_STATUS); + pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); + return fpga_ver; +#endif + return 0; +} // FPGAVersion + int ComputeFPGAConcat(const struct ConcatArgs &args) { #ifdef FPGA_PRINT_MODE DLOG << "=============ComputeFpgaConcat==========="; @@ -655,6 +751,45 @@ void deconv_post_process(const struct DeconvArgs &args) { fpga_flush(args.output.address, num * align_deconv_row_len * deconv_h * sizeof(int16_t)); } +void DWDeconv_post_process(const struct DWDeconvArgs &args) { + int sub_conv_n = args.sub_conv_num; + int sub_height = args.sub_output_height; + int sub_width = args.sub_output_width; + int omit_size = args.omit_size; + int channel = args.filter_num; + int num = 1; + int origin_h = sub_height * sub_conv_n; + int origin_w = sub_width * sub_conv_n; + int align_origin_w = align_to_x(origin_w * channel, IMAGE_ALIGNMENT); + int deconv_h = origin_h - 2 * omit_size; + int deconv_w = origin_w - 2 * omit_size; + int deconv_row_len = deconv_w * channel; + int align_deconv_row_len = align_to_x(deconv_row_len, IMAGE_ALIGNMENT); + + for (int idx = 0; idx < sub_conv_n; ++idx) { + paddle_mobile::fpga::fpga_invalidate( + args.dw_conv_args[idx]->output.address, + align_origin_w * origin_h * sizeof(int16_t)); + } + + int deconv_idx = 0; + for (int nn = 0; nn < num; ++nn) { + for (int hh = 0; hh < origin_h; ++hh) { + int hx = (hh % sub_conv_n); + auto sub_t = (int16_t *)(args.dw_conv_args[sub_conv_n - hx - 1] // NOLINT + ->output.address); + int hi = (hh / sub_conv_n); + if ((hh < omit_size) || (hh >= (origin_h - omit_size))) continue; + int sidx = (nn * origin_h * align_origin_w + hi * align_origin_w + + omit_size * channel); + fpga_copy((int16_t *)(args.output.address) + deconv_idx, // NOLINT + sub_t + sidx, sizeof(int16_t) * deconv_row_len); // NOLINT + deconv_idx += align_deconv_row_len; + } + } + fpga_flush(args.output.address, + num * align_deconv_row_len * deconv_h * sizeof(int16_t)); +} int ComputeFpgaDeconv(const struct DeconvArgs &args) { #ifdef FPGA_PRINT_MODE @@ -755,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) { int ComputeDWConv(const struct DWconvArgs &args) { #ifdef FPGA_PRINT_MODE DLOG << "=============ComputeDWConv==========="; - DLOG << " mode:" << args.relu_enabled; + // DLOG << " mode:" << args.relu_enabled; DLOG << " image_address:" << args.image.address << " image_scale_address:" << args.image.scale_address << " image_channels:" << args.image.channels @@ -778,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) { uint64_t output_scale = 0; uint64_t timer_cnt = 0; int ret = 0; - uint64_t cmd = args.relu_enabled; + // uint64_t cmd = args.relu_enabled; + uint64_t cmd = 0; uint64_t image_physical_address = 0; uint64_t output_physical_address = 0; uint64_t filter_physical_address = 0; @@ -792,17 +928,21 @@ int ComputeDWConv(const struct DWconvArgs &args) { align_to_x((uint64_t)args.image.channels, IMAGE_ALIGNMENT); uint64_t filter_amount_per_row_align = filter_N_align * (uint64_t)args.kernel.width; - uint64_t filter_amount_align = filter_N_align * (uint64_t)args.kernel.width * - (uint64_t)args.kernel.height; + uint64_t sub_filter_amount_align = filter_N_align * + (uint64_t)args.kernel.width * + (uint64_t)args.kernel.height; + uint64_t filter_amount_align = + sub_filter_amount_align * (uint64_t)args.sub_conv_num; uint32_t output_height = (uint32_t)( (args.image.height + args.image.pad_height * 2 - args.kernel.height) / args.kernel.stride_h + 1); uint32_t output_width = (uint32_t)( - (args.image.width + args.image.pad_width * 2 - args.kernel.width) / - args.kernel.stride_w + - 1); + ((args.image.width + args.image.pad_width * 2 - args.kernel.width) / + args.kernel.stride_w + + 1) * + args.sub_conv_num); uint64_t image_amount_per_row = align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels, @@ -845,12 +985,15 @@ int ComputeDWConv(const struct DWconvArgs &args) { /*restart scale*/ reg_writeq(output_scale, REG_SCALE_PARAMETER); + reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR); reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR); reg_writeq((bias_physical_address << 32 | filter_physical_address), REG_DWCONV_FILTER_BASE_ADDR); reg_writeq(filter_amount_per_row_align | (filter_amount_align << 32), REG_DWCONV_FILTER_SHAPE); + reg_writeq(sub_filter_amount_align | (((uint64_t)args.sub_conv_num) << 32), + REG_DWCONV_FILTER_SUBNUMBER); reg_writeq(filter_N_align, REG_DWCONV_FILTER_N_ALIGN); reg_writeq( @@ -904,10 +1047,88 @@ int ComputeDWConv(const struct DWconvArgs &args) { output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = (output_scale << 32) | (output_scale >> 32); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); + DLOG << "output_scale:" << output_scale; pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); return ret; #endif return 0; } +int ComputeDWDeconv(const struct DWDeconvArgs &args) { +#ifdef FPGA_PRINT_MODE + DLOG << "=============ComputeFPGADeConv==========="; + DLOG << " filter_num:" << args.filter_num + << " group_num:" << args.group_num << "omit_size:" << args.omit_size + << "sub_output_width: " << args.sub_output_width + << "sub_output_height: " << args.sub_output_height + << " sub_conv_num:" << args.sub_conv_num; + DLOG << "args.output.address: " << args.output.address + << "args.output.scale_address: " << args.output.scale_address; + +#endif + + int sub_conv_num = args.sub_conv_num; + +#ifdef COST_TIME_PRINT + timeval start, end; + long dif_sec, dif_usec; // NOLINT +#endif + + for (int i = 0; i < sub_conv_num; i++) { +#ifdef COST_TIME_PRINT + gettimeofday(&start, NULL); +#endif + + ComputeDWConv(*args.dw_conv_args[i]); +#ifdef COST_TIME_PRINT + gettimeofday(&end, NULL); + dif_sec = end.tv_sec - start.tv_sec; + dif_usec = end.tv_usec - start.tv_usec; + std::cout << "deconv basic_conv: " << i << " times: " + << " cost time: " << (dif_sec * 1000000 + dif_usec) << "us" + << std::endl; +#endif + } + + if (sub_conv_num > 1) { + float max_scale = -1.0f; +#ifdef COST_TIME_PRINT + gettimeofday(&start, NULL); +#endif + for (int i = 0; i < sub_conv_num; i++) { + paddle_mobile::fpga::fpga_invalidate( + args.dw_conv_args[i]->output.scale_address, 2 * sizeof(float)); + float ptr_scale = (args.dw_conv_args[i]->output.scale_address)[0]; + if (ptr_scale > max_scale) { + args.output.scale_address[0] = ptr_scale; + args.output.scale_address[1] = + (args.dw_conv_args[i]->output.scale_address)[1]; + } + } + +#ifdef COST_TIME_PRINT + gettimeofday(&end, NULL); + dif_sec = end.tv_sec - start.tv_sec; + dif_usec = end.tv_usec - start.tv_usec; + std::cout << "deconv scale " + << " cost time: " << (dif_sec * 1000000 + dif_usec) << "us" + << std::endl; +#endif + } + +#ifdef COST_TIME_PRINT + gettimeofday(&start, NULL); +#endif + DWDeconv_post_process(args); +#ifdef COST_TIME_PRINT + gettimeofday(&end, NULL); + dif_sec = end.tv_sec - start.tv_sec; + dif_usec = end.tv_usec - start.tv_usec; + std::cout << "deconv_post_process " + << " cost time: " << (dif_sec * 1000000 + dif_usec) << "us" + << std::endl; +#endif + return 0; +} // ComputeFpgaDeconv + } // namespace fpga } // namespace paddle_mobile diff --git a/src/fpga/common/driver.cpp b/src/fpga/common/driver.cpp index 18a310b09cad4a741eb83453a09f3c94d4f0db05..b1d3559dbbb238ae24cc6224e2d253dab744dce1 100644 --- a/src/fpga/common/driver.cpp +++ b/src/fpga/common/driver.cpp @@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) { unsigned int nr = (unsigned int)_nr; int ret = 0; uint64_t a_size = FPGA_PAGE_SIZE * nr; - DLOG << a_size; pthread_mutex_lock(&memory->mutex); @@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) { void fpga_copy_driver(void *dest, const void *src, size_t num) { uint64_t i; - - DLOG << "dest:" << dest << " src:" << src << " size:" << num; - for (i = 0; i < num; i++) { *((int8_t *)dest + i) = *((int8_t *)src + i); // NOLINT } diff --git a/src/fpga/common/driver.h b/src/fpga/common/driver.h index 4fa83b776e7b3df5df5e536de91093fd18ca67a1..d35627cd46b3f233255a98d1e1fbca27469f715c 100644 --- a/src/fpga/common/driver.h +++ b/src/fpga/common/driver.h @@ -29,7 +29,7 @@ namespace driver { #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) -#define FPGA_REG_PHY_ADDR 0xa0000000 +#define FPGA_REG_PHY_ADDR 0x80000000 #define FPGA_REG_SIZE 0x1000 #define FPGA_MEM_PHY_ADDR 0x40000000 #define FPGA_MEM_SIZE 0x80000000 diff --git a/src/fpga/common/fpga_common.cpp b/src/fpga/common/fpga_common.cpp index 0a1787aa3f211a247d95cd7124879ce14af980a9..bf90a3a11926b1f90ed8a659db908a061f79b0e9 100644 --- a/src/fpga/common/fpga_common.cpp +++ b/src/fpga/common/fpga_common.cpp @@ -76,7 +76,7 @@ int32_t convertmantissa(int32_t i) { } float fp16_2_fp32(int16_t fp16_num) { - int16_t se_fp16 = fp16_num >> 10; + int16_t se_fp16 = (fp16_num >> 10) & 0x3f; int16_t m_fp16 = fp16_num & 0x3ff; int32_t e_fp32 = 0; int16_t offset = 0; @@ -94,7 +94,7 @@ float fp16_2_fp32(int16_t fp16_num) { e_fp32 = 0x80000000; offset = 0; } else if (se_fp16 < 63) { - e_fp32 = 0x80000000 + (se_fp16 - 32) << 23; + e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23); offset = 1024; } else { // se_fp16 == 63 e_fp32 = 0xC7800000; diff --git a/src/fpga/common/fpga_common.h b/src/fpga/common/fpga_common.h old mode 100755 new mode 100644 index c9519071fba94ad1e2b526d9e4d5cd96a1bcdbac..60753e5cde1e39a1dbf4a1016667db748fc6b9f9 --- a/src/fpga/common/fpga_common.h +++ b/src/fpga/common/fpga_common.h @@ -45,6 +45,7 @@ enum ActivationType { LEAKYRELU = 1, SIGMOID = 2, TANH = 3, + SOFTMAX = 4, }; struct ActivationArgs { @@ -132,7 +133,7 @@ struct DeconvTxParm { #endif struct ConvArgs { - bool relu_enabled; + // bool relu_enabled; void* sb_address; // scale and bias void* filter_address; float* filter_scale_address; @@ -198,7 +199,7 @@ struct PoolingArgs { }; struct EWAddArgs { - bool relu_enabled; + // bool relu_enabled; uint32_t const0; // output0 = const0 x input0 + const1 x input1; uint32_t const1; struct ImageInputArgs image0; @@ -229,13 +230,27 @@ struct DeconvArgs { std::vector> split_conv_args; }; struct DWconvArgs { - bool relu_enabled; + uint32_t sub_conv_num; + // bool relu_enabled; void* bias_address; void* filter_address; struct KernelArgs kernel; struct ImageInputArgs image; struct ImageOutputArgs output; }; + +struct DWDeconvArgs { + uint32_t sub_conv_num; + uint32_t group_num; + uint32_t filter_num; + uint32_t omit_size; + uint32_t sub_output_width; + uint32_t sub_output_height; + struct ImageOutputArgs output; + std::vector> dw_conv_args; + std::vector> vector_dw_conv_space; +}; + // static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; // } static inline uint32_t align_to_x(int64_t num, int64_t x) { diff --git a/src/fpga/common/pe.h b/src/fpga/common/pe.h index 9f2800428e431ea302d6cd33685e8ff1dcdc2751..cf0574bc04b05d538766ecba895e97944e1233f8 100644 --- a/src/fpga/common/pe.h +++ b/src/fpga/common/pe.h @@ -18,6 +18,7 @@ limitations under the License. */ namespace paddle_mobile { namespace fpga { +uint64_t FPGAVersion(); int PerformBypass(const struct BypassArgs& args); int ComputeBasicConv(const struct ConvArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args); @@ -28,5 +29,7 @@ int ComputeFPGAConcat(const struct ConcatArgs& args); int ComputeFPGASplit(const struct SplitArgs& args); int ComputeFpgaDeconv(const struct DeconvArgs& args); int ComputeDWConv(const struct DWconvArgs& args); +int ComputeDWDeconv(const struct DWDeconvArgs& args); + } // namespace fpga } // namespace paddle_mobile diff --git a/src/operators/activation_op.cpp b/src/operators/activation_op.cpp index bcff87c9276721c19a970eb328fc0a183ed6c003..76c9e1a014bc0e51b032d8516ba9448fa25b2aa5 100644 --- a/src/operators/activation_op.cpp +++ b/src/operators/activation_op.cpp @@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6); #ifdef SIGMOID_OP DEFINE_ACTIVATION_INFERSHAPE(Sigmoid); +namespace ops = paddle_mobile::operators; +#ifdef PADDLE_MOBILE_FPGA +REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp); +#endif #endif // SIGMOID_OP #ifdef TANH_OP diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp index 30ff3155a47c813f303dc59191edd8b60e6d8ce3..3e41efdf76ed5b14d408a1278c7dba0bd1f30a1f 100644 --- a/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_bn_kernel.cpp @@ -22,7 +22,10 @@ namespace operators { template <> bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto bias = param->Bias(); @@ -61,10 +64,10 @@ bool ConvAddBNKernel::Init(FusionConvAddBNParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; diff --git a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp index 7f720323253fff53f7d1bb92f8bfeec77bf0da14..b7b99be78acae80c46b9d1bd1f3cb72d5f4a7cfb 100644 --- a/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_bn_relu_kernel.cpp @@ -23,7 +23,10 @@ namespace operators { template <> bool ConvAddBNReluKernel::Init( FusionConvAddBNReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto bias = param->Bias(); auto bias_ptr = bias->data(); @@ -64,16 +67,16 @@ bool ConvAddBNReluKernel::Init( if (groups == channel) { fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr); fpga::DWconvArgs dwconv_arg = {0}; - fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled, - strides[0], strides[1], paddings[0], paddings[1], - new_bias_ptr); + fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, strides[0], strides[1], + paddings[0], paddings[1], new_bias_ptr); param->SetFpgaArgs(dwconv_arg); } else { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), strides[0], strides[1], paddings[0], - paddings[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), strides[0], + strides[1], paddings[0], paddings[1], bs_ptr); param->SetFpgaArgs(conv_arg); } return true; diff --git a/src/operators/kernel/fpga/V1/conv_add_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp old mode 100755 new mode 100644 index e566dc9b165811a3e8a9f78d040cc8c571fd93a9..153be5a4f888c2a39a7b05b9a7fbb72e305acb8d --- a/src/operators/kernel/fpga/V1/conv_add_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_kernel.cpp @@ -21,7 +21,10 @@ namespace operators { template <> bool ConvAddKernel::Init(FusionConvAddParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -40,10 +43,10 @@ bool ConvAddKernel::Init(FusionConvAddParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp old mode 100755 new mode 100644 index 6b2a2d77c0df29b4c319061776491b0583157d6f..eef35bf74b6b28e3ec0c49d6b7ace0a350f3f194 --- a/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_add_relu_kernel.cpp @@ -21,7 +21,10 @@ namespace operators { template <> bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -40,10 +43,10 @@ bool ConvAddReluKernel::Init(FusionConvAddReluParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp index 492d418b9023a3c4c802da099a5da5ebf5568649..c4c2bf184d536ace31e52defb59e97c154386464 100644 --- a/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_bn_kernel.cpp @@ -22,7 +22,10 @@ namespace operators { template <> bool ConvBNKernel::Init(FusionConvBNParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto filter = const_cast(param->Filter()); auto out = param->Output(); @@ -53,10 +56,10 @@ bool ConvBNKernel::Init(FusionConvBNParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp index 337b25ffa5d3ba00cd60935f8643213cb5ea70d3..463c90d1bb0dcd48a7b41aff73b830d14f989c73 100644 --- a/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/conv_bn_relu_kernel.cpp @@ -22,7 +22,10 @@ namespace operators { template <> bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); auto filter = const_cast(param->Filter()); auto out = param->Output(); @@ -53,10 +56,10 @@ bool ConvBNReluKernel::Init(FusionConvBNReluParam *param) { fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); + fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp index 83adddabf0213a441779815d312161d1737d1296..97a4d5516b52939a3a1d90a22c8050679810d405 100644 --- a/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp +++ b/src/operators/kernel/fpga/V1/deconv_add_kernel.cpp @@ -23,7 +23,10 @@ namespace operators { template <> bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -49,13 +52,24 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { "filter width should be equal to filter height "); PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); - fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); - fpga::DeconvArgs deconv_arg = {0}; - fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); - param->SetFpgaArgs(deconv_arg); + if (param->Groups() == channel) { + fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), + sub_conv_n); + fpga::DWDeconvArgs DWDeconv_arg = {0}; + fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, + activation_enable, leaky_relu_negative_slope, + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(DWDeconv_arg); + } else { + fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); + fpga::DeconvArgs deconv_arg = {0}; + fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(deconv_arg); + } return true; } @@ -63,7 +77,11 @@ bool DeconvAddKernel::Init(FusionDeconvAddParam *param) { template <> void DeconvAddKernel::Compute( const FusionDeconvAddParam ¶m) { - fpga::ComputeFpgaDeconv(param.FpgaArgs()); + if (param.Groups() == param.Output()->dims()[1]) { + fpga::ComputeDWDeconv(param.FpgaDWDconvArgs()); + } else { + fpga::ComputeFpgaDeconv(param.FpgaArgs()); + } } } // namespace operators diff --git a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp index 9a96ca6e53644e6b5a8a99a8eed2f5e92449e681..f0b29943d7731d716a19cff1e3cfc904d7610c0b 100644 --- a/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/deconv_add_relu_kernel.cpp @@ -24,7 +24,10 @@ namespace operators { template <> bool DeconvAddReluKernel::Init( FusionDeconvAddReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto input = const_cast(param->Input()); const Tensor *bias = param->Bias(); auto bias_ptr = bias->data(); @@ -50,20 +53,36 @@ bool DeconvAddReluKernel::Init( "filter width should be equal to filter height "); PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), "filter axis should be the multiple of stride axis "); - fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); - fpga::DeconvArgs deconv_arg = {0}; - fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled, - param->Groups(), param->Strides()[0], - param->Strides()[1], param->Paddings()[0], - param->Paddings()[1], bs_ptr); - param->SetFpgaArgs(deconv_arg); + if (param->Groups() == channel) { + fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(), + sub_conv_n); + fpga::DWDeconvArgs DWDeconv_arg = {0}; + fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter, + activation_enable, leaky_relu_negative_slope, + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(DWDeconv_arg); + } else { + fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); + fpga::DeconvArgs deconv_arg = {0}; + fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable, + leaky_relu_negative_slope, param->Groups(), + param->Strides()[0], param->Strides()[1], + param->Paddings()[0], param->Paddings()[1], bs_ptr); + param->SetFpgaArgs(deconv_arg); + } return true; } template <> void DeconvAddReluKernel::Compute( const FusionDeconvAddReluParam ¶m) { - fpga::ComputeFpgaDeconv(param.FpgaArgs()); + // fpga::ComputeFpgaDeconv(param.FpgaArgs()); + if (param.Groups() == param.Output()->dims()[1]) { + fpga::ComputeDWDeconv(param.FpgaDWDconvArgs()); + } else { + fpga::ComputeFpgaDeconv(param.FpgaArgs()); + } } } // namespace operators diff --git a/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp index be773412f099410b02f24b1d38d2a44d6ca77689..27eee7e5ba7045473ff035f45236d04e080a692e 100644 --- a/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp +++ b/src/operators/kernel/fpga/V1/elementwise_add_kernel.cpp @@ -20,7 +20,10 @@ namespace operators { template <> bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto *input_x = const_cast(param->InputX()); auto *input_y = const_cast(param->InputY()); auto *out = param->Out(); @@ -30,7 +33,10 @@ bool ElementwiseAddKernel::Init(ElementwiseAddParam *param) { auto out_ptr = out->mutable_data(); fpga::EWAddArgs ewaddArgs = {0}; - ewaddArgs.relu_enabled = relu_enabled; + // ewaddArgs.relu_enabled = relu_enabled; + ewaddArgs.output.activation.activation_type = activation_enable; + ewaddArgs.output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; ewaddArgs.const0 = 0x3c00; // =1 ewaddArgs.const1 = 0x3c00; // =1 ewaddArgs.image0.address = input_x_ptr; diff --git a/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp b/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp index 541bb6126509dc7da59fa6bed5c46aff3442928b..fbbe679d4b6a6d4b0ca0a25ebb7aacf93a133943 100644 --- a/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp +++ b/src/operators/kernel/fpga/V1/elementwise_add_relu_kernel.cpp @@ -21,7 +21,10 @@ namespace operators { template <> bool ElementwiseAddReluKernel::Init( ElementwiseAddReluParam *param) { - bool relu_enabled = true; + // bool relu_enabled = true; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::LEAKYRELU; + int16_t leaky_relu_negative_slope = 0; auto *input_x = const_cast(param->InputX()); auto *input_y = const_cast(param->InputY()); auto *out = param->Out(); @@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel::Init( auto out_ptr = out->mutable_data(); fpga::EWAddArgs ewaddArgs = {0}; - ewaddArgs.relu_enabled = relu_enabled; + // ewaddArgs.relu_enabled = relu_enabled; + ewaddArgs.output.activation.activation_type = activation_enable; + ewaddArgs.output.activation.leaky_relu_negative_slope = + leaky_relu_negative_slope; ewaddArgs.const0 = 0x3c00; // =1 ewaddArgs.const1 = 0x3c00; // =1 ewaddArgs.image0.address = input_x_ptr; diff --git a/src/operators/kernel/fpga/V1/fetch_kernel.cpp b/src/operators/kernel/fpga/V1/fetch_kernel.cpp index e6e4591168b90cbe19b207cd9e77eaf5cd07de80..c00bdf57a259e24669c33f011d7b77eb20d4b308 100644 --- a/src/operators/kernel/fpga/V1/fetch_kernel.cpp +++ b/src/operators/kernel/fpga/V1/fetch_kernel.cpp @@ -19,12 +19,34 @@ namespace operators { template <> bool FetchKernel::Init(FetchParam *param) { + Tensor *output = param->Out(); + // fpga::format_fp16_ofm(output); return true; } template <> void FetchKernel::Compute(const FetchParam ¶m) { param.Out()->ShareDataWith(*(param.InputX())); + /*auto input = + reinterpret_cast(const_cast(param.InputX())); + fpga::format_image(input); + auto input_ptr = input->data(); + Tensor *output = param.Out(); + auto output_ptr = output->data(); + + fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; + + args.input_data_type = fpga::DATA_TYPE_FP16; + args.output_data_type = fpga::DATA_TYPE_FP32; + args.input_layout_type = fpga::LAYOUT_CHW; + args.output_layout_type = fpga::LAYOUT_HWC; + args.image.address = reinterpret_cast(input_ptr); + args.image.channels = (uint32_t)input->dims()[1]; + args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : + 1; args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3] + : 1; args.image.pad_height = 0; args.image.pad_width = 0; args.output.address + = output_ptr; args.output.scale_address = output->scale; + fpga::PerformBypass(args);*/ } template class FetchKernel; diff --git a/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp b/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp index 9258fb90e1e6bf9a597a387843ce781858628139..fadeae324ff8f5160bc5ff410c2e02b09539a01e 100644 --- a/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp +++ b/src/operators/kernel/fpga/V1/fusion_fc_kernel.cpp @@ -20,7 +20,10 @@ namespace operators { template <> bool FusionFcKernel::Init(FusionFcParam *param) { - bool relu_enabled = false; + // bool relu_enabled = false; + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::NONE; + int16_t leaky_relu_negative_slope = 0; auto input_x = const_cast(param->InputX()); auto filter = const_cast(param->InputY()); const Tensor *input_z = param->InputZ(); @@ -55,8 +58,8 @@ bool FusionFcKernel::Init(FusionFcParam *param) { fpga::format_fp16_ofm(out); fpga::SplitConvArgs conv_arg = {0}; - fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, - 0, 0, bs_ptr); + fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable, + leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr); param->SetFpgaArgs(conv_arg); return true; } diff --git a/src/operators/kernel/fpga/V1/reshape_kernel.cpp b/src/operators/kernel/fpga/V1/reshape_kernel.cpp index f5495e6d005f7f7c14ebd3d290ea9be02b9f0951..5e01bb74bab6996ca59632ae31f37ecfeafc918c 100644 --- a/src/operators/kernel/fpga/V1/reshape_kernel.cpp +++ b/src/operators/kernel/fpga/V1/reshape_kernel.cpp @@ -22,6 +22,12 @@ namespace operators { template <> bool ReshapeKernel::Init(ReshapeParam *param) { param->Out()->ShareDataWith(*param->InputX()); + const int in_n = param->InputX()->dims()[0]; + const int in_c = param->InputX()->dims()[1]; + const int in_h = param->InputX()->dims()[2]; + const int in_w = param->InputX()->dims()[3]; + auto out = param->Out(); + out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w})); return true; } diff --git a/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6c836e2776891f283677287eae54019f0dbef39b --- /dev/null +++ b/src/operators/kernel/fpga/V1/sigmoid_kernel.cpp @@ -0,0 +1,56 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#ifdef SIGMOID_OP + +#include "operators/kernel/activation_kernel.h" + +namespace paddle_mobile { +namespace operators { + +template <> +bool SigmoidKernel::Init(SigmoidParam *param) { + paddle_mobile::fpga::ActivationType activation_enable = + paddle_mobile::fpga::SIGMOID; + int16_t leaky_relu_negative_slope = 0; + auto input = const_cast(param->InputX()); + auto input_ptr = input->data(); + auto out = param->Out(); + fpga::format_fp16_ofm(out); + + fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; + args.input_data_type = fpga::DATA_TYPE_FP16; + args.output_data_type = fpga::DATA_TYPE_FP16; + args.image.address = input_ptr; + args.image.height = + (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1; + args.image.width = + (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1; + args.image.channels = (uint32_t)input->dims()[1]; + args.output.address = out->data(); + args.output.scale_address = out->scale; + args.output.activation.activation_type = activation_enable; + args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope; + param->SetFpgaArgs(args); + return true; +} +template <> +void SigmoidKernel::Compute(const SigmoidParam ¶m) { + fpga::PerformBypass(param.FpgaArgs()); +} + +} // namespace operators +} // namespace paddle_mobile + +#endif diff --git a/src/operators/kernel/fpga/V1/softmax_kernel.cpp b/src/operators/kernel/fpga/V1/softmax_kernel.cpp index e5ada795b120c1438688089078be20e03f078cbb..2698fdece49409aec017112e8613a706c248cf48 100644 --- a/src/operators/kernel/fpga/V1/softmax_kernel.cpp +++ b/src/operators/kernel/fpga/V1/softmax_kernel.cpp @@ -26,7 +26,6 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { auto input_ptr = input->data(); auto out = param->Out(); fpga::format_fp32_ofm(out); - auto float_input = new Tensor; if (input->dims().size() == 2) { float_input->mutable_data({1, input->dims()[1]}); @@ -36,7 +35,6 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { } else { DLOG << "wrong dimension of softmax input"; } - fpga::format_fp32_ofm(float_input); fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; args.input_layout_type = fpga::LAYOUT_HWC; @@ -53,6 +51,7 @@ bool SoftmaxKernel::Init(SoftmaxParam *param) { args.output.scale_address = float_input->scale; param->SetFloatInput(float_input); param->SetFpgaArgs(args); + return true; } diff --git a/src/operators/op_param.h b/src/operators/op_param.h index 959bfd7f743401a453ab0169ca773285e2904d4e..e3da6724a8c33501f50bc463ee25a88166f4351b 100644 --- a/src/operators/op_param.h +++ b/src/operators/op_param.h @@ -1078,6 +1078,15 @@ class SigmoidParam : public OpParam { private: RType *input_x_; RType *out_; +#ifdef PADDLE_MOBILE_FPGA + + private: + fpga::BypassArgs fpga_bypass_args; + + public: + const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; } + void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; } +#endif }; #endif @@ -1200,6 +1209,20 @@ class FetchParam : public OpParam { private: RType *input_x_; Tensor *out_; +#ifdef PADDLE_MOBILE_FPGA + + private: + std::shared_ptr float_input_x_; + fpga::BypassArgs fpga_bypass_args; + + public: + RType *FloatInput() const { + return float_input_x_ == nullptr ? input_x_ : float_input_x_.get(); + } + void SetFloatInput(Tensor *input) { float_input_x_.reset(input); } + const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; } + void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; } +#endif }; #ifdef FILL_CONSTANT_OP @@ -2357,10 +2380,17 @@ class ConvTransposeParam : public OpParam { private: fpga::DeconvArgs fpga_conv_args; + fpga::DWDeconvArgs fpga_DWDeconv_args; public: const fpga::DeconvArgs &FpgaArgs() const { return fpga_conv_args; } + const fpga::DWDeconvArgs &FpgaDWDconvArgs() const { + return fpga_DWDeconv_args; + } void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; } + void SetFpgaArgs(const fpga::DWDeconvArgs &args) { + fpga_DWDeconv_args = args; + } #endif };