提交 899e7fdf 编写于 作者: H hjchen2

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into ocr_attention

...@@ -243,13 +243,13 @@ ...@@ -243,13 +243,13 @@
); );
inputPaths = ( inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh", "${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework", "${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
); );
name = "[CP] Embed Pods Frameworks"; name = "[CP] Embed Pods Frameworks";
outputFileListPaths = ( outputFileListPaths = (
); );
outputPaths = ( outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh; shellPath = /bin/sh;
...@@ -436,7 +436,7 @@ ...@@ -436,7 +436,7 @@
baseConfigurationReference = 4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */; baseConfigurationReference = 4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */;
buildSettings = { buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = A798K58VVL; DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist; INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
...@@ -446,6 +446,7 @@ ...@@ -446,6 +446,7 @@
); );
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo; PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)"; PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = ForAllDev;
SWIFT_VERSION = 4.0; SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2"; TARGETED_DEVICE_FAMILY = "1,2";
}; };
...@@ -456,7 +457,7 @@ ...@@ -456,7 +457,7 @@
baseConfigurationReference = E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */; baseConfigurationReference = E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */;
buildSettings = { buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic; CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = A798K58VVL; DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist; INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
...@@ -466,6 +467,7 @@ ...@@ -466,6 +467,7 @@
); );
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo; PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)"; PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = ForAllDev;
SWIFT_VERSION = 4.0; SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2"; TARGETED_DEVICE_FAMILY = "1,2";
}; };
......
...@@ -5,21 +5,25 @@ workspace 'paddle-mobile.xcworkspace' ...@@ -5,21 +5,25 @@ workspace 'paddle-mobile.xcworkspace'
target 'paddle-mobile-demo' do target 'paddle-mobile-demo' do
project 'paddle-mobile-demo/paddle-mobile-demo.xcodeproj' project 'paddle-mobile-demo/paddle-mobile-demo.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0' # pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end end
target 'paddle-mobile' do target 'paddle-mobile' do
project 'paddle-mobile/paddle-mobile.xcodeproj' project 'paddle-mobile/paddle-mobile.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0' # pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end end
target 'paddle-mobile-unit-test' do target 'paddle-mobile-unit-test' do
project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj' project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0' # pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end end
target 'MobileNetDemo' do target 'MobileNetDemo' do
project 'MobileNetDemo/MobileNetDemo.xcodeproj' project 'MobileNetDemo/MobileNetDemo.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0' # pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end end
...@@ -411,11 +411,11 @@ ...@@ -411,11 +411,11 @@
); );
inputPaths = ( inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh", "${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework", "${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
); );
name = "[CP] Embed Pods Frameworks"; name = "[CP] Embed Pods Frameworks";
outputPaths = ( outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh; shellPath = /bin/sh;
......
...@@ -234,11 +234,11 @@ ...@@ -234,11 +234,11 @@
); );
inputPaths = ( inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh", "${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework", "${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
); );
name = "[CP] Embed Pods Frameworks"; name = "[CP] Embed Pods Frameworks";
outputPaths = ( outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework", "${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh; shellPath = /bin/sh;
......
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */ = {isa = PBXBuildFile; fileRef = 456BB7B221F5B356001474E2 /* Framework.pbobjc.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc"; }; };
456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */ = {isa = PBXBuildFile; fileRef = 456BB7B321F5B356001474E2 /* Framework.pbobjc.h */; settings = {ATTRIBUTES = (Public, ); }; };
4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; }; 4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; };
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; }; 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; };
4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; }; 4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; };
...@@ -47,15 +49,14 @@ ...@@ -47,15 +49,14 @@
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA620E11CBC0081E9F8 /* Operator.swift */; }; FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA620E11CBC0081E9F8 /* Operator.swift */; };
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */; }; FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */; };
FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA820E11CBC0081E9F8 /* ReluOp.swift */; }; FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA820E11CBC0081E9F8 /* ReluOp.swift */; };
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BAF20E11CC20081E9F8 /* framework.pb.swift */; };
FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB020E11CC20081E9F8 /* Scope.swift */; }; FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB020E11CC20081E9F8 /* Scope.swift */; };
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB120E11CC20081E9F8 /* TensorDesc.swift */; }; FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB120E11CC20081E9F8 /* TensorDesc.swift */; };
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */; }; FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */; };
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* VarDesc.swift */; }; FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */; };
FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB420E11CC20081E9F8 /* Program.swift */; }; FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB420E11CC20081E9F8 /* Program.swift */; };
FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* OpDesc.swift */; }; FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */; };
FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB620E11CC20081E9F8 /* Attribute.swift */; }; FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB620E11CC20081E9F8 /* Attribute.swift */; };
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* BlockDesc.swift */; }; FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */; };
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */; }; FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */; };
FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */; }; FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */; };
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; }; FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
...@@ -93,6 +94,7 @@ ...@@ -93,6 +94,7 @@
FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; }; FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; };
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; }; FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; };
FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */; }; FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */; };
FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */; };
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; }; FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; }; FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; }; FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
...@@ -137,6 +139,8 @@ ...@@ -137,6 +139,8 @@
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
456BB7B221F5B356001474E2 /* Framework.pbobjc.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = Framework.pbobjc.m; sourceTree = "<group>"; };
456BB7B321F5B356001474E2 /* Framework.pbobjc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Framework.pbobjc.h; sourceTree = "<group>"; };
4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = "<group>"; }; 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = "<group>"; };
4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpKernel.swift; sourceTree = "<group>"; }; 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpKernel.swift; sourceTree = "<group>"; };
4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = "<group>"; }; 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = "<group>"; };
...@@ -182,15 +186,14 @@ ...@@ -182,15 +186,14 @@
FC039BA620E11CBC0081E9F8 /* Operator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Operator.swift; sourceTree = "<group>"; }; FC039BA620E11CBC0081E9F8 /* Operator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Operator.swift; sourceTree = "<group>"; };
FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BatchNormOp.swift; sourceTree = "<group>"; }; FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BatchNormOp.swift; sourceTree = "<group>"; };
FC039BA820E11CBC0081E9F8 /* ReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReluOp.swift; sourceTree = "<group>"; }; FC039BA820E11CBC0081E9F8 /* ReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReluOp.swift; sourceTree = "<group>"; };
FC039BAF20E11CC20081E9F8 /* framework.pb.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = framework.pb.swift; sourceTree = "<group>"; };
FC039BB020E11CC20081E9F8 /* Scope.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Scope.swift; sourceTree = "<group>"; }; FC039BB020E11CC20081E9F8 /* Scope.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Scope.swift; sourceTree = "<group>"; };
FC039BB120E11CC20081E9F8 /* TensorDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TensorDesc.swift; sourceTree = "<group>"; }; FC039BB120E11CC20081E9F8 /* TensorDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TensorDesc.swift; sourceTree = "<group>"; };
FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ProgramDesc.swift; sourceTree = "<group>"; }; FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMProgramDesc.swift; sourceTree = "<group>"; };
FC039BB320E11CC20081E9F8 /* VarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VarDesc.swift; sourceTree = "<group>"; }; FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMVarDesc.swift; sourceTree = "<group>"; };
FC039BB420E11CC20081E9F8 /* Program.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Program.swift; sourceTree = "<group>"; }; FC039BB420E11CC20081E9F8 /* Program.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Program.swift; sourceTree = "<group>"; };
FC039BB520E11CC20081E9F8 /* OpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OpDesc.swift; sourceTree = "<group>"; }; FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMOpDesc.swift; sourceTree = "<group>"; };
FC039BB620E11CC20081E9F8 /* Attribute.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Attribute.swift; sourceTree = "<group>"; }; FC039BB620E11CC20081E9F8 /* Attribute.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Attribute.swift; sourceTree = "<group>"; };
FC039BB720E11CC20081E9F8 /* BlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BlockDesc.swift; sourceTree = "<group>"; }; FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMBlockDesc.swift; sourceTree = "<group>"; };
FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReluKernel.swift; sourceTree = "<group>"; }; FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReluKernel.swift; sourceTree = "<group>"; };
FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvKernel.swift; sourceTree = "<group>"; }; FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvKernel.swift; sourceTree = "<group>"; };
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; }; FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
...@@ -229,6 +232,7 @@ ...@@ -229,6 +232,7 @@
FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; }; FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; };
FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; }; FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; };
FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchKernel.swift; sourceTree = "<group>"; }; FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchKernel.swift; sourceTree = "<group>"; };
FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BufferToTexture.metal; sourceTree = "<group>"; };
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; }; FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; }; FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; }; FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
...@@ -403,15 +407,16 @@ ...@@ -403,15 +407,16 @@
FC039BAE20E11CC20081E9F8 /* Program */ = { FC039BAE20E11CC20081E9F8 /* Program */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC039BAF20E11CC20081E9F8 /* framework.pb.swift */, 456BB7B321F5B356001474E2 /* Framework.pbobjc.h */,
456BB7B221F5B356001474E2 /* Framework.pbobjc.m */,
FC039BB020E11CC20081E9F8 /* Scope.swift */, FC039BB020E11CC20081E9F8 /* Scope.swift */,
FC039BB120E11CC20081E9F8 /* TensorDesc.swift */, FC039BB120E11CC20081E9F8 /* TensorDesc.swift */,
FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */, FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */,
FC039BB320E11CC20081E9F8 /* VarDesc.swift */, FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */,
FC039BB420E11CC20081E9F8 /* Program.swift */, FC039BB420E11CC20081E9F8 /* Program.swift */,
FC039BB520E11CC20081E9F8 /* OpDesc.swift */, FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */,
FC039BB620E11CC20081E9F8 /* Attribute.swift */, FC039BB620E11CC20081E9F8 /* Attribute.swift */,
FC039BB720E11CC20081E9F8 /* BlockDesc.swift */, FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */,
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */, FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */,
); );
path = Program; path = Program;
...@@ -496,6 +501,7 @@ ...@@ -496,6 +501,7 @@
FCEB6837212F00B100D2448E /* metal */ = { FCEB6837212F00B100D2448E /* metal */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */,
4AF928812135673D005B6C3A /* ConcatKernel.metal */, 4AF928812135673D005B6C3A /* ConcatKernel.metal */,
4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */, 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */,
4AF9288321357BE3005B6C3A /* Elementwise.metal */, 4AF9288321357BE3005B6C3A /* Elementwise.metal */,
...@@ -545,6 +551,7 @@ ...@@ -545,6 +551,7 @@
isa = PBXHeadersBuildPhase; isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */,
FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */, FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
...@@ -582,7 +589,7 @@ ...@@ -582,7 +589,7 @@
TargetAttributes = { TargetAttributes = {
FC039B6920E11C3C0081E9F8 = { FC039B6920E11C3C0081E9F8 = {
CreatedOnToolsVersion = 9.3.1; CreatedOnToolsVersion = 9.3.1;
LastSwiftMigration = 0940; LastSwiftMigration = 1000;
}; };
}; };
}; };
...@@ -659,7 +666,7 @@ ...@@ -659,7 +666,7 @@
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */, FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */, FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */,
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */, FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */, FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */,
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */, FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */, FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
...@@ -670,6 +677,7 @@ ...@@ -670,6 +677,7 @@
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */, FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */,
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */, 4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */,
FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */,
4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */, 4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */, 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
...@@ -684,7 +692,6 @@ ...@@ -684,7 +692,6 @@
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */, FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */, FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */, FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */, FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
...@@ -698,6 +705,7 @@ ...@@ -698,6 +705,7 @@
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */, 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */, 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */, FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */,
456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */,
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */, FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */, 4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */,
FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */, FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
...@@ -711,7 +719,7 @@ ...@@ -711,7 +719,7 @@
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */, FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */,
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */, FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */, FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */, FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */,
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */, FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */,
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */, 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */,
FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */, FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */,
...@@ -749,14 +757,14 @@ ...@@ -749,14 +757,14 @@
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */, FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */, FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */, FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */, FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */,
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */, FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */,
4AA1EA90214664CD00D0F791 /* Split.metal in Sources */, 4AA1EA90214664CD00D0F791 /* Split.metal in Sources */,
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */, FCD04E6820F315020007374F /* PoolKernel.swift in Sources */,
FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */, FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */,
FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */, FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */, FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */,
FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */, FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */,
FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */, FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */,
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */, 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */,
FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */, FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */,
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import SwiftProtobuf //import SwiftProtobuf
public class Loader<P: PrecisionType> { public class Loader<P: PrecisionType> {
class ParaLoader { class ParaLoader {
...@@ -145,13 +145,17 @@ public class Loader<P: PrecisionType> { ...@@ -145,13 +145,17 @@ public class Loader<P: PrecisionType> {
public init(){} public init(){}
func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program { func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program {
do { do {
let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init( /// swift protobuf serialized Data to instance class
serializedData: modelData) // let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
// serializedData: modelData)
/// oc protobuf serialized Data to instance class
let protoProgram = try ProgramDesc.init(data: (modelData as NSData) as Data)
let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram) let originProgramDesc = PMProgramDesc.init(protoProgram: protoProgram)
let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc) let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
// let programDesc = ProgramDesc.init(protoProgram: protoProgram) // let programDesc = PMProgramDesc.init(protoProgram: protoProgram)
print(programDesc) print(programDesc)
......
...@@ -27,7 +27,7 @@ class OpCreator<P: PrecisionType> { ...@@ -27,7 +27,7 @@ class OpCreator<P: PrecisionType> {
} }
} }
func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable { func creat(device: MTLDevice, opDesc: PMOpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable {
guard let opCreator = opCreators[opDesc.type] else { guard let opCreator = opCreators[opDesc.type] else {
throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet") throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet")
} }
...@@ -39,7 +39,7 @@ class OpCreator<P: PrecisionType> { ...@@ -39,7 +39,7 @@ class OpCreator<P: PrecisionType> {
} }
} }
let opCreators: [String : (MTLDevice, OpDesc, Scope, InitContext) throws -> Runable & InferShaperable] = let opCreators: [String : (MTLDevice, PMOpDesc, Scope, InitContext) throws -> Runable & InferShaperable] =
[gConvType : ConvOp<P>.creat, [gConvType : ConvOp<P>.creat,
gBatchNormType : BatchNormOp<P>.creat, gBatchNormType : BatchNormOp<P>.creat,
gReluType : ReluOp<P>.creat, gReluType : ReluOp<P>.creat,
......
...@@ -27,7 +27,7 @@ protocol OpParam { ...@@ -27,7 +27,7 @@ protocol OpParam {
func outputDesc() -> String func outputDesc() -> String
//associatedtype ParamPrecisionType: PrecisionType //associatedtype ParamPrecisionType: PrecisionType
init(opDesc: OpDesc, inScope: Scope) throws init(opDesc: PMOpDesc, inScope: Scope) throws
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType
static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
......
...@@ -72,11 +72,11 @@ public class InitContext { ...@@ -72,11 +72,11 @@ public class InitContext {
protocol Creator where Self: OperatorProtocol{ protocol Creator where Self: OperatorProtocol{
associatedtype OpType: OperatorProtocol & Runable & InferShaperable associatedtype OpType: OperatorProtocol & Runable & InferShaperable
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType
} }
extension Creator where Self: OperatorProtocol { extension Creator where Self: OperatorProtocol {
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType { static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType {
do { do {
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope, initContext: initContext) return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error { } catch let error {
...@@ -100,11 +100,11 @@ protocol OperatorProtocol { ...@@ -100,11 +100,11 @@ protocol OperatorProtocol {
var attrs: [String : Attr] { get } var attrs: [String : Attr] { get }
var para: ParamType { get } var para: ParamType { get }
var kernel: KerType { get } var kernel: KerType { get }
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws
} }
extension OperatorProtocol { extension OperatorProtocol {
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> Self { static func provide(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> Self {
do { do {
return try Self.init(device: device, opDesc: opDesc, inScope: inScope, initContext: initContext) return try Self.init(device: device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error { } catch let error {
...@@ -114,7 +114,7 @@ extension OperatorProtocol { ...@@ -114,7 +114,7 @@ extension OperatorProtocol {
} }
class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType { class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws { required init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws {
type = opDesc.type type = opDesc.type
scope = inScope scope = inScope
inputs = opDesc.inputs inputs = opDesc.inputs
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class BatchNormParam<P: PrecisionType>: OpParam { class BatchNormParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope) input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
if input.transpose != [0, 2, 3, 1] { if input.transpose != [0, 2, 3, 1] {
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class BilinearInterpParam<P: PrecisionType>: OpParam { class BilinearInterpParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope) input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class BoxcoderParam<P: PrecisionType>: OpParam { class BoxcoderParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope) priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope)
priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope) priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ConcatParam<P: PrecisionType>: OpParam { class ConcatParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
guard let xlist = opDesc.inputs["X"] else { guard let xlist = opDesc.inputs["X"] else {
fatalError() fatalError()
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class ConvAddAddPreluParam<P: PrecisionType>: OpParam { class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
......
...@@ -17,7 +17,7 @@ import Foundation ...@@ -17,7 +17,7 @@ import Foundation
class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam { class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ConvAddParam<P: PrecisionType>: OpParam { class ConvAddParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ConvAddPreluParam<P: PrecisionType>: OpParam { class ConvAddPreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ConvBNReluParam<P: PrecisionType>: OpParam { class ConvBNReluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ConvParam<P: PrecisionType>: OpParam { class ConvParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope) filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvParam.input(inputs: opDesc.inputs, from: inScope) input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ConvTransposeParam<P: PrecisionType>: ConvParam<P> { class ConvTransposeParam<P: PrecisionType>: ConvParam<P> {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
try super.init(opDesc: opDesc, inScope: inScope) try super.init(opDesc: opDesc, inScope: inScope)
} catch let error { } catch let error {
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class ElementwiseAddParam<P: PrecisionType>: OpParam { class ElementwiseAddParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope) inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class ElementwiseAddPreluParam<P: PrecisionType>: OpParam { class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope) alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs) mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
......
...@@ -23,7 +23,7 @@ class FeedParam<P: PrecisionType>: OpParam{ ...@@ -23,7 +23,7 @@ class FeedParam<P: PrecisionType>: OpParam{
} }
let scope: Scope let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
scope = inScope scope = inScope
do { do {
output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -19,7 +19,7 @@ class FetchParam<P: PrecisionType>: OpParam{ ...@@ -19,7 +19,7 @@ class FetchParam<P: PrecisionType>: OpParam{
var output: FetchHolder var output: FetchHolder
let input: Texture let input: Texture
let scope: Scope let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
scope = inScope scope = inScope
do { do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope) input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class FlattenParam<P: PrecisionType>: OpParam { class FlattenParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope) input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
//
// RGBToYCrCb_Y.metal
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/12/28.
// Copyright © 2018 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
/// Copies a row-major float buffer into the red channel of a 2D float
/// texture, one thread per texel; G/B/A are zero-filled. Threads mapped
/// past the texture edge exit without writing.
kernel void buffer_to_texture_kernel(
    const device float *input [[buffer(0)]],
    texture2d<float, access::write> outTexture [[texture(0)]],
    uint2 gid [[thread_position_in_grid]]) {
  const uint width = outTexture.get_width();
  const uint height = outTexture.get_height();
  // Guard against grid overshoot when the dispatch size is rounded up.
  if (gid.x >= width || gid.y >= height) {
    return;
  }
  // Buffer holds one float per texel, rows laid out contiguously.
  const float value = input[width * gid.y + gid.x];
  outTexture.write(float4(value, 0.0f, 0.0f, 0.0f), gid);
}
/// Half-precision variant of buffer_to_texture_kernel: reads float values
/// from the buffer and writes them into the red channel of a half-format
/// texture; G/B/A are zero-filled.
kernel void buffer_to_texture_kernel_half(
    const device float *input [[buffer(0)]],
    texture2d<half, access::write> outTexture [[texture(0)]],
    uint2 gid [[thread_position_in_grid]]) {
  const uint width = outTexture.get_width();
  const uint height = outTexture.get_height();
  // Skip threads dispatched beyond the texture bounds.
  if (gid.x >= width || gid.y >= height) {
    return;
  }
  const float value = input[width * gid.y + gid.x];
  // Narrowing float -> half happens inside the half4 constructor.
  outTexture.write(half4(value, 0.0f, 0.0f, 0.0f), gid);
}
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class MulticlassNMSParam<P: PrecisionType>: OpParam { class MulticlassNMSParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope) scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope)
bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope) bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class PoolParam<P: PrecisionType>: OpParam { class PoolParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope) input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class PreluParam<P: PrecisionType>: OpParam { class PreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope) input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class PriorBoxParam<P: PrecisionType>: OpParam { class PriorBoxParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs) min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs)
} catch _ { } catch _ {
......
...@@ -17,7 +17,7 @@ import Foundation ...@@ -17,7 +17,7 @@ import Foundation
class ReluParam<P: PrecisionType>: OpParam { class ReluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope) input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class ReshapeParam<P: PrecisionType>: OpParam { class ReshapeParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope) input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ResizeBilinearParam<P: PrecisionType>: OpParam { class ResizeBilinearParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope) input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope)
// if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) { // if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) {
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class ShapeParam<P: PrecisionType>: OpParam { class ShapeParam<P: PrecisionType>: OpParam {
// typealias ParamPrecisionType = P // typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope) input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope)
output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class SoftmaxParam<P: PrecisionType>: OpParam { class SoftmaxParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope) input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -16,7 +16,7 @@ import Foundation ...@@ -16,7 +16,7 @@ import Foundation
class SplitParam<P: PrecisionType>: OpParam { class SplitParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope) input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope)
output = Texture.init(device: input.metalTexture!.device, inDim: input.dim) output = Texture.init(device: input.metalTexture!.device, inDim: input.dim)
......
...@@ -17,7 +17,7 @@ import Metal ...@@ -17,7 +17,7 @@ import Metal
class TransposeParam<P: PrecisionType>: OpParam { class TransposeParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P //typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: PMOpDesc, inScope: Scope) throws {
do { do {
input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope) input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
...@@ -35,7 +35,11 @@ extension Array: Attr { ...@@ -35,7 +35,11 @@ extension Array: Attr {
extension String: Attr { extension String: Attr {
} }
func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr { extension NSMutableArray :Attr {
}
func attrWithProtoDesc(attrDesc: OpDesc_Attr) -> Attr {
switch attrDesc.type { switch attrDesc.type {
case .boolean: case .boolean:
return attrDesc.b return attrDesc.b
...@@ -47,14 +51,33 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At ...@@ -47,14 +51,33 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At
return attrDesc.l return attrDesc.l
case .float: case .float:
return attrDesc.f return attrDesc.f
/// convert GPB class to swift class
case .booleans: case .booleans:
return attrDesc.bools var dimsArray = [Bool]()
let dimsCount = attrDesc.boolsArray.count
for i in 0..<dimsCount {
let dim = Bool(attrDesc.boolsArray.value(at: i))
dimsArray.append(dim)
}
return dimsArray
case .floats: case .floats:
return attrDesc.floats var dimsArray = [Float]()
let dimsCount = attrDesc.floatsArray.count
for i in 0..<dimsCount {
let dim = Float(attrDesc.floatsArray.value(at: i))
dimsArray.append(dim)
}
return dimsArray
case .ints: case .ints:
return attrDesc.ints var dimsArray = [Int32]()
let dimsCount = attrDesc.intsArray.count
for i in 0..<dimsCount {
let dim = Int32(attrDesc.intsArray.value(at: i))
dimsArray.append(dim)
}
return dimsArray
case .strings: case .strings:
return attrDesc.strings return attrDesc.stringsArray
default: default:
fatalError(" not support this attr type: \(attrDesc.type)") fatalError(" not support this attr type: \(attrDesc.type)")
} }
......
// Generated by the protocol buffer compiler. DO NOT EDIT!
// source: Framework.proto
// This CPP symbol can be defined to use imports that match up to the framework
// imports needed when using CocoaPods.
#if !defined(GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS)
#define GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS 0
#endif
//#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
// #import <Protobuf/GPBProtocolBuffers.h>
//#else
// #import "GPBProtocolBuffers.h"
//#endif
#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
#import <Protobuf/GPBProtocolBuffers.h>
#else
#import <ProtocolBuffers/ProtocolBuffers.h>
#endif
#if GOOGLE_PROTOBUF_OBJC_GEN_VERSION != 30001
#error This file was generated by a different version of protoc which is incompatible with your Protocol Buffer library sources.
#endif
// @@protoc_insertion_point(imports)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
CF_EXTERN_C_BEGIN
@class BlockDesc;
@class OpDesc;
@class OpDesc_Attr;
@class OpDesc_Var;
@class OpProto_Attr;
@class OpProto_Var;
@class VarDesc;
@class VarType;
@class VarType_ChannelDesc;
@class VarType_LoDTensorArrayDesc;
@class VarType_LoDTensorDesc;
@class VarType_ReaderDesc;
@class VarType_TensorDesc;
@class VarType_Tuple;
@class Version;
NS_ASSUME_NONNULL_BEGIN
#pragma mark - Enum AttrType
/// Kind of value carried by an OpDesc_Attr. The case selects which of the
/// attr's payload fields (i, f, s, intsArray, floatsArray, …) is meaningful.
/// Raw values are protobuf wire values — do not renumber.
typedef GPB_ENUM(AttrType) {
  AttrType_Int = 0,
  AttrType_Float = 1,
  AttrType_String = 2,
  AttrType_Ints = 3,
  AttrType_Floats = 4,
  AttrType_Strings = 5,
  AttrType_Boolean = 6,
  AttrType_Booleans = 7,
  AttrType_Block = 8,
  AttrType_Long = 9,
  AttrType_Blocks = 10,
};

/// Returns the runtime descriptor for AttrType (used by GPB reflection).
GPBEnumDescriptor *AttrType_EnumDescriptor(void);

/// Checks to see if the given value is defined by the enum or was not known at
/// the time this source was generated.
BOOL AttrType_IsValidValue(int32_t value);
#pragma mark - Enum VarType_Type
/// Data/container type of a framework variable; mirrors `VarType.Type` in
/// Framework.proto. Raw values are protobuf wire values — do not renumber.
typedef GPB_ENUM(VarType_Type) {
  /// Pod Types
  VarType_Type_Bool = 0,
  VarType_Type_Int16 = 1,
  VarType_Type_Int32 = 2,
  VarType_Type_Int64 = 3,
  VarType_Type_Fp16 = 4,
  VarType_Type_Fp32 = 5,
  VarType_Type_Fp64 = 6,

  /// Tensor<size_t> is used in C++.
  VarType_Type_SizeT = 19,
  VarType_Type_Uint8 = 20,
  VarType_Type_Int8 = 21,

  /// Other types that may need additional descriptions
  VarType_Type_LodTensor = 7,
  VarType_Type_SelectedRows = 8,
  VarType_Type_FeedMinibatch = 9,
  VarType_Type_FetchList = 10,
  VarType_Type_StepScopes = 11,
  VarType_Type_LodRankTable = 12,
  VarType_Type_LodTensorArray = 13,
  VarType_Type_PlaceList = 14,
  VarType_Type_Reader = 15,
  VarType_Type_Channel = 16,

  /// Any runtime decided variable type is raw
  /// raw variables should manage their own allocations
  /// in operators like nccl_op
  VarType_Type_Raw = 17,
  VarType_Type_Tuple = 18,
};

/// Returns the runtime descriptor for VarType_Type (used by GPB reflection).
GPBEnumDescriptor *VarType_Type_EnumDescriptor(void);

/// Checks to see if the given value is defined by the enum or was not known at
/// the time this source was generated.
BOOL VarType_Type_IsValidValue(int32_t value);
#pragma mark - FrameworkRoot
/// Exposes the extension registry for this file.
///
/// The base class provides:
/// @code
/// + (GPBExtensionRegistry *)extensionRegistry;
/// @endcode
/// which is a @c GPBExtensionRegistry that includes all the extensions defined by
/// this file and all files that it depends on.
/// Root object for the generated Framework.proto file; carries no fields of
/// its own — it only exposes the file's extension registry (see above).
@interface FrameworkRoot : GPBRootObject
@end
#pragma mark - Version
/// Protobuf field numbers for Version (for use with GPB field-lookup APIs).
typedef GPB_ENUM(Version_FieldNumber) {
  Version_FieldNumber_Version = 1,
};

/// Any incompatible changes to ProgramDesc and its dependencies should
/// raise the version defined version.h.
///
/// Serailization and Deserialization codes should be modified in a way
/// that supports old versions following the version and compatibility policy.
@interface Version : GPBMessage

@property(nonatomic, readwrite) int64_t version;

/// Test to see if @c version has been set.
@property(nonatomic, readwrite) BOOL hasVersion;

@end
#pragma mark - OpDesc
/// Protobuf field numbers for OpDesc (for use with GPB field-lookup APIs).
typedef GPB_ENUM(OpDesc_FieldNumber) {
  OpDesc_FieldNumber_InputsArray = 1,
  OpDesc_FieldNumber_OutputsArray = 2,
  OpDesc_FieldNumber_Type = 3,
  OpDesc_FieldNumber_AttrsArray = 4,
  OpDesc_FieldNumber_IsTarget = 5,
};

/// OpDesc describes an instance of a C++ framework::OperatorBase
/// derived class type.
@interface OpDesc : GPBMessage

/// Operator type name (e.g. which kernel this op maps to).
@property(nonatomic, readwrite, copy, null_resettable) NSString *type;
/// Test to see if @c type has been set.
@property(nonatomic, readwrite) BOOL hasType;

/// Named input variable slots of the operator.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Var*> *inputsArray;
/// The number of items in @c inputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger inputsArray_Count;

/// Named output variable slots of the operator.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Var*> *outputsArray;
/// The number of items in @c outputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger outputsArray_Count;

/// Typed attributes (hyper-parameters) of the operator.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Attr*> *attrsArray;
/// The number of items in @c attrsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger attrsArray_Count;

@property(nonatomic, readwrite) BOOL isTarget;

/// Test to see if @c isTarget has been set.
@property(nonatomic, readwrite) BOOL hasIsTarget;

@end
#pragma mark - OpDesc_Attr
// Protobuf field tag numbers for OpDesc.Attr. Note the gap at 9 and the
// non-sequential ids (10-14): tag numbers come from the .proto and must not
// be renumbered.
typedef GPB_ENUM(OpDesc_Attr_FieldNumber) {
OpDesc_Attr_FieldNumber_Name = 1,
OpDesc_Attr_FieldNumber_Type = 2,
OpDesc_Attr_FieldNumber_I = 3,
OpDesc_Attr_FieldNumber_F = 4,
OpDesc_Attr_FieldNumber_S = 5,
OpDesc_Attr_FieldNumber_IntsArray = 6,
OpDesc_Attr_FieldNumber_FloatsArray = 7,
OpDesc_Attr_FieldNumber_StringsArray = 8,
OpDesc_Attr_FieldNumber_B = 10,
OpDesc_Attr_FieldNumber_BoolsArray = 11,
OpDesc_Attr_FieldNumber_BlockIdx = 12,
OpDesc_Attr_FieldNumber_L = 13,
OpDesc_Attr_FieldNumber_BlocksIdxArray = 14,
};
// A single operator attribute. `type` selects which of the value fields
// (i, f, s, b, l, the repeated arrays, or blockIdx) carries the payload.
@interface OpDesc_Attr : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
// Discriminator for the value fields below.
@property(nonatomic, readwrite) AttrType type;
@property(nonatomic, readwrite) BOOL hasType;
// int32 scalar value.
@property(nonatomic, readwrite) int32_t i;
@property(nonatomic, readwrite) BOOL hasI;
// float scalar value.
@property(nonatomic, readwrite) float f;
@property(nonatomic, readwrite) BOOL hasF;
// string scalar value.
@property(nonatomic, readwrite, copy, null_resettable) NSString *s;
/// Test to see if @c s has been set.
@property(nonatomic, readwrite) BOOL hasS;
@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *intsArray;
/// The number of items in @c intsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger intsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) GPBFloatArray *floatsArray;
/// The number of items in @c floatsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger floatsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<NSString*> *stringsArray;
/// The number of items in @c stringsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger stringsArray_Count;
// bool scalar value.
@property(nonatomic, readwrite) BOOL b;
@property(nonatomic, readwrite) BOOL hasB;
@property(nonatomic, readwrite, strong, null_resettable) GPBBoolArray *boolsArray;
/// The number of items in @c boolsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger boolsArray_Count;
// Index of a referenced BlockDesc when the attribute is block-typed.
@property(nonatomic, readwrite) int32_t blockIdx;
@property(nonatomic, readwrite) BOOL hasBlockIdx;
// int64 scalar value.
@property(nonatomic, readwrite) int64_t l;
@property(nonatomic, readwrite) BOOL hasL;
@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *blocksIdxArray;
/// The number of items in @c blocksIdxArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger blocksIdxArray_Count;
@end
#pragma mark - OpDesc_Var
// Protobuf field tag numbers for OpDesc.Var.
typedef GPB_ENUM(OpDesc_Var_FieldNumber) {
OpDesc_Var_FieldNumber_Parameter = 1,
OpDesc_Var_FieldNumber_ArgumentsArray = 2,
};
// Binds one named operator slot (`parameter`) to the list of variable
// names (`argumentsArray`) that feed or receive it.
@interface OpDesc_Var : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *parameter;
/// Test to see if @c parameter has been set.
@property(nonatomic, readwrite) BOOL hasParameter;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<NSString*> *argumentsArray;
/// The number of items in @c argumentsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger argumentsArray_Count;
@end
#pragma mark - OpProto
// Protobuf field tag numbers for the OpProto message.
typedef GPB_ENUM(OpProto_FieldNumber) {
OpProto_FieldNumber_Type = 1,
OpProto_FieldNumber_InputsArray = 2,
OpProto_FieldNumber_OutputsArray = 3,
OpProto_FieldNumber_AttrsArray = 4,
OpProto_FieldNumber_Comment = 5,
};
/// OpProto describes a C++ framework::OperatorBase derived class.
// Unlike OpDesc (a concrete op instance), OpProto is the op's declared
// schema: its slot names, attribute schema, and documentation comment.
@interface OpProto : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *type;
/// Test to see if @c type has been set.
@property(nonatomic, readwrite) BOOL hasType;
// Repeated OpProto_Var: declared input slots.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Var*> *inputsArray;
/// The number of items in @c inputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger inputsArray_Count;
// Repeated OpProto_Var: declared output slots.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Var*> *outputsArray;
/// The number of items in @c outputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger outputsArray_Count;
// Repeated OpProto_Attr: declared attribute schema.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Attr*> *attrsArray;
/// The number of items in @c attrsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger attrsArray_Count;
@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
/// Test to see if @c comment has been set.
@property(nonatomic, readwrite) BOOL hasComment;
@end
#pragma mark - OpProto_Var
// Protobuf field tag numbers for OpProto.Var.
typedef GPB_ENUM(OpProto_Var_FieldNumber) {
OpProto_Var_FieldNumber_Name = 1,
OpProto_Var_FieldNumber_Comment = 2,
OpProto_Var_FieldNumber_Duplicable = 3,
OpProto_Var_FieldNumber_Intermediate = 4,
OpProto_Var_FieldNumber_Dispensable = 5,
OpProto_Var_FieldNumber_Reuse = 6,
};
/// VarProto describes the C++ type framework::Variable.
@interface OpProto_Var : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
/// Test to see if @c comment has been set.
@property(nonatomic, readwrite) BOOL hasComment;
// Whether the slot may bind more than one variable.
@property(nonatomic, readwrite) BOOL duplicable;
@property(nonatomic, readwrite) BOOL hasDuplicable;
// Whether the variable is an intermediate result (internal to the op).
@property(nonatomic, readwrite) BOOL intermediate;
@property(nonatomic, readwrite) BOOL hasIntermediate;
// Whether the slot is optional.
@property(nonatomic, readwrite) BOOL dispensable;
@property(nonatomic, readwrite) BOOL hasDispensable;
@property(nonatomic, readwrite, copy, null_resettable) NSString *reuse;
/// Test to see if @c reuse has been set.
@property(nonatomic, readwrite) BOOL hasReuse;
@end
#pragma mark - OpProto_Attr
// Protobuf field tag numbers for OpProto.Attr.
typedef GPB_ENUM(OpProto_Attr_FieldNumber) {
OpProto_Attr_FieldNumber_Name = 1,
OpProto_Attr_FieldNumber_Type = 2,
OpProto_Attr_FieldNumber_Comment = 3,
OpProto_Attr_FieldNumber_Generated = 4,
};
/// AttrProto describes the C++ type Attribute.
@interface OpProto_Attr : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
// Declared value type of the attribute (AttrType enum).
@property(nonatomic, readwrite) AttrType type;
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
/// Test to see if @c comment has been set.
@property(nonatomic, readwrite) BOOL hasComment;
/// If that attribute is generated, it means the Paddle third
/// language binding has responsibility to fill that
/// attribute. End-User should not set that attribute.
@property(nonatomic, readwrite) BOOL generated;
@property(nonatomic, readwrite) BOOL hasGenerated;
@end
#pragma mark - VarType
// Protobuf field tag numbers for the VarType message.
typedef GPB_ENUM(VarType_FieldNumber) {
VarType_FieldNumber_Type = 1,
VarType_FieldNumber_SelectedRows = 2,
VarType_FieldNumber_LodTensor = 3,
VarType_FieldNumber_TensorArray_p = 4,
VarType_FieldNumber_Reader = 5,
VarType_FieldNumber_Channel = 6,
VarType_FieldNumber_Tuple = 7,
};
// Type descriptor for a framework variable. `type` selects the kind; at
// most one of the sub-message fields below is expected to be populated
// accordingly (selectedRows / lodTensor / tensorArray_p / reader /
// channel / tuple).
@interface VarType : GPBMessage
@property(nonatomic, readwrite) VarType_Type type;
@property(nonatomic, readwrite) BOOL hasType;
// NOTE: selectedRows is declared as a plain TensorDesc in the .proto.
@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *selectedRows;
/// Test to see if @c selectedRows has been set.
@property(nonatomic, readwrite) BOOL hasSelectedRows;
@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorDesc *lodTensor;
/// Test to see if @c lodTensor has been set.
@property(nonatomic, readwrite) BOOL hasLodTensor;
// "_p" suffix added by the generator to avoid a name collision.
@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorArrayDesc *tensorArray_p;
/// Test to see if @c tensorArray_p has been set.
@property(nonatomic, readwrite) BOOL hasTensorArray_p;
@property(nonatomic, readwrite, strong, null_resettable) VarType_ReaderDesc *reader;
/// Test to see if @c reader has been set.
@property(nonatomic, readwrite) BOOL hasReader;
@property(nonatomic, readwrite, strong, null_resettable) VarType_ChannelDesc *channel;
/// Test to see if @c channel has been set.
@property(nonatomic, readwrite) BOOL hasChannel;
@property(nonatomic, readwrite, strong, null_resettable) VarType_Tuple *tuple;
/// Test to see if @c tuple has been set.
@property(nonatomic, readwrite) BOOL hasTuple;
@end
#pragma mark - VarType_TensorDesc
// Protobuf field tag numbers for VarType.TensorDesc.
typedef GPB_ENUM(VarType_TensorDesc_FieldNumber) {
VarType_TensorDesc_FieldNumber_DataType = 1,
VarType_TensorDesc_FieldNumber_DimsArray = 2,
};
// Element type plus shape of a dense tensor.
@interface VarType_TensorDesc : GPBMessage
/// Should only be PODType. Is enforced in C++
@property(nonatomic, readwrite) VarType_Type dataType;
@property(nonatomic, readwrite) BOOL hasDataType;
/// [UNK, 640, 480] is saved as [-1, 640, 480]
@property(nonatomic, readwrite, strong, null_resettable) GPBInt64Array *dimsArray;
/// The number of items in @c dimsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger dimsArray_Count;
@end
#pragma mark - VarType_LoDTensorDesc
// Protobuf field tag numbers for VarType.LoDTensorDesc.
typedef GPB_ENUM(VarType_LoDTensorDesc_FieldNumber) {
VarType_LoDTensorDesc_FieldNumber_Tensor = 1,
VarType_LoDTensorDesc_FieldNumber_LodLevel = 2,
};
// A tensor descriptor plus its level-of-detail (LoD) depth.
@interface VarType_LoDTensorDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor;
/// Test to see if @c tensor has been set.
@property(nonatomic, readwrite) BOOL hasTensor;
@property(nonatomic, readwrite) int32_t lodLevel;
@property(nonatomic, readwrite) BOOL hasLodLevel;
@end
#pragma mark - VarType_LoDTensorArrayDesc
// Protobuf field tag numbers for VarType.LoDTensorArrayDesc.
typedef GPB_ENUM(VarType_LoDTensorArrayDesc_FieldNumber) {
VarType_LoDTensorArrayDesc_FieldNumber_Tensor = 1,
VarType_LoDTensorArrayDesc_FieldNumber_LodLevel = 2,
};
// Descriptor for an array of LoD tensors; same fields as LoDTensorDesc.
@interface VarType_LoDTensorArrayDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor;
/// Test to see if @c tensor has been set.
@property(nonatomic, readwrite) BOOL hasTensor;
@property(nonatomic, readwrite) int32_t lodLevel;
@property(nonatomic, readwrite) BOOL hasLodLevel;
@end
#pragma mark - VarType_ReaderDesc
// Protobuf field tag numbers for VarType.ReaderDesc.
typedef GPB_ENUM(VarType_ReaderDesc_FieldNumber) {
VarType_ReaderDesc_FieldNumber_LodTensorArray = 1,
};
// A reader variable: a repeated list of LoD tensor descriptors it yields.
@interface VarType_ReaderDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<VarType_LoDTensorDesc*> *lodTensorArray;
/// The number of items in @c lodTensorArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger lodTensorArray_Count;
@end
#pragma mark - VarType_ChannelDesc
// Protobuf field tag numbers for VarType.ChannelDesc.
typedef GPB_ENUM(VarType_ChannelDesc_FieldNumber) {
VarType_ChannelDesc_FieldNumber_DataType = 1,
VarType_ChannelDesc_FieldNumber_Capacity = 2,
};
// A channel variable: element data type plus buffer capacity.
@interface VarType_ChannelDesc : GPBMessage
@property(nonatomic, readwrite) VarType_Type dataType;
@property(nonatomic, readwrite) BOOL hasDataType;
@property(nonatomic, readwrite) int64_t capacity;
@property(nonatomic, readwrite) BOOL hasCapacity;
@end
#pragma mark - VarType_Tuple
// Protobuf field tag numbers for VarType.Tuple.
typedef GPB_ENUM(VarType_Tuple_FieldNumber) {
VarType_Tuple_FieldNumber_ElementTypeArray = 1,
};
// A tuple variable: the ordered element types it contains.
@interface VarType_Tuple : GPBMessage
// |elementTypeArray| contains |VarType_Type|
@property(nonatomic, readwrite, strong, null_resettable) GPBEnumArray *elementTypeArray;
/// The number of items in @c elementTypeArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger elementTypeArray_Count;
@end
#pragma mark - VarDesc
// Protobuf field tag numbers for the VarDesc message.
typedef GPB_ENUM(VarDesc_FieldNumber) {
VarDesc_FieldNumber_Name = 1,
VarDesc_FieldNumber_Type = 2,
VarDesc_FieldNumber_Persistable = 3,
};
// A named variable in a block: its name, full type descriptor, and whether
// it persists across executor runs (e.g. parameters).
@interface VarDesc : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
@property(nonatomic, readwrite, strong, null_resettable) VarType *type;
/// Test to see if @c type has been set.
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite) BOOL persistable;
@property(nonatomic, readwrite) BOOL hasPersistable;
@end
#pragma mark - BlockDesc
// Protobuf field tag numbers for the BlockDesc message.
typedef GPB_ENUM(BlockDesc_FieldNumber) {
BlockDesc_FieldNumber_Idx = 1,
BlockDesc_FieldNumber_ParentIdx = 2,
BlockDesc_FieldNumber_VarsArray = 3,
BlockDesc_FieldNumber_OpsArray = 4,
BlockDesc_FieldNumber_ForwardBlockIdx = 5,
};
// One block of a program: its index, parent-block index, and the
// variables and operators it contains.
@interface BlockDesc : GPBMessage
// Index of this block within the enclosing ProgramDesc.
@property(nonatomic, readwrite) int32_t idx;
@property(nonatomic, readwrite) BOOL hasIdx;
// Index of the parent block (nesting relation between blocks).
@property(nonatomic, readwrite) int32_t parentIdx;
@property(nonatomic, readwrite) BOOL hasParentIdx;
// Repeated VarDesc: variables declared in this block.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<VarDesc*> *varsArray;
/// The number of items in @c varsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger varsArray_Count;
// Repeated OpDesc: operators executed in this block, in order.
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc*> *opsArray;
/// The number of items in @c opsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger opsArray_Count;
@property(nonatomic, readwrite) int32_t forwardBlockIdx;
@property(nonatomic, readwrite) BOOL hasForwardBlockIdx;
@end
#pragma mark - ProgramDesc
// Protobuf field tag numbers for the ProgramDesc message.
typedef GPB_ENUM(ProgramDesc_FieldNumber) {
ProgramDesc_FieldNumber_BlocksArray = 1,
ProgramDesc_FieldNumber_Version = 2,
};
/// Please refer to
/// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
/// for more details.
/// TODO(panyx0718): A model can have multiple programs. Need a
/// way to distinguish them. Maybe ID or name?
// Top-level message: the whole serialized program as a list of blocks
// plus a format-version record.
@interface ProgramDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<BlockDesc*> *blocksArray;
/// The number of items in @c blocksArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger blocksArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) Version *version;
/// Test to see if @c version has been set.
@property(nonatomic, readwrite) BOOL hasVersion;
@end
NS_ASSUME_NONNULL_END
CF_EXTERN_C_END
#pragma clang diagnostic pop
// @@protoc_insertion_point(global_scope)
...@@ -14,28 +14,28 @@ ...@@ -14,28 +14,28 @@
import Foundation import Foundation
public class BlockDesc { public class PMBlockDesc {
let index: Int let index: Int
let parentIndex: Int let parentIndex: Int
public let vars: [VarDesc] public let vars: [PMVarDesc]
let ops: [OpDesc] let ops: [PMOpDesc]
init(block: PaddleMobile_Framework_Proto_BlockDesc) { init(block: BlockDesc) {
index = Int(block.idx) index = Int(block.idx)
parentIndex = Int(block.parentIdx) parentIndex = Int(block.parentIdx)
var vars: [VarDesc] = [] var vars: [PMVarDesc] = []
for varOfBlock in block.vars { for varOfBlock in block.varsArray {
vars.append(VarDesc.init(protoVarDesc: varOfBlock)) vars.append(PMVarDesc.init(protoVarDesc: varOfBlock as! VarDesc))
} }
vars.sort { $0.name < $1.name } vars.sort { $0.name < $1.name }
self.vars = vars self.vars = vars
var ops: [OpDesc] = [] var ops: [PMOpDesc] = []
for op in block.ops { for op in block.opsArray {
ops.append(OpDesc.init(protoOpDesc: op)) ops.append(PMOpDesc.init(protoOpDesc: op as! OpDesc))
} }
self.ops = ops self.ops = ops
} }
init(inVars: [VarDesc], inOps: [OpDesc]) { init(inVars: [PMVarDesc], inOps: [PMOpDesc]) {
vars = inVars vars = inVars
ops = inOps ops = inOps
index = 0 index = 0
...@@ -44,7 +44,7 @@ public class BlockDesc { ...@@ -44,7 +44,7 @@ public class BlockDesc {
} }
extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { extension PMBlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
public var description: String { public var description: String {
var str = "" var str = ""
......
...@@ -14,50 +14,50 @@ ...@@ -14,50 +14,50 @@
import Foundation import Foundation
class OpDesc { class PMOpDesc {
let inputs: [String : [String]] let inputs: [String : [String]]
var paraInputs: [String : [String]] var paraInputs: [String : [String]]
var outputs: [String : [String]] var outputs: [String : [String]]
let unusedOutputs: [String : [String]] let unusedOutputs: [String : [String]]
var attrs: [String : Attr] = [:] var attrs: [String : Attr] = [:]
var type: String var type: String
init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) { init(protoOpDesc: OpDesc) {
type = protoOpDesc.type type = protoOpDesc.type
let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in let creator = { (vars: [OpDesc_Var], canAdd: (String) -> Bool) -> [String : [String]] in
var map: [String : [String]] = [:] var map: [String : [String]] = [:]
for opDescVar in vars { for opDescVar in vars {
if (canAdd(opDescVar.parameter)) { if (canAdd(opDescVar.parameter)) {
map[opDescVar.parameter] = opDescVar.arguments map[opDescVar.parameter] = opDescVar.argumentsArray as? [String]
}
}
return map
}
inputs = creator(protoOpDesc.inputsArray as! [OpDesc_Var]) {
opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false
}
paraInputs = creator(protoOpDesc.inputsArray as! [OpDesc_Var]) {
!(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false)
}
outputs = creator(protoOpDesc.outputsArray as! [OpDesc_Var]) {
opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false
}
unusedOutputs = creator(protoOpDesc.outputsArray as! [OpDesc_Var]) {
!(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false)
}
for attr in protoOpDesc.attrsArray {
if ((attr as! OpDesc_Attr).type != .block) {
attrs[(attr as! OpDesc_Attr).name] = attrWithProtoDesc(attrDesc: attr as! OpDesc_Attr)
}
} }
}
return map
}
inputs = creator(protoOpDesc.inputs) {
opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false
}
paraInputs = creator(protoOpDesc.inputs) {
!(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false)
}
outputs = creator(protoOpDesc.outputs) {
opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false
}
unusedOutputs = creator(protoOpDesc.outputs) {
!(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false)
}
for attr in protoOpDesc.attrs {
if (attr.type != .block) {
attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
}
} }
}
} }
extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible { extension PMOpDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String { var description: String {
var str = "" var str = ""
str += "op type: \(type): \n" str += "op type: \(type): \n"
......
...@@ -14,11 +14,11 @@ ...@@ -14,11 +14,11 @@
import Foundation import Foundation
public class ProgramDesc { public class PMProgramDesc {
public var blocks: [BlockDesc] = [] public var blocks: [PMBlockDesc] = []
init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) { init(protoProgram: ProgramDesc) {
for block in protoProgram.blocks { for block in protoProgram.blocksArray {
self.blocks.append(BlockDesc.init(block: block)) self.blocks.append(PMBlockDesc.init(block: block as! BlockDesc))
} }
} }
...@@ -26,7 +26,7 @@ public class ProgramDesc { ...@@ -26,7 +26,7 @@ public class ProgramDesc {
} }
} }
extension ProgramDesc: CustomStringConvertible, CustomDebugStringConvertible { extension PMProgramDesc: CustomStringConvertible, CustomDebugStringConvertible {
public var description: String { public var description: String {
var str: String = "" var str: String = ""
for i in 0..<blocks.count { for i in 0..<blocks.count {
......
...@@ -56,13 +56,13 @@ public enum VarTypeType: Int { ...@@ -56,13 +56,13 @@ public enum VarTypeType: Int {
} }
} }
public class VarDesc { public class PMVarDesc {
public let name: String public let name: String
public let persistable: Bool public let persistable: Bool
public let type: VarTypeType public let type: VarTypeType
let tensorDesc: TensorDesc? let tensorDesc: TensorDesc?
init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) { init(protoVarDesc: VarDesc) {
type = VarTypeType.init(rawValue: protoVarDesc.type.type.rawValue) ?? .ErrorType type = VarTypeType.init(rawValue: Int(protoVarDesc.type.type.rawValue)) ?? .ErrorType
name = protoVarDesc.name name = protoVarDesc.name
persistable = protoVarDesc.persistable persistable = protoVarDesc.persistable
switch type { switch type {
...@@ -71,14 +71,14 @@ public class VarDesc { ...@@ -71,14 +71,14 @@ public class VarDesc {
case .LodTensor: case .LodTensor:
tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.lodTensor.tensor) tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.lodTensor.tensor)
case .StepLodTensorArray: case .StepLodTensorArray:
tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.tensorArray.tensor); tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.tensorArray_p.tensor);
default: default:
tensorDesc = .none tensorDesc = .none
} }
} }
} }
extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible { extension PMVarDesc: CustomStringConvertible, CustomDebugStringConvertible {
public var description: String { public var description: String {
var str = "" var str = ""
str += "var name \(name): \n" str += "var name \(name): \n"
......
...@@ -16,14 +16,14 @@ import Foundation ...@@ -16,14 +16,14 @@ import Foundation
@objc public class Program: NSObject { @objc public class Program: NSObject {
public let paramPath: String public let paramPath: String
public let programDesc: ProgramDesc public let programDesc: PMProgramDesc
public let scope: Scope public let scope: Scope
init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) { init(inProgramDesc: PMProgramDesc, inParamPath: String, inScope: Scope) {
programDesc = inProgramDesc programDesc = inProgramDesc
paramPath = inParamPath paramPath = inParamPath
scope = inScope scope = inScope
} }
init(inProgramDesc: ProgramDesc, inScope: Scope) { init(inProgramDesc: PMProgramDesc, inScope: Scope) {
programDesc = inProgramDesc programDesc = inProgramDesc
scope = inScope scope = inScope
paramPath = "" paramPath = ""
......
...@@ -25,8 +25,8 @@ class Node { ...@@ -25,8 +25,8 @@ class Node {
var inputs: [Node] = [] var inputs: [Node] = []
var outputs: [Node] = [] var outputs: [Node] = []
var type: String var type: String
var opDesc: OpDesc? var opDesc: PMOpDesc?
init(inOpDesc: OpDesc) { init(inOpDesc: PMOpDesc) {
type = inOpDesc.type type = inOpDesc.type
opDesc = inOpDesc opDesc = inOpDesc
} }
...@@ -192,7 +192,7 @@ class ProgramOptimize<P: PrecisionType> { ...@@ -192,7 +192,7 @@ class ProgramOptimize<P: PrecisionType> {
ElementwiseAddPreluOp<P>.self ElementwiseAddPreluOp<P>.self
] ]
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc { func optimize(originProgramDesc: PMProgramDesc) -> PMProgramDesc {
guard originProgramDesc.blocks.count == 1 else { guard originProgramDesc.blocks.count == 1 else {
fatalError(" not support yet") fatalError(" not support yet")
...@@ -287,13 +287,13 @@ class ProgramOptimize<P: PrecisionType> { ...@@ -287,13 +287,13 @@ class ProgramOptimize<P: PrecisionType> {
} }
} }
var ops: [OpDesc] = [] var ops: [PMOpDesc] = []
for node in nodes { for node in nodes {
ops.append(node.opDesc!) ops.append(node.opDesc!)
} }
var newProgramDesc = ProgramDesc.init() let newProgramDesc = PMProgramDesc.init()
let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops) let newBlock = PMBlockDesc.init(inVars: block.vars, inOps: ops)
newProgramDesc.blocks.append(newBlock) newProgramDesc.blocks.append(newBlock)
return newProgramDesc return newProgramDesc
} }
......
...@@ -52,9 +52,19 @@ class TensorDesc { ...@@ -52,9 +52,19 @@ class TensorDesc {
} }
} }
init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) { init(protoTensorDesc: VarType_TensorDesc) {
dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) } // dims = protoTensorDesc.dimsArray.map{ Int64($0)! > 0 ? Int64($0) : abs(Int64($0)) }
dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType
var dimsArray = [Int]()
let dimsCount = protoTensorDesc.dimsArray.count
for i in 0..<dimsCount {
let dim = Int(protoTensorDesc.dimsArray.value(at: i)) > 0 ?Int(protoTensorDesc.dimsArray.value(at: i)) :abs(Int(protoTensorDesc.dimsArray.value(at: i)))
dimsArray.append(dim)
}
dims = dimsArray
dataType = VarTypeType.init(rawValue: Int(protoTensorDesc.dataType.rawValue)) ?? .ErrorType
} }
} }
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
//limitations under the License. //limitations under the License.
import Foundation import Foundation
import SwiftProtobuf
// If the compiler emits an error on this type, it is because this file // If the compiler emits an error on this type, it is because this file
// was generated by a version of the `protoc` Swift plug-in that is // was generated by a version of the `protoc` Swift plug-in that is
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#pragma once #pragma once
#import <UIKit/UIKit.h> #import <UIKit/UIKit.h>
#import <paddle_mobile/Framework.pbobjc.h>
//! Project version number for paddle_mobile. //! Project version number for paddle_mobile.
//FOUNDATION_EXPORT double paddle_mobileVersionNumber; //FOUNDATION_EXPORT double paddle_mobileVersionNumber;
......
...@@ -151,6 +151,30 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) { ...@@ -151,6 +151,30 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
filter_tensor->reset_data_ptr(new_data); filter_tensor->reset_data_ptr(new_data);
} }
void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
int stride) {
auto dims = filter_tensor->dims();
auto num = dims[0], height = dims[2], width = dims[3];
auto data_ptr = filter_tensor->data<float>();
size_t memory_size = num * height * width * sizeof(float);
auto new_data = (float *)fpga_malloc(memory_size); // NOLINT
fpga_copy(new_data, data_ptr, memory_size);
int hw = height * width;
deconv_filter::deconv_NC_convert(&new_data, num, 1, hw);
num = dims[1];
int channel = dims[0];
deconv_filter::DWDconv_format_filter(&new_data, num, channel, height, width,
scale_ptr, stride);
// framework::DDim dims_new =
// framework::make_ddim({num, 1, height, width});
// filter_tensor->Resize(dims_new);
filter_tensor->reset_data_ptr(new_data);
}
void format_fc_filter(framework::Tensor *filter_tensor, float max_value) { void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
filter_tensor->scale[0] = float(max_value / 127.0); // NOLINT filter_tensor->scale[0] = float(max_value / 127.0); // NOLINT
filter_tensor->scale[1] = float(127.0 / max_value); // NOLINT filter_tensor->scale[1] = float(127.0 / max_value); // NOLINT
...@@ -243,6 +267,17 @@ void format_dwconv_data(framework::Tensor *filter_tensor, ...@@ -243,6 +267,17 @@ void format_dwconv_data(framework::Tensor *filter_tensor,
format_bias_array(bias_ptr, channel); format_bias_array(bias_ptr, channel);
format_fp16_ofm(ofm_tensor); format_fp16_ofm(ofm_tensor);
} }
void format_DWDeconv_data(framework::Tensor *filter_tensor,
framework::Tensor *ofm_tensor, float **bs_ptr,
int group, int sub_conv_n) {
int channel = ofm_tensor->dims()[1];
// dw-deconv
format_DWDconv_filter(
filter_tensor,
(reinterpret_cast<float *>(*bs_ptr) + sub_conv_n * channel), sub_conv_n);
format_bias_array(bs_ptr, channel);
format_fp16_ofm(ofm_tensor);
}
void expand_conv_arg(ConvArgs *arg) { void expand_conv_arg(ConvArgs *arg) {
ConvArgs args = *arg; ConvArgs args = *arg;
...@@ -311,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) { ...@@ -311,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) {
auto filter_pad_width_mul_channel = auto filter_pad_width_mul_channel =
args.image.pad_width * args.image.channels; args.image.pad_width * args.image.channels;
auto image_amount_per_row_multi_win_first = auto image_amount_per_row_multi_win_first =
image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height); image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height);
auto image_amount_per_row_multi_win = auto image_amount_per_row_multi_win =
image_amount_per_row * (4 * args.kernel.stride_h); image_amount_per_row * (2 * args.kernel.stride_h);
auto image_block_num = block_num; auto image_block_num = block_num;
auto image_block_len = auto image_block_len =
...@@ -340,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) { ...@@ -340,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) {
(512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2) (512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2) ? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
: 0; : 0;
auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS; // auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
auto cmd = 0UL | USE_BIAS;
auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) | auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
((args.deconv_tx_param.sub_conv_num) << 16) | ((args.deconv_tx_param.sub_conv_num) << 16) |
...@@ -378,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) { ...@@ -378,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) {
void expand_EW_arg(EWAddArgs *arg) { void expand_EW_arg(EWAddArgs *arg) {
EWAddArgs args = *arg; EWAddArgs args = *arg;
uint64_t cmd = args.relu_enabled ? USE_RELU : 0; // uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
uint64_t cmd = 0;
uint64_t datalen = (uint64_t)args.image0.width * uint64_t datalen = (uint64_t)args.image0.width *
(uint64_t)args.image0.height * (uint64_t)args.image0.height *
(uint64_t)args.image0.channels; (uint64_t)args.image0.channels;
...@@ -406,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) { ...@@ -406,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) {
void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter, framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h, ActivationType activation_enable,
int stride_w, int padding_h, int padding_w, float *bs_ptr) { int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float *bs_ptr) {
auto input_ptr = input->data<float>(); auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>(); auto filter_ptr = filter->data<float>();
auto out_ptr = out->data<float>(); auto out_ptr = out->data<float>();
...@@ -453,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, ...@@ -453,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
filter->dims()[3])); filter->dims()[3]));
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
arg->conv_arg[i].relu_enabled = relu_enabled; // arg->conv_arg[i].relu_enabled = relu_enabled;
arg->conv_arg[i].output.activation.activation_type = activation_enable;
arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->conv_arg[i].group_num = (uint32_t)group_num; arg->conv_arg[i].group_num = (uint32_t)group_num;
arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h; arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w; arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;
...@@ -525,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input, ...@@ -525,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter, framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h, ActivationType activation_enable,
int stride_w, int padding_h, int padding_w, int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float *bs_ptr) { float *bs_ptr) {
auto input_ptr = input->data<float>(); auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>(); auto filter_ptr = filter->data<float>();
...@@ -652,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, ...@@ -652,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
} }
for (int j = 0; j < split_num; ++j) { for (int j = 0; j < split_num; ++j) {
arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled; // arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
activation_enable;
arg->split_conv_args[i]
->conv_arg[j]
.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num; arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num;
arg->split_conv_args[i]->conv_arg[j].kernel.width = arg->split_conv_args[i]->conv_arg[j].kernel.width =
...@@ -765,12 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input, ...@@ -765,12 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter, framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int stride_h, int stride_w, ActivationType activation_enable,
int padding_h, int padding_w, float *bias_ptr) { int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float *bias_ptr) {
auto filter_ptr = filter->data<float>(); auto filter_ptr = filter->data<float>();
auto input_ptr = input->data<float>(); auto input_ptr = input->data<float>();
auto output_ptr = out->mutable_data<float>(); auto output_ptr = out->mutable_data<float>();
arg->relu_enabled = relu_enabled; arg->sub_conv_num = 1;
// arg->relu_enabled = relu_enabled;
arg->output.activation.activation_type = activation_enable;
arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
arg->bias_address = bias_ptr; arg->bias_address = bias_ptr;
arg->filter_address = filter_ptr; arg->filter_address = filter_ptr;
arg->kernel.height = (uint32_t)filter->dims()[2]; arg->kernel.height = (uint32_t)filter->dims()[2];
...@@ -788,5 +842,114 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input, ...@@ -788,5 +842,114 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
arg->output.scale_address = out->scale; arg->output.scale_address = out->scale;
} // end dwconv arg fill } // end dwconv arg fill
void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
                       framework::Tensor *out, framework::Tensor *filter,
                       ActivationType activation_enable,
                       int16_t leaky_relu_negative_slope, int stride_h,
                       int stride_w, int padding_h, int padding_w,
                       float *bias_ptr) {
  // Fills a DWDeconvArgs by decomposing a depth-wise deconvolution into
  // `stride_w` depth-wise sub-convolutions (one DWconvArgs each), mirroring
  // the sub-filter scheme used by fill_deconv_arg above.
  // NOTE(review): only stride_w drives the decomposition; stride_h and
  // padding_h are accepted but never read below — presumably square
  // strides/pads are assumed. TODO confirm with callers.
  auto filter_ptr = filter->data<float>();
  auto input_ptr = input->data<float>();
  auto output_ptr = out->mutable_data<float>();
  // Deleter used for every sub-buffer allocated below so that the
  // shared_ptrs stored in vector_dw_conv_space release via fpga_free.
  auto deleter = [](void *p) { fpga_free(p); };
  // Depth-wise: one group per filter; dims()[0] is the filter count.
  arg->group_num = (uint32_t)filter->dims()[0];
  // One sub-convolution per stride step along the width axis.
  arg->sub_conv_num = (uint32_t)stride_w;
  arg->filter_num = (uint32_t)filter->dims()[0];
  int sub_conv_num = stride_w;
  // Geometry of each sub-convolution, derived from the deconv filter
  // width, padding and stride via the deconv_filter helpers.
  int sub_pad =
      deconv_filter::deconv_calc_sub_pad((int)filter->dims()[3],  // NOLINT
                                         padding_w, stride_w);
  auto sub_filter_width = (uint32_t)deconv_filter::deconv_get_sub_filter_axis(
      (int)filter->dims()[3], stride_w);  // NOLINT
  auto sub_output_width = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
      (int)input->dims()[3], sub_pad, sub_filter_width);  // NOLINT
  auto sub_output_height = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
      (int)input->dims()[2], sub_pad, sub_filter_width);  // NOLINT
  arg->sub_output_width = (uint32_t)sub_output_width;
  arg->sub_output_height = (uint32_t)sub_output_height;
  // Border rows/cols of the interleaved result that must be discarded.
  arg->omit_size = (uint32_t)deconv_filter::deconv_get_omit(
      stride_w, (int)filter->dims()[3], padding_w);  // NOLINT
  auto sub_channels = (int)input->dims()[1];  // NOLINT
  uint32_t omit_size = arg->omit_size;
  // Final deconv output size after interleaving the sub-conv outputs and
  // trimming omit_size pixels from each border.
  int real_out_width = sub_output_width * sub_conv_num - 2 * omit_size;
  int real_out_height = sub_output_height * sub_conv_num - 2 * omit_size;
  // NOTE(review): sub_filter_num is computed but not used below.
  int sub_filter_num = sub_conv_num * (arg->filter_num);
  framework::DDim dims_out_new = framework::make_ddim(
      {1, arg->filter_num, real_out_height, real_out_width});
  // Re-layout the output tensor as an fp16 OFM with the final deconv shape;
  // out->data must be re-fetched afterwards (out_ptr below).
  fpga::format_fp16_ofm(out, dims_out_new);
  auto out_ptr = out->data<float>();
  /*====For Addition
  arg->output.address =
      (half *)out_ptr +      // NOLINT
      omit_size * sizeof(half) *
          (align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT));
  */
  arg->output.address = out_ptr;
  arg->output.scale_address = out->scale;
  // Element count (int16_t) of one sub-filter slice inside filter_ptr.
  int filter_offset = sub_filter_width * sub_filter_width *
                      align_to_x(sub_channels, FILTER_ELEMENT_ALIGNMENT) *
                      arg->sub_conv_num;
  // Build one DWconvArgs per sub-convolution: each gets its own copy of the
  // corresponding filter slice plus private output/scale buffers, all
  // registered in vector_dw_conv_space for automatic fpga_free on release.
  for (int i = 0; i < sub_conv_num; ++i) {
    arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
    arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
    // arg->dw_conv_args[i]->relu_enabled = relu_enabled;
    arg->dw_conv_args[i]->output.activation.activation_type = activation_enable;
    arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
        leaky_relu_negative_slope;
    arg->dw_conv_args[i]->bias_address = bias_ptr;
    // Private copy of this sub-conv's filter slice (fp16 payload).
    arg->dw_conv_args[i]->filter_address =
        fpga_malloc(filter_offset * sizeof(int16_t));
    memcpy(arg->dw_conv_args[i]->filter_address,
           (reinterpret_cast<half *>(filter_ptr) + i * filter_offset),
           filter_offset * sizeof(int16_t));
    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
        reinterpret_cast<char *>(arg->dw_conv_args[i]->filter_address),
        deleter));
    // Each sub-convolution runs with stride 1 over the original image.
    arg->dw_conv_args[i]->kernel.height = (uint32_t)sub_filter_width;
    arg->dw_conv_args[i]->kernel.width = (uint32_t)sub_filter_width;
    arg->dw_conv_args[i]->kernel.stride_h = (uint32_t)1;
    arg->dw_conv_args[i]->kernel.stride_w = (uint32_t)1;
    arg->dw_conv_args[i]->image.address = input_ptr;
    arg->dw_conv_args[i]->image.channels = (uint32_t)input->dims()[1];
    arg->dw_conv_args[i]->image.height = (uint32_t)input->dims()[2];
    arg->dw_conv_args[i]->image.width = (uint32_t)input->dims()[3];
    arg->dw_conv_args[i]->image.pad_height = sub_pad;
    arg->dw_conv_args[i]->image.pad_width = sub_pad;
    arg->dw_conv_args[i]->image.scale_address = input->scale;
    // Intermediate fp16 output for this sub-conv, merged later by
    // DWDeconv_post_process.
    arg->dw_conv_args[i]->output.address =
        fpga_malloc(sub_output_height *
                    align_to_x(sub_output_width * sub_channels * sub_conv_num,
                               IMAGE_ALIGNMENT) *
                    sizeof(int16_t));
    arg->dw_conv_args[i]->output.scale_address =
        static_cast<float *>(fpga_malloc(2 * sizeof(float)));
    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
        reinterpret_cast<char *>(arg->dw_conv_args[i]->output.address),
        deleter));
    arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
        reinterpret_cast<char *>(arg->dw_conv_args[i]->output.scale_address),
        deleter));
  }
  // arg->output.scale_address = out->scale;
}  // end DWDeconv arg fill
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -47,16 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width, ...@@ -47,16 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width,
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input, void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter, framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h, ActivationType activation_enable,
int stride_w, int padding_h, int padding_w, float* bs_ptr); int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input, void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter, framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h, ActivationType activation_enable,
int stride_w, int padding_h, int padding_w, float* bs_ptr); int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input, void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter, framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int stride_h, int stride_w, ActivationType activation_enable,
int padding_h, int padding_w, float* bias_ptr); int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float* bias_ptr);
void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void format_deconv_filter(framework::Tensor* filter_tensor, float max_value, void format_deconv_filter(framework::Tensor* filter_tensor, float max_value,
int group_num, int stride); int group_num, int stride);
...@@ -69,6 +81,10 @@ void format_deconv_data(framework::Tensor* filter_tensor, ...@@ -69,6 +81,10 @@ void format_deconv_data(framework::Tensor* filter_tensor,
void format_dwconv_data(framework::Tensor* filter_tensor, void format_dwconv_data(framework::Tensor* filter_tensor,
framework::Tensor* ofm_tensor, float* scale_ptr, framework::Tensor* ofm_tensor, float* scale_ptr,
float** bias_ptr); float** bias_ptr);
void format_DWDeconv_data(framework::Tensor* filter_tensor,
framework::Tensor* ofm_tensor, float** bs_ptr,
int group, int sub_conv_n);
template <typename Dtype> template <typename Dtype>
void savefile(std::string filename, void* buffer, int dataSize, Dtype tmp) { void savefile(std::string filename, void* buffer, int dataSize, Dtype tmp) {
float data; float data;
......
...@@ -19,16 +19,6 @@ limitations under the License. */ ...@@ -19,16 +19,6 @@ limitations under the License. */
#include "fpga/V1/filter.h" #include "fpga/V1/filter.h"
// #include "filter.h" // #include "filter.h"
#include "fpga/V1/api.h" #include "fpga/V1/api.h"
// #include "fpga_api.h"
// just for test
//#include <string>
//#include "deconv.h"
//#include "deconv_api.h"
// using namespace std;
// using namespace paddle_mobile::fpga;
// using namespace baidu::fpga::deconv::api;
// namespace api = baidu::fpga::deconv::api;
namespace paddle_mobile { namespace paddle_mobile {
namespace fpga { namespace fpga {
...@@ -42,7 +32,8 @@ void deconv_inverse_filter(float** data_in, int num, int channel, int width, ...@@ -42,7 +32,8 @@ void deconv_inverse_filter(float** data_in, int num, int channel, int width,
float* tmp = *data_in; float* tmp = *data_in;
int data_size = num * channel * width * height; int data_size = num * channel * width * height;
int hw_len = height * width; int hw_len = height * width;
auto tmp_data = (float*)fpga_malloc(data_size * sizeof(float)); auto tmp_data =
reinterpret_cast<float*>(fpga_malloc(data_size * sizeof(float)));
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
for (int j = 0; j < channel; ++j) { for (int j = 0; j < channel; ++j) {
for (int k = 0; k < hw_len; ++k) { for (int k = 0; k < hw_len; ++k) {
...@@ -97,9 +88,10 @@ int deconv_get_omit(int stride, int filter_width, int pad) { ...@@ -97,9 +88,10 @@ int deconv_get_omit(int stride, int filter_width, int pad) {
return (stride - idx); return (stride - idx);
} }
void deconv_get_sub_filter(char** data_in, int height, int width, template <typename T>
int sub_conv_n, int kernel_num, int channel) { void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
char* ptr_tmp = *data_in; int kernel_num, int channel) {
T* ptr_tmp = *data_in;
int sub_num = kernel_num * sub_conv_n; int sub_num = kernel_num * sub_conv_n;
int sub_h = height / sub_conv_n; int sub_h = height / sub_conv_n;
int sub_w = width / sub_conv_n; int sub_w = width / sub_conv_n;
...@@ -107,7 +99,8 @@ void deconv_get_sub_filter(char** data_in, int height, int width, ...@@ -107,7 +99,8 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_filter_size = int sub_filter_size =
kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n; kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char)); T* ptr_sub_filter =
reinterpret_cast<T*>(fpga_malloc(sub_filter_size * sizeof(T)));
for (int idx = 0; idx < sub_conv_n; ++idx) { for (int idx = 0; idx < sub_conv_n; ++idx) {
for (int nn = 0; nn < sub_num; ++nn) { for (int nn = 0; nn < sub_num; ++nn) {
int ni = nn % kernel_num; int ni = nn % kernel_num;
...@@ -124,7 +117,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width, ...@@ -124,7 +117,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
fpga_copy( fpga_copy(
ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx, ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
(*data_in) + kidx, channel * sizeof(char)); (*data_in) + kidx, channel * sizeof(T));
// for (int cc =0; cc < channel; ++cc) { // for (int cc =0; cc < channel; ++cc) {
// ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] = // ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
// (*data_in)[kidx + cc]; // (*data_in)[kidx + cc];
...@@ -140,7 +133,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width, ...@@ -140,7 +133,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
void deconv_NC_convert(float** filter_in, int kernel_num, int channels, void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
int hw) { int hw) {
float* tmp = *filter_in; float* tmp = *filter_in;
float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc( float* ptr_filter = reinterpret_cast<float*>(paddle_mobile::fpga::fpga_malloc(
hw * kernel_num * channels * sizeof(float))); hw * kernel_num * channels * sizeof(float)));
for (int c = 0; c < channels; ++c) { for (int c = 0; c < channels; ++c) {
...@@ -188,7 +181,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -188,7 +181,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
result2); result2);
}*/ }*/
deconv_get_sub_filter(quantize_data, height, width, stride, num, channel); deconv_get_sub_filter<char>(quantize_data, height, width, stride, num,
channel);
/*{ /*{
char result2 = (char)0; char result2 = (char)0;
string filename = "sub_filter_filter_data"; string filename = "sub_filter_filter_data";
...@@ -212,10 +206,12 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -212,10 +206,12 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
((residual == 0) ? div_num : (div_num - 1)) + ((residual == 0) ? div_num : (div_num - 1)) +
align_to_x(residual, FILTER_NUM_ALIGNMENT); align_to_x(residual, FILTER_NUM_ALIGNMENT);
char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*)); char** ptr_ptr_data =
reinterpret_cast<char**>(fpga_malloc(sub_conv_n * sizeof(char*)));
int origin_offset = sub_chw * sub_num; int origin_offset = sub_chw * sub_num;
for (int i = 0; i < sub_conv_n; ++i) { for (int i = 0; i < sub_conv_n; ++i) {
(ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char)); (ptr_ptr_data)[i] =
reinterpret_cast<char*>(fpga_malloc(origin_offset * sizeof(char)));
fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i, fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
origin_offset * sizeof(char)); origin_offset * sizeof(char));
...@@ -233,8 +229,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -233,8 +229,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
int align_offset = int align_offset =
align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment; align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset * char* ptr_space = reinterpret_cast<char*>(fpga_malloc(
sizeof(char)); // continuous space sub_conv_n * align_offset * sizeof(char))); // continuous space
for (int i = 0; i < sub_conv_n; ++i) { for (int i = 0; i < sub_conv_n; ++i) {
char* ptr_tmp = (ptr_ptr_data)[i]; char* ptr_tmp = (ptr_ptr_data)[i];
...@@ -251,7 +247,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -251,7 +247,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset); fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
fpga_free(ptr_tmp); fpga_free(ptr_tmp);
} }
*data_in = (float*)ptr_space; *data_in = reinterpret_cast<float*>(ptr_space);
/* { /* {
char result2 = (char)0; char result2 = (char)0;
...@@ -262,6 +258,22 @@ void deconv_format_filter(float** data_in, int num, int channel, int height, ...@@ -262,6 +258,22 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char)); fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
} }
// Converts a float depth-wise-deconv filter in place (through *data_in)
// into the FPGA layout: inverse the filter, quantize to fp16, reorder to
// HWN, split into per-stride sub-filters, align the channel dimension,
// then flush the result to device memory.
//
// @param data_in   in/out pointer to the filter data; on return it points
//                  to fp16 (int16_t) data even though the type stays float**.
// @param num       filter (kernel) count.
// @param channel   filter channel count.
// @param height    filter height.
// @param width     filter width.
// @param scale_ptr receives the quantization scale.
// @param stride    deconv stride; number of sub-filters produced.
void DWDconv_format_filter(float** data_in, int num, int channel, int height,
                           int width, float* scale_ptr, int stride) {
  deconv_inverse_filter(data_in, num, channel, width, height);
  filter::quantize_to_fp16(data_in, channel, height, width, scale_ptr);
  // After quantization the buffer holds fp16 values; use reinterpret_cast
  // instead of a C-style cast for consistency with the rest of this file.
  int16_t** quantize_data = reinterpret_cast<int16_t**>(data_in);
  filter::convert_to_hwn(quantize_data, channel, height, width);
  deconv_get_sub_filter<int16_t>(quantize_data, height, width, stride, num,
                                 channel);
  filter::align_element_n(quantize_data, channel, height, width);
  fpga_flush(*quantize_data, align_to_x(channel, FILTER_ELEMENT_ALIGNMENT) *
                                 height * width * sizeof(int16_t));
}
} // namespace deconv_filter } // namespace deconv_filter
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -24,11 +24,15 @@ int deconv_calc_sub_pad(int filter_axis, int pad, int stride); ...@@ -24,11 +24,15 @@ int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
int deconv_get_sub_filter_axis(int filter_axis, int stride); int deconv_get_sub_filter_axis(int filter_axis, int stride);
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis); int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
int deconv_get_omit(int stride, int filter_width, int pad); int deconv_get_omit(int stride, int filter_width, int pad);
void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_conv_n, int kernel_num, int channel); template <typename T>
void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
int kernel_num, int channel);
void deconv_format_filter(float** data_in, int num, int channel, int height, void deconv_format_filter(float** data_in, int num, int channel, int height,
int width, int group_num, float max, int stride); int width, int group_num, float max, int stride);
void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw); void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
void DWDconv_format_filter(float** data_in, int num, int channel, int height,
int width, float* scale_ptr, int stride);
} // namespace deconv_filter } // namespace deconv_filter
} // namespace fpga } // namespace fpga
......
...@@ -346,6 +346,16 @@ void format_dwconv_filter(float **data_in, int num, int height, int width, ...@@ -346,6 +346,16 @@ void format_dwconv_filter(float **data_in, int num, int height, int width,
fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) * fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) *
height * width * sizeof(int16_t)); height * width * sizeof(int16_t));
} }
// Formats a float depth-wise-deconv filter in place (through *data_in) for
// the FPGA: quantize to fp16, reorder to HWN, align the N dimension, then
// flush the converted data to device memory.
//
// @param data_in   in/out pointer to the filter data; on return it points
//                  to fp16 (int16_t) data even though the type stays float**.
// @param num       filter (kernel) count.
// @param height    filter height.
// @param width     filter width.
// @param scale_ptr receives the quantization scale.
void format_DWDeconv_filter(float **data_in, int num, int height, int width,
                            float *scale_ptr) {
  quantize_to_fp16(data_in, num, height, width, scale_ptr);
  // After quantization the buffer holds fp16 values; use reinterpret_cast
  // instead of a C-style cast for consistency with the rest of this file.
  int16_t **quantize_data = reinterpret_cast<int16_t **>(data_in);
  convert_to_hwn(quantize_data, num, height, width);
  align_element_n(quantize_data, num, height, width);
  fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) *
                                 height * width * sizeof(int16_t));
}
} // namespace filter } // namespace filter
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -18,7 +18,6 @@ limitations under the License. */ ...@@ -18,7 +18,6 @@ limitations under the License. */
#include "fpga/V1/image.h" #include "fpga/V1/image.h"
#include "fpga/common/config.h" #include "fpga/common/config.h"
#include "fpga/common/driver.h" #include "fpga/common/driver.h"
#ifdef COST_TIME_PRINT #ifdef COST_TIME_PRINT
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #include <time.h>
...@@ -64,6 +63,7 @@ using namespace std; // NOLINT ...@@ -64,6 +63,7 @@ using namespace std; // NOLINT
#define REG_TIMER_COUNTER 0x070 #define REG_TIMER_COUNTER 0x070
#define REG_SCALE_PARAMETER 0x080 #define REG_SCALE_PARAMETER 0x080
#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090
#define REG_FLASH_CMD 0x200 #define REG_FLASH_CMD 0x200
#define REG_FLASH_DATA 0x208 #define REG_FLASH_DATA 0x208
...@@ -163,6 +163,7 @@ using namespace std; // NOLINT ...@@ -163,6 +163,7 @@ using namespace std; // NOLINT
#define REG_DWCONV_FILTER_BASE_ADDR 0xe08 #define REG_DWCONV_FILTER_BASE_ADDR 0xe08
#define REG_DWCONV_FILTER_SHAPE 0xe10 #define REG_DWCONV_FILTER_SHAPE 0xe10
#define REG_DWCONV_FILTER_N_ALIGN 0xe18 #define REG_DWCONV_FILTER_N_ALIGN 0xe18
#define REG_DWCONV_FILTER_SUBNUMBER 0xe20
#define REG_DWCONV_CMD 0xe00 #define REG_DWCONV_CMD 0xe00
int ComputeFpgaConv(const struct SplitConvArgs &args) { int ComputeFpgaConv(const struct SplitConvArgs &args) {
...@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) { ...@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
int ComputeBasicConv(const struct ConvArgs &args) { int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef FPGA_PRINT_MODE #ifdef FPGA_PRINT_MODE
DLOG << "======Compute Basic Conv======"; DLOG << "======Compute Basic Conv======";
DLOG << " relu_enabled:" << args.relu_enabled // DLOG << " relu_enabled:" << args.relu_enabled
<< " sb_address:" << args.sb_address DLOG << " sb_address:" << args.sb_address
<< " filter_address:" << args.filter_address << " filter_address:" << args.filter_address
<< " filter_num:" << args.filter_num << " filter_num:" << args.filter_num
<< " group_num:" << args.group_num; << " group_num:" << args.group_num;
...@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) { ...@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef PADDLE_MOBILE_ZU5 #ifdef PADDLE_MOBILE_ZU5
int ret = 0; int ret = 0;
uint64_t output_scale = 0; uint64_t output_scale = 0;
uint64_t reg_ActivationArgs = 0;
// active function:{none,leakeyrelu,sigmoid,tanh}
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) { if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
ret = -EIO; ret = -EIO;
...@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) { ...@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret; return ret;
} }
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion
reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq( reg_writeq(
((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32), ((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32),
...@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) { ...@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) {
output_scale = (output_scale << 32) | (output_scale >> 32); output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret; return ret;
...@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { ...@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
uint64_t image_physical_address = 0; uint64_t image_physical_address = 0;
uint64_t output_physical_address = 0; uint64_t output_physical_address = 0;
uint64_t reg_ActivationArgs = 0;
// active function:{none,leakeyrelu,sigmoid,tanh}
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
image_physical_address = vaddr_to_paddr_driver(args.image.address); image_physical_address = vaddr_to_paddr_driver(args.image.address);
output_physical_address = vaddr_to_paddr_driver(args.output.address); output_physical_address = vaddr_to_paddr_driver(args.output.address);
uint32_t output_height = (uint32_t)( uint32_t output_height = (uint32_t)(
...@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { ...@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
return ret; return ret;
} }
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion
reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR); reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR); reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
...@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { ...@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32); output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret; return ret;
...@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) { ...@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
int ComputeFpgaEWAdd(const struct EWAddArgs &args) { int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef FPGA_PRINT_MODE #ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFpgaEWAdd==========="; DLOG << "=============ComputeFpgaEWAdd===========";
DLOG << " relu_enabled:" << args.relu_enabled // DLOG << " relu_enabled:" << args.relu_enabled
<< " const0:" << fp16_2_fp32(int16_t(args.const0)) DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0))
<< " const1:" << fp16_2_fp32(int16_t(args.const1)); << " const1:" << fp16_2_fp32(int16_t(args.const1));
DLOG << " image0_address:" << args.image0.address DLOG << " image0_address:" << args.image0.address
<< " image0_scale_address:" << args.image0.scale_address << " image0_scale_address:" << args.image0.scale_address
...@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { ...@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef PADDLE_MOBILE_ZU5 #ifdef PADDLE_MOBILE_ZU5
int ret = 0; int ret = 0;
uint64_t output_scale = 0; uint64_t output_scale = 0;
uint64_t reg_ActivationArgs = 0;
ActivationArgs active_args;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) { if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
ret = -EIO; ret = -EIO;
...@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { ...@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
return ret; return ret;
} }
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion
reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR); reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);
reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR); reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR);
...@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) { ...@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32); output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret; return ret;
#endif #endif
...@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) { ...@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) {
uint8_t data_cell_in = 0; uint8_t data_cell_in = 0;
uint8_t data_cell_out = 0; uint8_t data_cell_out = 0;
int ret = 0; int ret = 0;
uint64_t reg_ActivationArgs = 0;
ActivationArgs active_args;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
datalen = (uint64_t)args.image.width * (uint64_t)args.image.height * datalen = (uint64_t)args.image.width * (uint64_t)args.image.height *
(uint64_t)args.image.channels; (uint64_t)args.image.channels;
datalen = align_to_x(datalen, 16); datalen = align_to_x(datalen, 16);
...@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) { ...@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) {
(data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) { (data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
return -EFAULT; return -EFAULT;
} }
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex); pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) { if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) {
ret = -EIO; ret = -EIO;
...@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) { ...@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret; return ret;
} }
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active functoion
reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR); reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR);
reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR); reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
...@@ -585,12 +666,27 @@ int PerformBypass(const struct BypassArgs &args) { ...@@ -585,12 +666,27 @@ int PerformBypass(const struct BypassArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32); output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret; return ret;
#endif #endif
return 0; return 0;
} // PerformBypass } // PerformBypass
// Reads the FPGA hardware version/status word.
//
// Returns the raw value of REG_HARDWARE_STATUS on PADDLE_MOBILE_ZU5 builds;
// returns 0 when the driver is compiled without FPGA support.
uint64_t FPGAVersion() {
#ifdef FPGA_PRINT_MODE
  // Fix: the debug label was copy-pasted as "ComputeFpgaBypass".
  DLOG << "=============FPGAVersion===========";
#endif
#ifdef PADDLE_MOBILE_ZU5
  uint64_t fpga_ver = 0;
  // Register access is serialized with the other PE operations via the
  // shared pe_data mutex, matching the locking pattern used elsewhere here.
  pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
  fpga_ver = reg_readq(REG_HARDWARE_STATUS);
  pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
  return fpga_ver;
#endif
  return 0;
}  // FPGAVersion
int ComputeFPGAConcat(const struct ConcatArgs &args) { int ComputeFPGAConcat(const struct ConcatArgs &args) {
#ifdef FPGA_PRINT_MODE #ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFpgaConcat==========="; DLOG << "=============ComputeFpgaConcat===========";
...@@ -655,6 +751,45 @@ void deconv_post_process(const struct DeconvArgs &args) { ...@@ -655,6 +751,45 @@ void deconv_post_process(const struct DeconvArgs &args) {
fpga_flush(args.output.address, fpga_flush(args.output.address,
num * align_deconv_row_len * deconv_h * sizeof(int16_t)); num * align_deconv_row_len * deconv_h * sizeof(int16_t));
} }
// Merges the row-interleaved outputs of the depthwise-deconv sub-convolutions
// into the final output image, dropping omit_size rows/columns from each
// border. Mirrors deconv_post_process() but reads the per-sub-conv outputs
// from args.dw_conv_args.
void DWDeconv_post_process(const struct DWDeconvArgs &args) {
  int sub_conv_n = args.sub_conv_num;
  int sub_height = args.sub_output_height;
  int sub_width = args.sub_output_width;
  int omit_size = args.omit_size;
  int channel = args.filter_num;
  int num = 1;  // batch size is fixed to 1 in this path
  // Full-resolution output size before trimming the omitted borders.
  int origin_h = sub_height * sub_conv_n;
  int origin_w = sub_width * sub_conv_n;
  int align_origin_w = align_to_x(origin_w * channel, IMAGE_ALIGNMENT);
  // Final (trimmed) output size and per-row byte layout.
  int deconv_h = origin_h - 2 * omit_size;
  int deconv_w = origin_w - 2 * omit_size;
  int deconv_row_len = deconv_w * channel;
  int align_deconv_row_len = align_to_x(deconv_row_len, IMAGE_ALIGNMENT);
  // Invalidate each sub-conv output region before the CPU reads it
  // (presumably so FPGA-written data is visible to the CPU -- confirm).
  for (int idx = 0; idx < sub_conv_n; ++idx) {
    paddle_mobile::fpga::fpga_invalidate(
        args.dw_conv_args[idx]->output.address,
        align_origin_w * origin_h * sizeof(int16_t));
  }
  int deconv_idx = 0;  // write offset (in int16 elements) into args.output
  for (int nn = 0; nn < num; ++nn) {
    for (int hh = 0; hh < origin_h; ++hh) {
      // Output rows come round-robin from the sub-conv results, walking the
      // sub-convs in reverse order (sub_conv_n - hx - 1).
      int hx = (hh % sub_conv_n);
      auto sub_t = (int16_t *)(args.dw_conv_args[sub_conv_n - hx - 1]  // NOLINT
                                   ->output.address);
      // Row index inside the selected sub-conv output.
      int hi = (hh / sub_conv_n);
      // Skip the omit_size border rows at the top and bottom.
      if ((hh < omit_size) || (hh >= (origin_h - omit_size))) continue;
      // Source offset: row hi, skipping omit_size columns on the left
      // (omit_size * channel elements).
      int sidx = (nn * origin_h * align_origin_w + hi * align_origin_w +
                  omit_size * channel);
      fpga_copy((int16_t *)(args.output.address) + deconv_idx,  // NOLINT
                sub_t + sidx, sizeof(int16_t) * deconv_row_len);  // NOLINT
      // Destination rows stay aligned even though only deconv_row_len
      // elements are copied per row.
      deconv_idx += align_deconv_row_len;
    }
  }
  // Flush the CPU-assembled result out of the cache for downstream readers.
  fpga_flush(args.output.address,
             num * align_deconv_row_len * deconv_h * sizeof(int16_t));
}
int ComputeFpgaDeconv(const struct DeconvArgs &args) { int ComputeFpgaDeconv(const struct DeconvArgs &args) {
#ifdef FPGA_PRINT_MODE #ifdef FPGA_PRINT_MODE
...@@ -755,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) { ...@@ -755,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) {
int ComputeDWConv(const struct DWconvArgs &args) { int ComputeDWConv(const struct DWconvArgs &args) {
#ifdef FPGA_PRINT_MODE #ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeDWConv==========="; DLOG << "=============ComputeDWConv===========";
DLOG << " mode:" << args.relu_enabled; // DLOG << " mode:" << args.relu_enabled;
DLOG << " image_address:" << args.image.address DLOG << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address << " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels << " image_channels:" << args.image.channels
...@@ -778,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) { ...@@ -778,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) {
uint64_t output_scale = 0; uint64_t output_scale = 0;
uint64_t timer_cnt = 0; uint64_t timer_cnt = 0;
int ret = 0; int ret = 0;
uint64_t cmd = args.relu_enabled; // uint64_t cmd = args.relu_enabled;
uint64_t cmd = 0;
uint64_t image_physical_address = 0; uint64_t image_physical_address = 0;
uint64_t output_physical_address = 0; uint64_t output_physical_address = 0;
uint64_t filter_physical_address = 0; uint64_t filter_physical_address = 0;
...@@ -792,17 +928,21 @@ int ComputeDWConv(const struct DWconvArgs &args) { ...@@ -792,17 +928,21 @@ int ComputeDWConv(const struct DWconvArgs &args) {
align_to_x((uint64_t)args.image.channels, IMAGE_ALIGNMENT); align_to_x((uint64_t)args.image.channels, IMAGE_ALIGNMENT);
uint64_t filter_amount_per_row_align = uint64_t filter_amount_per_row_align =
filter_N_align * (uint64_t)args.kernel.width; filter_N_align * (uint64_t)args.kernel.width;
uint64_t filter_amount_align = filter_N_align * (uint64_t)args.kernel.width * uint64_t sub_filter_amount_align = filter_N_align *
(uint64_t)args.kernel.height; (uint64_t)args.kernel.width *
(uint64_t)args.kernel.height;
uint64_t filter_amount_align =
sub_filter_amount_align * (uint64_t)args.sub_conv_num;
uint32_t output_height = (uint32_t)( uint32_t output_height = (uint32_t)(
(args.image.height + args.image.pad_height * 2 - args.kernel.height) / (args.image.height + args.image.pad_height * 2 - args.kernel.height) /
args.kernel.stride_h + args.kernel.stride_h +
1); 1);
uint32_t output_width = (uint32_t)( uint32_t output_width = (uint32_t)(
(args.image.width + args.image.pad_width * 2 - args.kernel.width) / ((args.image.width + args.image.pad_width * 2 - args.kernel.width) /
args.kernel.stride_w + args.kernel.stride_w +
1); 1) *
args.sub_conv_num);
uint64_t image_amount_per_row = uint64_t image_amount_per_row =
align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels, align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels,
...@@ -845,12 +985,15 @@ int ComputeDWConv(const struct DWconvArgs &args) { ...@@ -845,12 +985,15 @@ int ComputeDWConv(const struct DWconvArgs &args) {
/*restart scale*/ /*restart scale*/
reg_writeq(output_scale, REG_SCALE_PARAMETER); reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR); reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR); reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
reg_writeq((bias_physical_address << 32 | filter_physical_address), reg_writeq((bias_physical_address << 32 | filter_physical_address),
REG_DWCONV_FILTER_BASE_ADDR); REG_DWCONV_FILTER_BASE_ADDR);
reg_writeq(filter_amount_per_row_align | (filter_amount_align << 32), reg_writeq(filter_amount_per_row_align | (filter_amount_align << 32),
REG_DWCONV_FILTER_SHAPE); REG_DWCONV_FILTER_SHAPE);
reg_writeq(sub_filter_amount_align | (((uint64_t)args.sub_conv_num) << 32),
REG_DWCONV_FILTER_SUBNUMBER);
reg_writeq(filter_N_align, REG_DWCONV_FILTER_N_ALIGN); reg_writeq(filter_N_align, REG_DWCONV_FILTER_N_ALIGN);
reg_writeq( reg_writeq(
...@@ -904,10 +1047,88 @@ int ComputeDWConv(const struct DWconvArgs &args) { ...@@ -904,10 +1047,88 @@ int ComputeDWConv(const struct DWconvArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER); output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32); output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2); fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
DLOG << "output_scale:" << output_scale;
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex); pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret; return ret;
#endif #endif
return 0; return 0;
} }
// Performs a depthwise deconvolution: runs each sub-convolution on the FPGA,
// selects the maximum output scale across the sub-convolutions, and merges
// the row-interleaved partial results into args.output via
// DWDeconv_post_process().
//
// Returns 0 on success, or the first non-zero error code from ComputeDWConv.
int ComputeDWDeconv(const struct DWDeconvArgs &args) {
#ifdef FPGA_PRINT_MODE
  DLOG << "=============ComputeDWDeconv===========";
  DLOG << " filter_num:" << args.filter_num
       << " group_num:" << args.group_num << "omit_size:" << args.omit_size
       << "sub_output_width: " << args.sub_output_width
       << "sub_output_height: " << args.sub_output_height
       << " sub_conv_num:" << args.sub_conv_num;
  DLOG << "args.output.address: " << args.output.address
       << "args.output.scale_address: " << args.output.scale_address;
#endif

  int sub_conv_num = args.sub_conv_num;

#ifdef COST_TIME_PRINT
  timeval start, end;
  long dif_sec, dif_usec;  // NOLINT
#endif

  // Run each sub-convolution; propagate the first failure instead of
  // silently discarding the return value.
  for (int i = 0; i < sub_conv_num; i++) {
#ifdef COST_TIME_PRINT
    gettimeofday(&start, NULL);
#endif
    int ret = ComputeDWConv(*args.dw_conv_args[i]);
    if (ret != 0) {
      return ret;
    }
#ifdef COST_TIME_PRINT
    gettimeofday(&end, NULL);
    dif_sec = end.tv_sec - start.tv_sec;
    dif_usec = end.tv_usec - start.tv_usec;
    std::cout << "deconv basic_conv: " << i << " times: "
              << " cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
              << std::endl;
#endif
  }

  if (sub_conv_num > 1) {
    float max_scale = -1.0f;
#ifdef COST_TIME_PRINT
    gettimeofday(&start, NULL);
#endif
    for (int i = 0; i < sub_conv_num; i++) {
      paddle_mobile::fpga::fpga_invalidate(
          args.dw_conv_args[i]->output.scale_address, 2 * sizeof(float));
      float ptr_scale = (args.dw_conv_args[i]->output.scale_address)[0];
      if (ptr_scale > max_scale) {
        // Fix: max_scale was never updated, so the comparison was always
        // against -1.0f and the LAST sub-conv's scale won, not the maximum.
        max_scale = ptr_scale;
        args.output.scale_address[0] = ptr_scale;
        args.output.scale_address[1] =
            (args.dw_conv_args[i]->output.scale_address)[1];
      }
    }
#ifdef COST_TIME_PRINT
    gettimeofday(&end, NULL);
    dif_sec = end.tv_sec - start.tv_sec;
    dif_usec = end.tv_usec - start.tv_usec;
    std::cout << "deconv scale "
              << " cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
              << std::endl;
#endif
  }

#ifdef COST_TIME_PRINT
  gettimeofday(&start, NULL);
#endif
  DWDeconv_post_process(args);
#ifdef COST_TIME_PRINT
  gettimeofday(&end, NULL);
  dif_sec = end.tv_sec - start.tv_sec;
  dif_usec = end.tv_usec - start.tv_usec;
  std::cout << "deconv_post_process "
            << " cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
            << std::endl;
#endif
  return 0;
}  // ComputeDWDeconv
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) { ...@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
unsigned int nr = (unsigned int)_nr; unsigned int nr = (unsigned int)_nr;
int ret = 0; int ret = 0;
uint64_t a_size = FPGA_PAGE_SIZE * nr; uint64_t a_size = FPGA_PAGE_SIZE * nr;
DLOG << a_size;
pthread_mutex_lock(&memory->mutex); pthread_mutex_lock(&memory->mutex);
...@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) { ...@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) {
void fpga_copy_driver(void *dest, const void *src, size_t num) { void fpga_copy_driver(void *dest, const void *src, size_t num) {
uint64_t i; uint64_t i;
DLOG << "dest:" << dest << " src:" << src << " size:" << num;
for (i = 0; i < num; i++) { for (i = 0; i < num; i++) {
*((int8_t *)dest + i) = *((int8_t *)src + i); // NOLINT *((int8_t *)dest + i) = *((int8_t *)src + i); // NOLINT
} }
......
...@@ -29,7 +29,7 @@ namespace driver { ...@@ -29,7 +29,7 @@ namespace driver {
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d)) #define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define FPGA_REG_PHY_ADDR 0xa0000000 #define FPGA_REG_PHY_ADDR 0x80000000
#define FPGA_REG_SIZE 0x1000 #define FPGA_REG_SIZE 0x1000
#define FPGA_MEM_PHY_ADDR 0x40000000 #define FPGA_MEM_PHY_ADDR 0x40000000
#define FPGA_MEM_SIZE 0x80000000 #define FPGA_MEM_SIZE 0x80000000
......
...@@ -76,7 +76,7 @@ int32_t convertmantissa(int32_t i) { ...@@ -76,7 +76,7 @@ int32_t convertmantissa(int32_t i) {
} }
float fp16_2_fp32(int16_t fp16_num) { float fp16_2_fp32(int16_t fp16_num) {
int16_t se_fp16 = fp16_num >> 10; int16_t se_fp16 = (fp16_num >> 10) & 0x3f;
int16_t m_fp16 = fp16_num & 0x3ff; int16_t m_fp16 = fp16_num & 0x3ff;
int32_t e_fp32 = 0; int32_t e_fp32 = 0;
int16_t offset = 0; int16_t offset = 0;
...@@ -94,7 +94,7 @@ float fp16_2_fp32(int16_t fp16_num) { ...@@ -94,7 +94,7 @@ float fp16_2_fp32(int16_t fp16_num) {
e_fp32 = 0x80000000; e_fp32 = 0x80000000;
offset = 0; offset = 0;
} else if (se_fp16 < 63) { } else if (se_fp16 < 63) {
e_fp32 = 0x80000000 + (se_fp16 - 32) << 23; e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);
offset = 1024; offset = 1024;
} else { // se_fp16 == 63 } else { // se_fp16 == 63
e_fp32 = 0xC7800000; e_fp32 = 0xC7800000;
......
...@@ -45,6 +45,7 @@ enum ActivationType { ...@@ -45,6 +45,7 @@ enum ActivationType {
LEAKYRELU = 1, LEAKYRELU = 1,
SIGMOID = 2, SIGMOID = 2,
TANH = 3, TANH = 3,
SOFTMAX = 4,
}; };
struct ActivationArgs { struct ActivationArgs {
...@@ -132,7 +133,7 @@ struct DeconvTxParm { ...@@ -132,7 +133,7 @@ struct DeconvTxParm {
#endif #endif
struct ConvArgs { struct ConvArgs {
bool relu_enabled; // bool relu_enabled;
void* sb_address; // scale and bias void* sb_address; // scale and bias
void* filter_address; void* filter_address;
float* filter_scale_address; float* filter_scale_address;
...@@ -198,7 +199,7 @@ struct PoolingArgs { ...@@ -198,7 +199,7 @@ struct PoolingArgs {
}; };
struct EWAddArgs { struct EWAddArgs {
bool relu_enabled; // bool relu_enabled;
uint32_t const0; // output0 = const0 x input0 + const1 x input1; uint32_t const0; // output0 = const0 x input0 + const1 x input1;
uint32_t const1; uint32_t const1;
struct ImageInputArgs image0; struct ImageInputArgs image0;
...@@ -229,13 +230,27 @@ struct DeconvArgs { ...@@ -229,13 +230,27 @@ struct DeconvArgs {
std::vector<std::shared_ptr<SplitConvArgs>> split_conv_args; std::vector<std::shared_ptr<SplitConvArgs>> split_conv_args;
}; };
struct DWconvArgs { struct DWconvArgs {
bool relu_enabled; uint32_t sub_conv_num;
// bool relu_enabled;
void* bias_address; void* bias_address;
void* filter_address; void* filter_address;
struct KernelArgs kernel; struct KernelArgs kernel;
struct ImageInputArgs image; struct ImageInputArgs image;
struct ImageOutputArgs output; struct ImageOutputArgs output;
}; };
// Arguments for a depthwise deconvolution, implemented as sub_conv_num
// depthwise convolutions whose row-interleaved outputs are merged by
// DWDeconv_post_process().
struct DWDeconvArgs {
  uint32_t sub_conv_num;  // number of sub-convolutions (entries in dw_conv_args)
  uint32_t group_num;     // logged only here; semantics set by the caller
  uint32_t filter_num;    // output channel count
  uint32_t omit_size;     // border rows/cols trimmed from the merged output
  uint32_t sub_output_width;   // width of each sub-conv output
  uint32_t sub_output_height;  // height of each sub-conv output
  struct ImageOutputArgs output;  // final merged output image and scale
  std::vector<std::shared_ptr<DWconvArgs>> dw_conv_args;  // one per sub-conv
  // Presumably owned backing buffers for the sub-conv argument data --
  // TODO(review): confirm against the fill_DWDeconv_arg implementation.
  std::vector<std::shared_ptr<char>> vector_dw_conv_space;
};
// static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x; // static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x;
// } // }
static inline uint32_t align_to_x(int64_t num, int64_t x) { static inline uint32_t align_to_x(int64_t num, int64_t x) {
......
...@@ -18,6 +18,7 @@ limitations under the License. */ ...@@ -18,6 +18,7 @@ limitations under the License. */
namespace paddle_mobile { namespace paddle_mobile {
namespace fpga { namespace fpga {
uint64_t FPGAVersion();
int PerformBypass(const struct BypassArgs& args); int PerformBypass(const struct BypassArgs& args);
int ComputeBasicConv(const struct ConvArgs& args); int ComputeBasicConv(const struct ConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args); int ComputeFpgaPool(const struct PoolingArgs& args);
...@@ -28,5 +29,7 @@ int ComputeFPGAConcat(const struct ConcatArgs& args); ...@@ -28,5 +29,7 @@ int ComputeFPGAConcat(const struct ConcatArgs& args);
int ComputeFPGASplit(const struct SplitArgs& args); int ComputeFPGASplit(const struct SplitArgs& args);
int ComputeFpgaDeconv(const struct DeconvArgs& args); int ComputeFpgaDeconv(const struct DeconvArgs& args);
int ComputeDWConv(const struct DWconvArgs& args); int ComputeDWConv(const struct DWconvArgs& args);
int ComputeDWDeconv(const struct DWDeconvArgs& args);
} // namespace fpga } // namespace fpga
} // namespace paddle_mobile } // namespace paddle_mobile
...@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6); ...@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6);
#ifdef SIGMOID_OP #ifdef SIGMOID_OP
DEFINE_ACTIVATION_INFERSHAPE(Sigmoid); DEFINE_ACTIVATION_INFERSHAPE(Sigmoid);
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp);
#endif
#endif // SIGMOID_OP #endif // SIGMOID_OP
#ifdef TANH_OP #ifdef TANH_OP
......
...@@ -22,7 +22,10 @@ namespace operators { ...@@ -22,7 +22,10 @@ namespace operators {
template <> template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) { bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
bool relu_enabled = false; // bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
auto bias = param->Bias(); auto bias = param->Bias();
...@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) { ...@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
param->Groups(), param->Strides()[0], leaky_relu_negative_slope, param->Groups(),
param->Strides()[1], param->Paddings()[0], param->Strides()[0], param->Strides()[1],
param->Paddings()[1], bs_ptr); param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
......
...@@ -23,7 +23,10 @@ namespace operators { ...@@ -23,7 +23,10 @@ namespace operators {
template <> template <>
bool ConvAddBNReluKernel<FPGA, float>::Init( bool ConvAddBNReluKernel<FPGA, float>::Init(
FusionConvAddBNReluParam<FPGA> *param) { FusionConvAddBNReluParam<FPGA> *param) {
bool relu_enabled = true; // bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
auto bias = param->Bias(); auto bias = param->Bias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
...@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init( ...@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
if (groups == channel) { if (groups == channel) {
fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr); fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
fpga::DWconvArgs dwconv_arg = {0}; fpga::DWconvArgs dwconv_arg = {0};
fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled, fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
strides[0], strides[1], paddings[0], paddings[1], leaky_relu_negative_slope, strides[0], strides[1],
new_bias_ptr); paddings[0], paddings[1], new_bias_ptr);
param->SetFpgaArgs(dwconv_arg); param->SetFpgaArgs(dwconv_arg);
} else { } else {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
param->Groups(), strides[0], strides[1], paddings[0], leaky_relu_negative_slope, param->Groups(), strides[0],
paddings[1], bs_ptr); strides[1], paddings[0], paddings[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
} }
return true; return true;
......
...@@ -21,7 +21,10 @@ namespace operators { ...@@ -21,7 +21,10 @@ namespace operators {
template <> template <>
bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) { bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
bool relu_enabled = false; // bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias(); const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
...@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) { ...@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
param->Groups(), param->Strides()[0], leaky_relu_negative_slope, param->Groups(),
param->Strides()[1], param->Paddings()[0], param->Strides()[0], param->Strides()[1],
param->Paddings()[1], bs_ptr); param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -21,7 +21,10 @@ namespace operators { ...@@ -21,7 +21,10 @@ namespace operators {
template <> template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) { bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
bool relu_enabled = true; // bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias(); const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
...@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) { ...@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
param->Groups(), param->Strides()[0], leaky_relu_negative_slope, param->Groups(),
param->Strides()[1], param->Paddings()[0], param->Strides()[0], param->Strides()[1],
param->Paddings()[1], bs_ptr); param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -22,7 +22,10 @@ namespace operators { ...@@ -22,7 +22,10 @@ namespace operators {
template <> template <>
bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) { bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
bool relu_enabled = false; // bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
auto filter = const_cast<Tensor *>(param->Filter()); auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output(); auto out = param->Output();
...@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) { ...@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
param->Groups(), param->Strides()[0], leaky_relu_negative_slope, param->Groups(),
param->Strides()[1], param->Paddings()[0], param->Strides()[0], param->Strides()[1],
param->Paddings()[1], bs_ptr); param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -22,7 +22,10 @@ namespace operators { ...@@ -22,7 +22,10 @@ namespace operators {
template <> template <>
bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) { bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
bool relu_enabled = true; // bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
auto filter = const_cast<Tensor *>(param->Filter()); auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output(); auto out = param->Output();
...@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) { ...@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups()); fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled, fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
param->Groups(), param->Strides()[0], leaky_relu_negative_slope, param->Groups(),
param->Strides()[1], param->Paddings()[0], param->Strides()[0], param->Strides()[1],
param->Paddings()[1], bs_ptr); param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -23,7 +23,10 @@ namespace operators { ...@@ -23,7 +23,10 @@ namespace operators {
template <> template <>
bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) { bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
bool relu_enabled = false; // bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias(); const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
...@@ -49,13 +52,24 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) { ...@@ -49,13 +52,24 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
"filter width should be equal to filter height "); "filter width should be equal to filter height ");
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis "); "filter axis should be the multiple of stride axis ");
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); if (param->Groups() == channel) {
fpga::DeconvArgs deconv_arg = {0}; fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled, sub_conv_n);
param->Groups(), param->Strides()[0], fpga::DWDeconvArgs DWDeconv_arg = {0};
param->Strides()[1], param->Paddings()[0], fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
param->Paddings()[1], bs_ptr); activation_enable, leaky_relu_negative_slope,
param->SetFpgaArgs(deconv_arg); param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
return true; return true;
} }
...@@ -63,7 +77,11 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) { ...@@ -63,7 +77,11 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
template <> template <>
void DeconvAddKernel<FPGA, float>::Compute( void DeconvAddKernel<FPGA, float>::Compute(
const FusionDeconvAddParam<FPGA> &param) { const FusionDeconvAddParam<FPGA> &param) {
fpga::ComputeFpgaDeconv(param.FpgaArgs()); if (param.Groups() == param.Output()->dims()[1]) {
fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
} else {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
}
} }
} // namespace operators } // namespace operators
......
...@@ -24,7 +24,10 @@ namespace operators { ...@@ -24,7 +24,10 @@ namespace operators {
template <> template <>
bool DeconvAddReluKernel<FPGA, float>::Init( bool DeconvAddReluKernel<FPGA, float>::Init(
FusionDeconvAddReluParam<FPGA> *param) { FusionDeconvAddReluParam<FPGA> *param) {
bool relu_enabled = true; // bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input()); auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias(); const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>(); auto bias_ptr = bias->data<float>();
...@@ -50,20 +53,36 @@ bool DeconvAddReluKernel<FPGA, float>::Init( ...@@ -50,20 +53,36 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
"filter width should be equal to filter height "); "filter width should be equal to filter height ");
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0), PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be the multiple of stride axis "); "filter axis should be the multiple of stride axis ");
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n); if (param->Groups() == channel) {
fpga::DeconvArgs deconv_arg = {0}; fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled, sub_conv_n);
param->Groups(), param->Strides()[0], fpga::DWDeconvArgs DWDeconv_arg = {0};
param->Strides()[1], param->Paddings()[0], fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
param->Paddings()[1], bs_ptr); activation_enable, leaky_relu_negative_slope,
param->SetFpgaArgs(deconv_arg); param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
return true; return true;
} }
template <> template <>
void DeconvAddReluKernel<FPGA, float>::Compute( void DeconvAddReluKernel<FPGA, float>::Compute(
const FusionDeconvAddReluParam<FPGA> &param) { const FusionDeconvAddReluParam<FPGA> &param) {
fpga::ComputeFpgaDeconv(param.FpgaArgs()); // fpga::ComputeFpgaDeconv(param.FpgaArgs());
if (param.Groups() == param.Output()->dims()[1]) {
fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
} else {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
}
} }
} // namespace operators } // namespace operators
......
...@@ -20,7 +20,10 @@ namespace operators { ...@@ -20,7 +20,10 @@ namespace operators {
template <> template <>
bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) { bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
bool relu_enabled = false; // bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto *input_x = const_cast<LoDTensor *>(param->InputX()); auto *input_x = const_cast<LoDTensor *>(param->InputX());
auto *input_y = const_cast<LoDTensor *>(param->InputY()); auto *input_y = const_cast<LoDTensor *>(param->InputY());
auto *out = param->Out(); auto *out = param->Out();
...@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) { ...@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
auto out_ptr = out->mutable_data<float>(); auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs = {0}; fpga::EWAddArgs ewaddArgs = {0};
ewaddArgs.relu_enabled = relu_enabled; // ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
ewaddArgs.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
ewaddArgs.const0 = 0x3c00; // =1 ewaddArgs.const0 = 0x3c00; // =1
ewaddArgs.const1 = 0x3c00; // =1 ewaddArgs.const1 = 0x3c00; // =1
ewaddArgs.image0.address = input_x_ptr; ewaddArgs.image0.address = input_x_ptr;
......
...@@ -21,7 +21,10 @@ namespace operators { ...@@ -21,7 +21,10 @@ namespace operators {
template <> template <>
bool ElementwiseAddReluKernel<FPGA, float>::Init( bool ElementwiseAddReluKernel<FPGA, float>::Init(
ElementwiseAddReluParam<FPGA> *param) { ElementwiseAddReluParam<FPGA> *param) {
bool relu_enabled = true; // bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto *input_x = const_cast<LoDTensor *>(param->InputX()); auto *input_x = const_cast<LoDTensor *>(param->InputX());
auto *input_y = const_cast<LoDTensor *>(param->InputY()); auto *input_y = const_cast<LoDTensor *>(param->InputY());
auto *out = param->Out(); auto *out = param->Out();
...@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init( ...@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
auto out_ptr = out->mutable_data<float>(); auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs = {0}; fpga::EWAddArgs ewaddArgs = {0};
ewaddArgs.relu_enabled = relu_enabled; // ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
ewaddArgs.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
ewaddArgs.const0 = 0x3c00; // =1 ewaddArgs.const0 = 0x3c00; // =1
ewaddArgs.const1 = 0x3c00; // =1 ewaddArgs.const1 = 0x3c00; // =1
ewaddArgs.image0.address = input_x_ptr; ewaddArgs.image0.address = input_x_ptr;
......
...@@ -19,12 +19,34 @@ namespace operators { ...@@ -19,12 +19,34 @@ namespace operators {
template <> template <>
bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) { bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
Tensor *output = param->Out();
// fpga::format_fp16_ofm(output);
return true; return true;
} }
template <> template <>
void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) { void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
param.Out()->ShareDataWith(*(param.InputX())); param.Out()->ShareDataWith(*(param.InputX()));
/*auto input =
reinterpret_cast<Tensor *>(const_cast<Tensor *>(param.InputX()));
fpga::format_image(input);
auto input_ptr = input->data<float>();
Tensor *output = param.Out();
auto output_ptr = output->data<float>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP32;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = reinterpret_cast<void *>(input_ptr);
args.image.channels = (uint32_t)input->dims()[1];
args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] :
1; args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3]
: 1; args.image.pad_height = 0; args.image.pad_width = 0; args.output.address
= output_ptr; args.output.scale_address = output->scale;
fpga::PerformBypass(args);*/
} }
template class FetchKernel<FPGA, float>; template class FetchKernel<FPGA, float>;
......
...@@ -20,7 +20,10 @@ namespace operators { ...@@ -20,7 +20,10 @@ namespace operators {
template <> template <>
bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) { bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
bool relu_enabled = false; // bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input_x = const_cast<LoDTensor *>(param->InputX()); auto input_x = const_cast<LoDTensor *>(param->InputX());
auto filter = const_cast<Tensor *>(param->InputY()); auto filter = const_cast<Tensor *>(param->InputY());
const Tensor *input_z = param->InputZ(); const Tensor *input_z = param->InputZ();
...@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) { ...@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
fpga::format_fp16_ofm(out); fpga::format_fp16_ofm(out);
fpga::SplitConvArgs conv_arg = {0}; fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1, fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
0, 0, bs_ptr); leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg); param->SetFpgaArgs(conv_arg);
return true; return true;
} }
......
...@@ -22,6 +22,12 @@ namespace operators { ...@@ -22,6 +22,12 @@ namespace operators {
template <> template <>
bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) { bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) {
param->Out()->ShareDataWith(*param->InputX()); param->Out()->ShareDataWith(*param->InputX());
const int in_n = param->InputX()->dims()[0];
const int in_c = param->InputX()->dims()[1];
const int in_h = param->InputX()->dims()[2];
const int in_w = param->InputX()->dims()[3];
auto out = param->Out();
out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w}));
return true; return true;
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#include "operators/kernel/activation_kernel.h"
namespace paddle_mobile {
namespace operators {
// Prepares the FPGA "bypass" descriptor for the Sigmoid op: the input image is
// streamed through the FPGA unchanged (fp16 in, fp16 out) and the SIGMOID
// activation is applied on the output stage. The filled BypassArgs are stored
// on the param for later execution in Compute().
template <>
bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
  // Sigmoid is realized as the output-activation of a bypass pass.
  paddle_mobile::fpga::ActivationType activation_enable =
      paddle_mobile::fpga::SIGMOID;
  // Slope field is only meaningful for LEAKYRELU; must still be set (0 here).
  int16_t leaky_relu_negative_slope = 0;
  // InputX() is const; the FPGA argument structs require non-const pointers.
  auto input = const_cast<Tensor *>(param->InputX());
  auto input_ptr = input->data<float>();
  auto out = param->Out();
  // Allocate/format the output tensor as an fp16 FPGA feature map.
  fpga::format_fp16_ofm(out);
  // Brace-init sets the first member (data type) and zeroes the rest.
  fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
  args.input_data_type = fpga::DATA_TYPE_FP16;
  args.output_data_type = fpga::DATA_TYPE_FP16;
  args.image.address = input_ptr;
  // Non-4D inputs (e.g. NC after a fully-connected layer) are treated as
  // having 1x1 spatial dimensions; dims()[1] is assumed to be channels.
  args.image.height =
      (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
  args.image.width =
      (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
  args.image.channels = (uint32_t)input->dims()[1];
  args.output.address = out->data<float>();
  args.output.scale_address = out->scale;
  args.output.activation.activation_type = activation_enable;
  args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
  // NOTE(review): args.image.scale_address is never assigned here, while other
  // bypass setups in this file do set it — confirm the FPGA bypass path does
  // not need the input scale for fp16->fp16 transfers.
  param->SetFpgaArgs(args);
  return true;
}
// Runs the sigmoid: executes the BypassArgs prepared in Init() on the FPGA,
// which copies the input to the output with SIGMOID applied.
template <>
void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
  fpga::PerformBypass(param.FpgaArgs());
}
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto input_ptr = input->data<float>(); auto input_ptr = input->data<float>();
auto out = param->Out(); auto out = param->Out();
fpga::format_fp32_ofm(out); fpga::format_fp32_ofm(out);
auto float_input = new Tensor; auto float_input = new Tensor;
if (input->dims().size() == 2) { if (input->dims().size() == 2) {
float_input->mutable_data<float>({1, input->dims()[1]}); float_input->mutable_data<float>({1, input->dims()[1]});
...@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
} else { } else {
DLOG << "wrong dimension of softmax input"; DLOG << "wrong dimension of softmax input";
} }
fpga::format_fp32_ofm(float_input); fpga::format_fp32_ofm(float_input);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16}; fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_layout_type = fpga::LAYOUT_HWC; args.input_layout_type = fpga::LAYOUT_HWC;
...@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) { ...@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
args.output.scale_address = float_input->scale; args.output.scale_address = float_input->scale;
param->SetFloatInput(float_input); param->SetFloatInput(float_input);
param->SetFpgaArgs(args); param->SetFpgaArgs(args);
return true; return true;
} }
......
...@@ -1078,6 +1078,15 @@ class SigmoidParam : public OpParam { ...@@ -1078,6 +1078,15 @@ class SigmoidParam : public OpParam {
private: private:
RType *input_x_; RType *input_x_;
RType *out_; RType *out_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::BypassArgs fpga_bypass_args;
public:
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
}; };
#endif #endif
...@@ -1200,6 +1209,20 @@ class FetchParam : public OpParam { ...@@ -1200,6 +1209,20 @@ class FetchParam : public OpParam {
private: private:
RType *input_x_; RType *input_x_;
Tensor *out_; Tensor *out_;
#ifdef PADDLE_MOBILE_FPGA
private:
std::shared_ptr<RType> float_input_x_;
fpga::BypassArgs fpga_bypass_args;
public:
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
}; };
#ifdef FILL_CONSTANT_OP #ifdef FILL_CONSTANT_OP
...@@ -2357,10 +2380,17 @@ class ConvTransposeParam : public OpParam { ...@@ -2357,10 +2380,17 @@ class ConvTransposeParam : public OpParam {
private: private:
fpga::DeconvArgs fpga_conv_args; fpga::DeconvArgs fpga_conv_args;
fpga::DWDeconvArgs fpga_DWDeconv_args;
public: public:
const fpga::DeconvArgs &FpgaArgs() const { return fpga_conv_args; } const fpga::DeconvArgs &FpgaArgs() const { return fpga_conv_args; }
const fpga::DWDeconvArgs &FpgaDWDconvArgs() const {
return fpga_DWDeconv_args;
}
void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; } void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; }
void SetFpgaArgs(const fpga::DWDeconvArgs &args) {
fpga_DWDeconv_args = args;
}
#endif #endif
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册