Commit 899e7fdf authored by hjchen2

Merge branch 'develop' of https://github.com/PaddlePaddle/paddle-mobile into ocr_attention

......@@ -243,13 +243,13 @@
);
inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
"${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
);
name = "[CP] Embed Pods Frameworks";
outputFileListPaths = (
);
outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
......@@ -436,7 +436,7 @@
baseConfigurationReference = 4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
......@@ -446,6 +446,7 @@
);
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = ForAllDev;
SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
......@@ -456,7 +457,7 @@
baseConfigurationReference = E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic;
CODE_SIGN_STYLE = Manual;
DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
......@@ -466,6 +467,7 @@
);
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)";
PROVISIONING_PROFILE_SPECIFIER = ForAllDev;
SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
......
......@@ -5,21 +5,25 @@ workspace 'paddle-mobile.xcworkspace'
target 'paddle-mobile-demo' do
project 'paddle-mobile-demo/paddle-mobile-demo.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0'
# pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end
target 'paddle-mobile' do
project 'paddle-mobile/paddle-mobile.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0'
# pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end
target 'paddle-mobile-unit-test' do
project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0'
# pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end
target 'MobileNetDemo' do
project 'MobileNetDemo/MobileNetDemo.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0'
# pod 'SwiftProtobuf', '~> 1.0'
pod 'Protobuf', '~> 3.0.0'
end
......@@ -411,11 +411,11 @@
);
inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
"${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
);
name = "[CP] Embed Pods Frameworks";
outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
......
......@@ -234,11 +234,11 @@
);
inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-paddle-mobile-unit-test/Pods-paddle-mobile-unit-test-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
"${BUILT_PRODUCTS_DIR}/Protobuf/Protobuf.framework",
);
name = "[CP] Embed Pods Frameworks";
outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/Protobuf.framework",
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
......
......@@ -7,6 +7,8 @@
objects = {
/* Begin PBXBuildFile section */
456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */ = {isa = PBXBuildFile; fileRef = 456BB7B221F5B356001474E2 /* Framework.pbobjc.m */; settings = {COMPILER_FLAGS = "-fno-objc-arc"; }; };
456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */ = {isa = PBXBuildFile; fileRef = 456BB7B321F5B356001474E2 /* Framework.pbobjc.h */; settings = {ATTRIBUTES = (Public, ); }; };
4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */; };
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */; };
4AA1EA8A2146631C00D0F791 /* BilinearInterp.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA892146631C00D0F791 /* BilinearInterp.metal */; };
......@@ -47,15 +49,14 @@
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA620E11CBC0081E9F8 /* Operator.swift */; };
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */; };
FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BA820E11CBC0081E9F8 /* ReluOp.swift */; };
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BAF20E11CC20081E9F8 /* framework.pb.swift */; };
FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB020E11CC20081E9F8 /* Scope.swift */; };
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB120E11CC20081E9F8 /* TensorDesc.swift */; };
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */; };
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* VarDesc.swift */; };
FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */; };
FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */; };
FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB420E11CC20081E9F8 /* Program.swift */; };
FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* OpDesc.swift */; };
FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */; };
FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB620E11CC20081E9F8 /* Attribute.swift */; };
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* BlockDesc.swift */; };
FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */; };
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */; };
FC0E2DBC20EE45FE009C1FAC /* ConvKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */; };
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
......@@ -93,6 +94,7 @@
FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; };
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; };
FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */; };
FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */; };
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
......@@ -137,6 +139,8 @@
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
456BB7B221F5B356001474E2 /* Framework.pbobjc.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = Framework.pbobjc.m; sourceTree = "<group>"; };
456BB7B321F5B356001474E2 /* Framework.pbobjc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Framework.pbobjc.h; sourceTree = "<group>"; };
4AA1EA852146625E00D0F791 /* BilinearInterpOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpOp.swift; sourceTree = "<group>"; };
4AA1EA87214662BD00D0F791 /* BilinearInterpKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BilinearInterpKernel.swift; sourceTree = "<group>"; };
4AA1EA892146631C00D0F791 /* BilinearInterp.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BilinearInterp.metal; sourceTree = "<group>"; };
......@@ -182,15 +186,14 @@
FC039BA620E11CBC0081E9F8 /* Operator.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Operator.swift; sourceTree = "<group>"; };
FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BatchNormOp.swift; sourceTree = "<group>"; };
FC039BA820E11CBC0081E9F8 /* ReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ReluOp.swift; sourceTree = "<group>"; };
FC039BAF20E11CC20081E9F8 /* framework.pb.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = framework.pb.swift; sourceTree = "<group>"; };
FC039BB020E11CC20081E9F8 /* Scope.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Scope.swift; sourceTree = "<group>"; };
FC039BB120E11CC20081E9F8 /* TensorDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = TensorDesc.swift; sourceTree = "<group>"; };
FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ProgramDesc.swift; sourceTree = "<group>"; };
FC039BB320E11CC20081E9F8 /* VarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VarDesc.swift; sourceTree = "<group>"; };
FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMProgramDesc.swift; sourceTree = "<group>"; };
FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMVarDesc.swift; sourceTree = "<group>"; };
FC039BB420E11CC20081E9F8 /* Program.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Program.swift; sourceTree = "<group>"; };
FC039BB520E11CC20081E9F8 /* OpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = OpDesc.swift; sourceTree = "<group>"; };
FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMOpDesc.swift; sourceTree = "<group>"; };
FC039BB620E11CC20081E9F8 /* Attribute.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Attribute.swift; sourceTree = "<group>"; };
FC039BB720E11CC20081E9F8 /* BlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = BlockDesc.swift; sourceTree = "<group>"; };
FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PMBlockDesc.swift; sourceTree = "<group>"; };
FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ReluKernel.swift; sourceTree = "<group>"; };
FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvKernel.swift; sourceTree = "<group>"; };
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
......@@ -229,6 +232,7 @@
FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; };
FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; };
FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchKernel.swift; sourceTree = "<group>"; };
FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = BufferToTexture.metal; sourceTree = "<group>"; };
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
......@@ -403,15 +407,16 @@
FC039BAE20E11CC20081E9F8 /* Program */ = {
isa = PBXGroup;
children = (
FC039BAF20E11CC20081E9F8 /* framework.pb.swift */,
456BB7B321F5B356001474E2 /* Framework.pbobjc.h */,
456BB7B221F5B356001474E2 /* Framework.pbobjc.m */,
FC039BB020E11CC20081E9F8 /* Scope.swift */,
FC039BB120E11CC20081E9F8 /* TensorDesc.swift */,
FC039BB220E11CC20081E9F8 /* ProgramDesc.swift */,
FC039BB320E11CC20081E9F8 /* VarDesc.swift */,
FC039BB220E11CC20081E9F8 /* PMProgramDesc.swift */,
FC039BB320E11CC20081E9F8 /* PMVarDesc.swift */,
FC039BB420E11CC20081E9F8 /* Program.swift */,
FC039BB520E11CC20081E9F8 /* OpDesc.swift */,
FC039BB520E11CC20081E9F8 /* PMOpDesc.swift */,
FC039BB620E11CC20081E9F8 /* Attribute.swift */,
FC039BB720E11CC20081E9F8 /* BlockDesc.swift */,
FC039BB720E11CC20081E9F8 /* PMBlockDesc.swift */,
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */,
);
path = Program;
......@@ -496,6 +501,7 @@
FCEB6837212F00B100D2448E /* metal */ = {
isa = PBXGroup;
children = (
FCB91DC121FEEE990051C6B2 /* BufferToTexture.metal */,
4AF928812135673D005B6C3A /* ConcatKernel.metal */,
4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */,
4AF9288321357BE3005B6C3A /* Elementwise.metal */,
......@@ -545,6 +551,7 @@
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
456BB7B521F5B356001474E2 /* Framework.pbobjc.h in Headers */,
FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
......@@ -582,7 +589,7 @@
TargetAttributes = {
FC039B6920E11C3C0081E9F8 = {
CreatedOnToolsVersion = 9.3.1;
LastSwiftMigration = 0940;
LastSwiftMigration = 1000;
};
};
};
......@@ -659,7 +666,7 @@
FCD04E7020F31B720007374F /* ReshapeKernel.swift in Sources */,
FCE3A1B12153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal in Sources */,
FCD04E7220F343420007374F /* ConvAddOp.swift in Sources */,
FC039BBB20E11CC20081E9F8 /* ProgramDesc.swift in Sources */,
FC039BBB20E11CC20081E9F8 /* PMProgramDesc.swift in Sources */,
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
......@@ -670,6 +677,7 @@
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */,
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */,
FCB91DC221FEEE990051C6B2 /* BufferToTexture.metal in Sources */,
4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
......@@ -684,7 +692,6 @@
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */,
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
......@@ -698,6 +705,7 @@
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */,
456BB7B421F5B356001474E2 /* Framework.pbobjc.m in Sources */,
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */,
FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
......@@ -711,7 +719,7 @@
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */,
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* PMVarDesc.swift in Sources */,
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */,
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */,
FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */,
......@@ -749,14 +757,14 @@
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */,
FCE3A1AD2153E8BA00C37CDE /* ElementwiseAddPreluOp.swift in Sources */,
FC039BC020E11CC20081E9F8 /* BlockDesc.swift in Sources */,
FC039BC020E11CC20081E9F8 /* PMBlockDesc.swift in Sources */,
FC803BC3214CB79C0094B8E5 /* ConvAddPreluKernel.metal in Sources */,
4AA1EA90214664CD00D0F791 /* Split.metal in Sources */,
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */,
FC0226582138F38D00F395E2 /* PoolKernel.metal in Sources */,
FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */,
FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */,
FC039BBE20E11CC20081E9F8 /* PMOpDesc.swift in Sources */,
FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */,
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */,
FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */,
......
......@@ -13,7 +13,7 @@
limitations under the License. */
import Foundation
import SwiftProtobuf
//import SwiftProtobuf
public class Loader<P: PrecisionType> {
class ParaLoader {
......@@ -145,13 +145,17 @@ public class Loader<P: PrecisionType> {
public init(){}
func loadModelandParam(_ device:MTLDevice,_ modelData:Data, _ paraLoaderPointer:ParaLoaderWithPointer?, _ paraLoader:ParaLoader?) throws -> Program {
do {
let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
serializedData: modelData)
/// SwiftProtobuf: deserialize serialized Data into a generated Swift class
// let protoProgram = try PaddleMobile_Framework_Proto_ProgramDesc.init(
// serializedData: modelData)
/// Objective-C protobuf: deserialize serialized Data into a generated GPB class
let protoProgram = try ProgramDesc.init(data: (modelData as NSData) as Data)
let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
let originProgramDesc = PMProgramDesc.init(protoProgram: protoProgram)
let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
// let programDesc = ProgramDesc.init(protoProgram: protoProgram)
// let programDesc = PMProgramDesc.init(protoProgram: protoProgram)
print(programDesc)
......
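The hunk above replaces the SwiftProtobuf deserialization path with the Objective-C protobuf one. A minimal sketch of the new load path, assuming `modelData` holds a serialized `framework.proto` model; `ProgramDesc` is the generated GPB class and `PMProgramDesc` the Swift wrapper introduced by this commit:

```swift
import Foundation

// Sketch of the Objective-C protobuf load path used above. `modelData` is
// assumed to contain a serialized ProgramDesc.
func loadProgram(from modelData: Data) throws -> PMProgramDesc {
    // GPBMessage subclasses import into Swift with a throwing Data initializer.
    let protoProgram = try ProgramDesc(data: modelData)
    // Wrap the GPB message tree in the framework's PM* types.
    return PMProgramDesc(protoProgram: protoProgram)
}
```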
......@@ -27,7 +27,7 @@ class OpCreator<P: PrecisionType> {
}
}
func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable {
func creat(device: MTLDevice, opDesc: PMOpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable {
guard let opCreator = opCreators[opDesc.type] else {
throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet")
}
......@@ -39,7 +39,7 @@ class OpCreator<P: PrecisionType> {
}
}
let opCreators: [String : (MTLDevice, OpDesc, Scope, InitContext) throws -> Runable & InferShaperable] =
let opCreators: [String : (MTLDevice, PMOpDesc, Scope, InitContext) throws -> Runable & InferShaperable] =
[gConvType : ConvOp<P>.creat,
gBatchNormType : BatchNormOp<P>.creat,
gReluType : ReluOp<P>.creat,
......
......@@ -27,7 +27,7 @@ protocol OpParam {
func outputDesc() -> String
//associatedtype ParamPrecisionType: PrecisionType
init(opDesc: OpDesc, inScope: Scope) throws
init(opDesc: PMOpDesc, inScope: Scope) throws
static func getFirstTensor<VarType: Variant>(key: String, map: [String : [String]], from: Scope) throws -> VarType
static func inputX<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
static func inputBiase<VarType: Variant>(inputs: [String : [String]], from: Scope) throws -> VarType
......
......@@ -72,11 +72,11 @@ public class InitContext {
protocol Creator where Self: OperatorProtocol{
associatedtype OpType: OperatorProtocol & Runable & InferShaperable
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType
static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType
}
extension Creator where Self: OperatorProtocol {
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType {
static func creat(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> OpType {
do {
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error {
......@@ -100,11 +100,11 @@ protocol OperatorProtocol {
var attrs: [String : Attr] { get }
var para: ParamType { get }
var kernel: KerType { get }
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws
init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws
}
extension OperatorProtocol {
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> Self {
static func provide(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws -> Self {
do {
return try Self.init(device: device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error {
......@@ -114,7 +114,7 @@ extension OperatorProtocol {
}
class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws {
required init(device: MTLDevice, opDesc: PMOpDesc, inScope: Scope, initContext: InitContext) throws {
type = opDesc.type
scope = inScope
inputs = opDesc.inputs
......
......@@ -17,7 +17,7 @@ import Metal
class BatchNormParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try BatchNormParam.inputX(inputs: opDesc.inputs, from: inScope)
if input.transpose != [0, 2, 3, 1] {
......
......@@ -17,7 +17,7 @@ import Metal
class BilinearInterpParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try BilinearInterpParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try BilinearInterpParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class BoxcoderParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
priorBox = try BoxcoderParam.getFirstTensor(key: "PriorBox", map: opDesc.inputs, from: inScope)
priorBoxVar = try BoxcoderParam.getFirstTensor(key: "PriorBoxVar", map: opDesc.inputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class ConcatParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
guard let xlist = opDesc.inputs["X"] else {
fatalError()
......
......@@ -17,7 +17,7 @@ import Metal
class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
filter = try ConvAddAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
......
......@@ -17,7 +17,7 @@ import Foundation
class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
filter = try ConvAddBatchNormReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class ConvAddParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
filter = try ConvAddParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddParam.input(inputs: opDesc.inputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class ConvAddPreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
filter = try ConvAddPreluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvAddPreluParam.input(inputs: opDesc.inputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class ConvBNReluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
filter = try ConvBNReluParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvBNReluParam.input(inputs: opDesc.inputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class ConvParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
filter = try ConvParam.inputFilter(paraInputs: opDesc.paraInputs, from: inScope)
input = try ConvParam.input(inputs: opDesc.inputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class ConvTransposeParam<P: PrecisionType>: ConvParam<P> {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
try super.init(opDesc: opDesc, inScope: inScope)
} catch let error {
......
......@@ -17,7 +17,7 @@ import Metal
class ElementwiseAddParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
inputX = try ElementwiseAddParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ElementwiseAddParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -17,7 +17,7 @@ import Metal
class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
alpha = try ElementwiseAddPreluParam.paramInputAlpha(inputs: opDesc.paraInputs, from: inScope)
mode = try ElementwiseAddPreluParam.getAttr(key: "mode", attrs: opDesc.attrs)
......
......@@ -23,7 +23,7 @@ class FeedParam<P: PrecisionType>: OpParam{
}
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
scope = inScope
do {
output = try FeedParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -19,7 +19,7 @@ class FetchParam<P: PrecisionType>: OpParam{
var output: FetchHolder
let input: Texture
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class FlattenParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try FlattenParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try FlattenParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
//
// BufferToTexture.metal
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/12/28.
// Copyright © 2018 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void buffer_to_texture_kernel(
const device float *input [[buffer(0)]],
texture2d<float, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid);
}
kernel void buffer_to_texture_kernel_half(const device float *input [[buffer(0)]],
texture2d<half, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(half4(y, 0.0f, 0.0f, 0.0f), gid);
}
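For reference, a hedged sketch of dispatching `buffer_to_texture_kernel` from Swift; the `device`, `inputBuffer`, and `outputTexture` objects and the 16×16 threadgroup size are illustrative assumptions, not part of this commit:

```swift
import Metal

// Hypothetical dispatch of buffer_to_texture_kernel; assumes `device`,
// `inputBuffer` (packed float data) and `outputTexture` (write-access
// texture) have been created elsewhere.
func encodeBufferToTexture(device: MTLDevice,
                           inputBuffer: MTLBuffer,
                           outputTexture: MTLTexture) {
    guard let library = device.makeDefaultLibrary(),
          let function = library.makeFunction(name: "buffer_to_texture_kernel"),
          let pipeline = try? device.makeComputePipelineState(function: function),
          let queue = device.makeCommandQueue(),
          let commandBuffer = queue.makeCommandBuffer(),
          let encoder = commandBuffer.makeComputeCommandEncoder() else { return }

    encoder.setComputePipelineState(pipeline)
    encoder.setBuffer(inputBuffer, offset: 0, index: 0)   // [[buffer(0)]]
    encoder.setTexture(outputTexture, index: 0)           // [[texture(0)]]

    // One thread per texel; the kernel's bounds check handles the remainder.
    let tg = MTLSize(width: 16, height: 16, depth: 1)
    let grid = MTLSize(width: (outputTexture.width + 15) / 16,
                       height: (outputTexture.height + 15) / 16,
                       depth: 1)
    encoder.dispatchThreadgroups(grid, threadsPerThreadgroup: tg)
    encoder.endEncoding()
    commandBuffer.commit()
}
```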
......@@ -16,7 +16,7 @@ import Foundation
class MulticlassNMSParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
scores = try MulticlassNMSParam.getFirstTensor(key: "Scores", map: opDesc.inputs, from: inScope)
bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class PoolParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try PoolParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PoolParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class PreluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try PreluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try PreluParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class PriorBoxParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
min_max_aspect_ratios_order = try PriorBoxParam.getAttr(key: "min_max_aspect_ratios_order", attrs: opDesc.attrs)
} catch _ {
......
......@@ -17,7 +17,7 @@ import Foundation
class ReluParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try ReluParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReluParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -17,7 +17,7 @@ import Metal
class ReshapeParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try ReshapeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try ReshapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class ResizeBilinearParam<P: PrecisionType>: OpParam {
typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try ResizeBilinearParam.inputX(inputs: opDesc.inputs, from: inScope)
// if (input.transpose != [0, 2, 3, 1]) || (input.tensorDim.cout() != 4) {
......
......@@ -16,7 +16,7 @@ import Foundation
class ShapeParam<P: PrecisionType>: OpParam {
// typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try ShapeParam.input(inputs: opDesc.inputs, from: inScope)
output = try ShapeParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -17,7 +17,7 @@ import Metal
class SoftmaxParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try SoftmaxParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try SoftmaxParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -16,7 +16,7 @@ import Foundation
class SplitParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope)
output = Texture.init(device: input.metalTexture!.device, inDim: input.dim)
......
......@@ -17,7 +17,7 @@ import Metal
class TransposeParam<P: PrecisionType>: OpParam {
//typealias ParamPrecisionType = P
required init(opDesc: OpDesc, inScope: Scope) throws {
required init(opDesc: PMOpDesc, inScope: Scope) throws {
do {
input = try TransposeParam.inputX(inputs: opDesc.inputs, from: inScope)
output = try TransposeParam.outputOut(outputs: opDesc.outputs, from: inScope)
......
......@@ -35,7 +35,11 @@ extension Array: Attr {
extension String: Attr {
}
func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> Attr {
extension NSMutableArray :Attr {
}
func attrWithProtoDesc(attrDesc: OpDesc_Attr) -> Attr {
switch attrDesc.type {
case .boolean:
return attrDesc.b
......@@ -47,14 +51,33 @@ func attrWithProtoDesc(attrDesc: PaddleMobile_Framework_Proto_OpDesc.Attr) -> At
return attrDesc.l
case .float:
return attrDesc.f
/// Convert GPB array classes to Swift arrays
case .booleans:
return attrDesc.bools
var dimsArray = [Bool]()
let dimsCount = attrDesc.boolsArray.count
for i in 0..<dimsCount {
let dim = Bool(attrDesc.boolsArray.value(at: i))
dimsArray.append(dim)
}
return dimsArray
case .floats:
return attrDesc.floats
var dimsArray = [Float]()
let dimsCount = attrDesc.floatsArray.count
for i in 0..<dimsCount {
let dim = Float(attrDesc.floatsArray.value(at: i))
dimsArray.append(dim)
}
return dimsArray
case .ints:
return attrDesc.ints
var dimsArray = [Int32]()
let dimsCount = attrDesc.intsArray.count
for i in 0..<dimsCount {
let dim = Int32(attrDesc.intsArray.value(at: i))
dimsArray.append(dim)
}
return dimsArray
case .strings:
return attrDesc.strings
return attrDesc.stringsArray
default:
fatalError(" not support this attr type: \(attrDesc.type)")
}
......
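The three repeated loops above copy GPB primitive arrays into Swift arrays element by element. The same copy, factored into a helper as a sketch (the helper name is illustrative; it uses only the `count`/`value(at:)` API the cases above already rely on):

```swift
// Illustrative helper: element-wise copy of a GPBInt32Array into [Int32].
// GPBFloatArray and GPBBoolArray follow the same count/value(at:) pattern.
func int32Array(from gpbArray: GPBInt32Array) -> [Int32] {
    var result = [Int32]()
    result.reserveCapacity(Int(gpbArray.count))
    for i in 0..<gpbArray.count {
        result.append(gpbArray.value(at: i))
    }
    return result
}
```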
// Generated by the protocol buffer compiler. DO NOT EDIT!
// source: Framework.proto
// This CPP symbol can be defined to use imports that match up to the framework
// imports needed when using CocoaPods.
#if !defined(GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS)
#define GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS 0
#endif
//#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
// #import <Protobuf/GPBProtocolBuffers.h>
//#else
// #import "GPBProtocolBuffers.h"
//#endif
#if GPB_USE_PROTOBUF_FRAMEWORK_IMPORTS
#import <Protobuf/GPBProtocolBuffers.h>
#else
#import <ProtocolBuffers/ProtocolBuffers.h>
#endif
#if GOOGLE_PROTOBUF_OBJC_GEN_VERSION != 30001
#error This file was generated by a different version of protoc which is incompatible with your Protocol Buffer library sources.
#endif
// @@protoc_insertion_point(imports)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
CF_EXTERN_C_BEGIN
@class BlockDesc;
@class OpDesc;
@class OpDesc_Attr;
@class OpDesc_Var;
@class OpProto_Attr;
@class OpProto_Var;
@class VarDesc;
@class VarType;
@class VarType_ChannelDesc;
@class VarType_LoDTensorArrayDesc;
@class VarType_LoDTensorDesc;
@class VarType_ReaderDesc;
@class VarType_TensorDesc;
@class VarType_Tuple;
@class Version;
NS_ASSUME_NONNULL_BEGIN
#pragma mark - Enum AttrType
typedef GPB_ENUM(AttrType) {
AttrType_Int = 0,
AttrType_Float = 1,
AttrType_String = 2,
AttrType_Ints = 3,
AttrType_Floats = 4,
AttrType_Strings = 5,
AttrType_Boolean = 6,
AttrType_Booleans = 7,
AttrType_Block = 8,
AttrType_Long = 9,
AttrType_Blocks = 10,
};
GPBEnumDescriptor *AttrType_EnumDescriptor(void);
/// Checks to see if the given value is defined by the enum or was not known at
/// the time this source was generated.
BOOL AttrType_IsValidValue(int32_t value);
#pragma mark - Enum VarType_Type
typedef GPB_ENUM(VarType_Type) {
/// Pod Types
VarType_Type_Bool = 0,
VarType_Type_Int16 = 1,
VarType_Type_Int32 = 2,
VarType_Type_Int64 = 3,
VarType_Type_Fp16 = 4,
VarType_Type_Fp32 = 5,
VarType_Type_Fp64 = 6,
/// Tensor<size_t> is used in C++.
VarType_Type_SizeT = 19,
VarType_Type_Uint8 = 20,
VarType_Type_Int8 = 21,
/// Other types that may need additional descriptions
VarType_Type_LodTensor = 7,
VarType_Type_SelectedRows = 8,
VarType_Type_FeedMinibatch = 9,
VarType_Type_FetchList = 10,
VarType_Type_StepScopes = 11,
VarType_Type_LodRankTable = 12,
VarType_Type_LodTensorArray = 13,
VarType_Type_PlaceList = 14,
VarType_Type_Reader = 15,
VarType_Type_Channel = 16,
/// Any runtime decided variable type is raw
/// raw variables should manage their own allocations
/// in operators like nccl_op
VarType_Type_Raw = 17,
VarType_Type_Tuple = 18,
};
GPBEnumDescriptor *VarType_Type_EnumDescriptor(void);
/// Checks to see if the given value is defined by the enum or was not known at
/// the time this source was generated.
BOOL VarType_Type_IsValidValue(int32_t value);
#pragma mark - FrameworkRoot
/// Exposes the extension registry for this file.
///
/// The base class provides:
/// @code
/// + (GPBExtensionRegistry *)extensionRegistry;
/// @endcode
/// which is a @c GPBExtensionRegistry that includes all the extensions defined by
/// this file and all files that it depends on.
@interface FrameworkRoot : GPBRootObject
@end
#pragma mark - Version
typedef GPB_ENUM(Version_FieldNumber) {
Version_FieldNumber_Version = 1,
};
/// Any incompatible changes to ProgramDesc and its dependencies should
/// raise the version defined in version.h.
///
/// Serialization and deserialization code should be modified in a way
/// that supports old versions, following the version and compatibility policy.
@interface Version : GPBMessage
@property(nonatomic, readwrite) int64_t version;
@property(nonatomic, readwrite) BOOL hasVersion;
@end
#pragma mark - OpDesc
typedef GPB_ENUM(OpDesc_FieldNumber) {
OpDesc_FieldNumber_InputsArray = 1,
OpDesc_FieldNumber_OutputsArray = 2,
OpDesc_FieldNumber_Type = 3,
OpDesc_FieldNumber_AttrsArray = 4,
OpDesc_FieldNumber_IsTarget = 5,
};
/// OpDesc describes an instance of a C++ framework::OperatorBase
/// derived class type.
@interface OpDesc : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *type;
/// Test to see if @c type has been set.
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Var*> *inputsArray;
/// The number of items in @c inputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger inputsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Var*> *outputsArray;
/// The number of items in @c outputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger outputsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc_Attr*> *attrsArray;
/// The number of items in @c attrsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger attrsArray_Count;
@property(nonatomic, readwrite) BOOL isTarget;
@property(nonatomic, readwrite) BOOL hasIsTarget;
@end
#pragma mark - OpDesc_Attr
typedef GPB_ENUM(OpDesc_Attr_FieldNumber) {
OpDesc_Attr_FieldNumber_Name = 1,
OpDesc_Attr_FieldNumber_Type = 2,
OpDesc_Attr_FieldNumber_I = 3,
OpDesc_Attr_FieldNumber_F = 4,
OpDesc_Attr_FieldNumber_S = 5,
OpDesc_Attr_FieldNumber_IntsArray = 6,
OpDesc_Attr_FieldNumber_FloatsArray = 7,
OpDesc_Attr_FieldNumber_StringsArray = 8,
OpDesc_Attr_FieldNumber_B = 10,
OpDesc_Attr_FieldNumber_BoolsArray = 11,
OpDesc_Attr_FieldNumber_BlockIdx = 12,
OpDesc_Attr_FieldNumber_L = 13,
OpDesc_Attr_FieldNumber_BlocksIdxArray = 14,
};
@interface OpDesc_Attr : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
@property(nonatomic, readwrite) AttrType type;
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite) int32_t i;
@property(nonatomic, readwrite) BOOL hasI;
@property(nonatomic, readwrite) float f;
@property(nonatomic, readwrite) BOOL hasF;
@property(nonatomic, readwrite, copy, null_resettable) NSString *s;
/// Test to see if @c s has been set.
@property(nonatomic, readwrite) BOOL hasS;
@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *intsArray;
/// The number of items in @c intsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger intsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) GPBFloatArray *floatsArray;
/// The number of items in @c floatsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger floatsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<NSString*> *stringsArray;
/// The number of items in @c stringsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger stringsArray_Count;
@property(nonatomic, readwrite) BOOL b;
@property(nonatomic, readwrite) BOOL hasB;
@property(nonatomic, readwrite, strong, null_resettable) GPBBoolArray *boolsArray;
/// The number of items in @c boolsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger boolsArray_Count;
@property(nonatomic, readwrite) int32_t blockIdx;
@property(nonatomic, readwrite) BOOL hasBlockIdx;
@property(nonatomic, readwrite) int64_t l;
@property(nonatomic, readwrite) BOOL hasL;
@property(nonatomic, readwrite, strong, null_resettable) GPBInt32Array *blocksIdxArray;
/// The number of items in @c blocksIdxArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger blocksIdxArray_Count;
@end
#pragma mark - OpDesc_Var
typedef GPB_ENUM(OpDesc_Var_FieldNumber) {
OpDesc_Var_FieldNumber_Parameter = 1,
OpDesc_Var_FieldNumber_ArgumentsArray = 2,
};
@interface OpDesc_Var : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *parameter;
/// Test to see if @c parameter has been set.
@property(nonatomic, readwrite) BOOL hasParameter;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<NSString*> *argumentsArray;
/// The number of items in @c argumentsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger argumentsArray_Count;
@end
#pragma mark - OpProto
typedef GPB_ENUM(OpProto_FieldNumber) {
OpProto_FieldNumber_Type = 1,
OpProto_FieldNumber_InputsArray = 2,
OpProto_FieldNumber_OutputsArray = 3,
OpProto_FieldNumber_AttrsArray = 4,
OpProto_FieldNumber_Comment = 5,
};
/// OpProto describes a C++ framework::OperatorBase derived class.
@interface OpProto : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *type;
/// Test to see if @c type has been set.
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Var*> *inputsArray;
/// The number of items in @c inputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger inputsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Var*> *outputsArray;
/// The number of items in @c outputsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger outputsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpProto_Attr*> *attrsArray;
/// The number of items in @c attrsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger attrsArray_Count;
@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
/// Test to see if @c comment has been set.
@property(nonatomic, readwrite) BOOL hasComment;
@end
#pragma mark - OpProto_Var
typedef GPB_ENUM(OpProto_Var_FieldNumber) {
OpProto_Var_FieldNumber_Name = 1,
OpProto_Var_FieldNumber_Comment = 2,
OpProto_Var_FieldNumber_Duplicable = 3,
OpProto_Var_FieldNumber_Intermediate = 4,
OpProto_Var_FieldNumber_Dispensable = 5,
OpProto_Var_FieldNumber_Reuse = 6,
};
/// VarProto describes the C++ type framework::Variable.
@interface OpProto_Var : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
/// Test to see if @c comment has been set.
@property(nonatomic, readwrite) BOOL hasComment;
@property(nonatomic, readwrite) BOOL duplicable;
@property(nonatomic, readwrite) BOOL hasDuplicable;
@property(nonatomic, readwrite) BOOL intermediate;
@property(nonatomic, readwrite) BOOL hasIntermediate;
@property(nonatomic, readwrite) BOOL dispensable;
@property(nonatomic, readwrite) BOOL hasDispensable;
@property(nonatomic, readwrite, copy, null_resettable) NSString *reuse;
/// Test to see if @c reuse has been set.
@property(nonatomic, readwrite) BOOL hasReuse;
@end
#pragma mark - OpProto_Attr
typedef GPB_ENUM(OpProto_Attr_FieldNumber) {
OpProto_Attr_FieldNumber_Name = 1,
OpProto_Attr_FieldNumber_Type = 2,
OpProto_Attr_FieldNumber_Comment = 3,
OpProto_Attr_FieldNumber_Generated = 4,
};
/// AttrProto describes the C++ type Attribute.
@interface OpProto_Attr : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
@property(nonatomic, readwrite) AttrType type;
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite, copy, null_resettable) NSString *comment;
/// Test to see if @c comment has been set.
@property(nonatomic, readwrite) BOOL hasComment;
/// If this attribute is generated, the Paddle third-party language
/// binding is responsible for filling it. End users should not set it.
@property(nonatomic, readwrite) BOOL generated;
@property(nonatomic, readwrite) BOOL hasGenerated;
@end
#pragma mark - VarType
typedef GPB_ENUM(VarType_FieldNumber) {
VarType_FieldNumber_Type = 1,
VarType_FieldNumber_SelectedRows = 2,
VarType_FieldNumber_LodTensor = 3,
VarType_FieldNumber_TensorArray_p = 4,
VarType_FieldNumber_Reader = 5,
VarType_FieldNumber_Channel = 6,
VarType_FieldNumber_Tuple = 7,
};
@interface VarType : GPBMessage
@property(nonatomic, readwrite) VarType_Type type;
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *selectedRows;
/// Test to see if @c selectedRows has been set.
@property(nonatomic, readwrite) BOOL hasSelectedRows;
@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorDesc *lodTensor;
/// Test to see if @c lodTensor has been set.
@property(nonatomic, readwrite) BOOL hasLodTensor;
@property(nonatomic, readwrite, strong, null_resettable) VarType_LoDTensorArrayDesc *tensorArray_p;
/// Test to see if @c tensorArray_p has been set.
@property(nonatomic, readwrite) BOOL hasTensorArray_p;
@property(nonatomic, readwrite, strong, null_resettable) VarType_ReaderDesc *reader;
/// Test to see if @c reader has been set.
@property(nonatomic, readwrite) BOOL hasReader;
@property(nonatomic, readwrite, strong, null_resettable) VarType_ChannelDesc *channel;
/// Test to see if @c channel has been set.
@property(nonatomic, readwrite) BOOL hasChannel;
@property(nonatomic, readwrite, strong, null_resettable) VarType_Tuple *tuple;
/// Test to see if @c tuple has been set.
@property(nonatomic, readwrite) BOOL hasTuple;
@end
#pragma mark - VarType_TensorDesc
typedef GPB_ENUM(VarType_TensorDesc_FieldNumber) {
VarType_TensorDesc_FieldNumber_DataType = 1,
VarType_TensorDesc_FieldNumber_DimsArray = 2,
};
@interface VarType_TensorDesc : GPBMessage
/// Should only be PODType. Is enforced in C++
@property(nonatomic, readwrite) VarType_Type dataType;
@property(nonatomic, readwrite) BOOL hasDataType;
/// [UNK, 640, 480] is saved as [-1, 640, 480]
@property(nonatomic, readwrite, strong, null_resettable) GPBInt64Array *dimsArray;
/// The number of items in @c dimsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger dimsArray_Count;
@end
#pragma mark - VarType_LoDTensorDesc
typedef GPB_ENUM(VarType_LoDTensorDesc_FieldNumber) {
VarType_LoDTensorDesc_FieldNumber_Tensor = 1,
VarType_LoDTensorDesc_FieldNumber_LodLevel = 2,
};
@interface VarType_LoDTensorDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor;
/// Test to see if @c tensor has been set.
@property(nonatomic, readwrite) BOOL hasTensor;
@property(nonatomic, readwrite) int32_t lodLevel;
@property(nonatomic, readwrite) BOOL hasLodLevel;
@end
#pragma mark - VarType_LoDTensorArrayDesc
typedef GPB_ENUM(VarType_LoDTensorArrayDesc_FieldNumber) {
VarType_LoDTensorArrayDesc_FieldNumber_Tensor = 1,
VarType_LoDTensorArrayDesc_FieldNumber_LodLevel = 2,
};
@interface VarType_LoDTensorArrayDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) VarType_TensorDesc *tensor;
/// Test to see if @c tensor has been set.
@property(nonatomic, readwrite) BOOL hasTensor;
@property(nonatomic, readwrite) int32_t lodLevel;
@property(nonatomic, readwrite) BOOL hasLodLevel;
@end
#pragma mark - VarType_ReaderDesc
typedef GPB_ENUM(VarType_ReaderDesc_FieldNumber) {
VarType_ReaderDesc_FieldNumber_LodTensorArray = 1,
};
@interface VarType_ReaderDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<VarType_LoDTensorDesc*> *lodTensorArray;
/// The number of items in @c lodTensorArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger lodTensorArray_Count;
@end
#pragma mark - VarType_ChannelDesc
typedef GPB_ENUM(VarType_ChannelDesc_FieldNumber) {
VarType_ChannelDesc_FieldNumber_DataType = 1,
VarType_ChannelDesc_FieldNumber_Capacity = 2,
};
@interface VarType_ChannelDesc : GPBMessage
@property(nonatomic, readwrite) VarType_Type dataType;
@property(nonatomic, readwrite) BOOL hasDataType;
@property(nonatomic, readwrite) int64_t capacity;
@property(nonatomic, readwrite) BOOL hasCapacity;
@end
#pragma mark - VarType_Tuple
typedef GPB_ENUM(VarType_Tuple_FieldNumber) {
VarType_Tuple_FieldNumber_ElementTypeArray = 1,
};
@interface VarType_Tuple : GPBMessage
// |elementTypeArray| contains |VarType_Type|
@property(nonatomic, readwrite, strong, null_resettable) GPBEnumArray *elementTypeArray;
/// The number of items in @c elementTypeArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger elementTypeArray_Count;
@end
#pragma mark - VarDesc
typedef GPB_ENUM(VarDesc_FieldNumber) {
VarDesc_FieldNumber_Name = 1,
VarDesc_FieldNumber_Type = 2,
VarDesc_FieldNumber_Persistable = 3,
};
@interface VarDesc : GPBMessage
@property(nonatomic, readwrite, copy, null_resettable) NSString *name;
/// Test to see if @c name has been set.
@property(nonatomic, readwrite) BOOL hasName;
@property(nonatomic, readwrite, strong, null_resettable) VarType *type;
/// Test to see if @c type has been set.
@property(nonatomic, readwrite) BOOL hasType;
@property(nonatomic, readwrite) BOOL persistable;
@property(nonatomic, readwrite) BOOL hasPersistable;
@end
#pragma mark - BlockDesc
typedef GPB_ENUM(BlockDesc_FieldNumber) {
BlockDesc_FieldNumber_Idx = 1,
BlockDesc_FieldNumber_ParentIdx = 2,
BlockDesc_FieldNumber_VarsArray = 3,
BlockDesc_FieldNumber_OpsArray = 4,
BlockDesc_FieldNumber_ForwardBlockIdx = 5,
};
@interface BlockDesc : GPBMessage
@property(nonatomic, readwrite) int32_t idx;
@property(nonatomic, readwrite) BOOL hasIdx;
@property(nonatomic, readwrite) int32_t parentIdx;
@property(nonatomic, readwrite) BOOL hasParentIdx;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<VarDesc*> *varsArray;
/// The number of items in @c varsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger varsArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<OpDesc*> *opsArray;
/// The number of items in @c opsArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger opsArray_Count;
@property(nonatomic, readwrite) int32_t forwardBlockIdx;
@property(nonatomic, readwrite) BOOL hasForwardBlockIdx;
@end
#pragma mark - ProgramDesc
typedef GPB_ENUM(ProgramDesc_FieldNumber) {
ProgramDesc_FieldNumber_BlocksArray = 1,
ProgramDesc_FieldNumber_Version = 2,
};
/// Please refer to
/// https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/program.md
/// for more details.
/// TODO(panyx0718): A model can have multiple programs. Need a
/// way to distinguish them. Maybe ID or name?
@interface ProgramDesc : GPBMessage
@property(nonatomic, readwrite, strong, null_resettable) NSMutableArray<BlockDesc*> *blocksArray;
/// The number of items in @c blocksArray without causing the array to be created.
@property(nonatomic, readonly) NSUInteger blocksArray_Count;
@property(nonatomic, readwrite, strong, null_resettable) Version *version;
/// Test to see if @c version has been set.
@property(nonatomic, readwrite) BOOL hasVersion;
@end
NS_ASSUME_NONNULL_END
CF_EXTERN_C_END
#pragma clang diagnostic pop
// @@protoc_insertion_point(global_scope)
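The generated header above defines the GPB message classes (`ProgramDesc`, `BlockDesc`, `OpDesc`, `VarDesc`, …) that replace the SwiftProtobuf types. A minimal sketch of parsing and walking a program from Swift, assuming `modelData` holds a serialized model; GPB collection properties surface to Swift as untyped `NSMutableArray`, hence the conditional casts (the PM* wrappers below do the same with `as!`):

```swift
import Foundation

// Sketch: parse a serialized ProgramDesc and enumerate its ops.
func dumpOps(from modelData: Data) throws {
    let program = try ProgramDesc(data: modelData)
    for case let block as BlockDesc in program.blocksArray {
        for case let op as OpDesc in block.opsArray {
            print("op:", op.type, "attr count:", op.attrsArray_Count)
        }
    }
}
```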
......@@ -14,28 +14,28 @@
import Foundation
public class BlockDesc {
public class PMBlockDesc {
let index: Int
let parentIndex: Int
public let vars: [VarDesc]
let ops: [OpDesc]
init(block: PaddleMobile_Framework_Proto_BlockDesc) {
public let vars: [PMVarDesc]
let ops: [PMOpDesc]
init(block: BlockDesc) {
index = Int(block.idx)
parentIndex = Int(block.parentIdx)
var vars: [VarDesc] = []
for varOfBlock in block.vars {
vars.append(VarDesc.init(protoVarDesc: varOfBlock))
var vars: [PMVarDesc] = []
for varOfBlock in block.varsArray {
vars.append(PMVarDesc.init(protoVarDesc: varOfBlock as! VarDesc))
}
vars.sort { $0.name < $1.name }
self.vars = vars
var ops: [OpDesc] = []
for op in block.ops {
ops.append(OpDesc.init(protoOpDesc: op))
var ops: [PMOpDesc] = []
for op in block.opsArray {
ops.append(PMOpDesc.init(protoOpDesc: op as! OpDesc))
}
self.ops = ops
}
init(inVars: [VarDesc], inOps: [OpDesc]) {
init(inVars: [PMVarDesc], inOps: [PMOpDesc]) {
vars = inVars
ops = inOps
index = 0
......@@ -44,7 +44,7 @@ public class BlockDesc {
}
extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
extension PMBlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
public var description: String {
var str = ""
......
......@@ -14,50 +14,50 @@
import Foundation
class OpDesc {
class PMOpDesc {
let inputs: [String : [String]]
var paraInputs: [String : [String]]
var outputs: [String : [String]]
let unusedOutputs: [String : [String]]
var attrs: [String : Attr] = [:]
var type: String
init(protoOpDesc: PaddleMobile_Framework_Proto_OpDesc) {
type = protoOpDesc.type
let creator = { (vars: [PaddleMobile_Framework_Proto_OpDesc.Var], canAdd: (String) -> Bool) -> [String : [String]] in
var map: [String : [String]] = [:]
for opDescVar in vars {
if (canAdd(opDescVar.parameter)) {
map[opDescVar.parameter] = opDescVar.arguments
init(protoOpDesc: OpDesc) {
type = protoOpDesc.type
let creator = { (vars: [OpDesc_Var], canAdd: (String) -> Bool) -> [String : [String]] in
var map: [String : [String]] = [:]
for opDescVar in vars {
if (canAdd(opDescVar.parameter)) {
map[opDescVar.parameter] = opDescVar.argumentsArray as? [String]
}
}
return map
}
inputs = creator(protoOpDesc.inputsArray as! [OpDesc_Var]) {
opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false
}
paraInputs = creator(protoOpDesc.inputsArray as! [OpDesc_Var]) {
!(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false)
}
outputs = creator(protoOpDesc.outputsArray as! [OpDesc_Var]) {
opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false
}
unusedOutputs = creator(protoOpDesc.outputsArray as! [OpDesc_Var]) {
!(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false)
}
for attr in protoOpDesc.attrsArray {
if ((attr as! OpDesc_Attr).type != .block) {
attrs[(attr as! OpDesc_Attr).name] = attrWithProtoDesc(attrDesc: attr as! OpDesc_Attr)
}
}
}
return map
}
inputs = creator(protoOpDesc.inputs) {
opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false
}
paraInputs = creator(protoOpDesc.inputs) {
!(opInfos[protoOpDesc.type]?.inputs.contains($0) ?? false)
}
outputs = creator(protoOpDesc.outputs) {
opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false
}
unusedOutputs = creator(protoOpDesc.outputs) {
!(opInfos[protoOpDesc.type]?.outputs.contains($0) ?? false)
}
for attr in protoOpDesc.attrs {
if (attr.type != .block) {
attrs[attr.name] = attrWithProtoDesc(attrDesc: attr)
}
}
}
}
extension OpDesc: CustomStringConvertible, CustomDebugStringConvertible {
extension PMOpDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
var str = ""
str += "op type: \(type): \n"
......
......@@ -14,11 +14,11 @@
import Foundation
public class ProgramDesc {
public var blocks: [BlockDesc] = []
init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) {
for block in protoProgram.blocks {
self.blocks.append(BlockDesc.init(block: block))
public class PMProgramDesc {
public var blocks: [PMBlockDesc] = []
init(protoProgram: ProgramDesc) {
for block in protoProgram.blocksArray {
self.blocks.append(PMBlockDesc.init(block: block as! BlockDesc))
}
}
......@@ -26,7 +26,7 @@ public class ProgramDesc {
}
}
extension ProgramDesc: CustomStringConvertible, CustomDebugStringConvertible {
extension PMProgramDesc: CustomStringConvertible, CustomDebugStringConvertible {
public var description: String {
var str: String = ""
for i in 0..<blocks.count {
......
......@@ -56,13 +56,13 @@ public enum VarTypeType: Int {
}
}
public class VarDesc {
public class PMVarDesc {
public let name: String
public let persistable: Bool
public let type: VarTypeType
let tensorDesc: TensorDesc?
init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) {
type = VarTypeType.init(rawValue: protoVarDesc.type.type.rawValue) ?? .ErrorType
init(protoVarDesc: VarDesc) {
type = VarTypeType.init(rawValue: Int(protoVarDesc.type.type.rawValue)) ?? .ErrorType
name = protoVarDesc.name
persistable = protoVarDesc.persistable
switch type {
......@@ -71,14 +71,14 @@ public class VarDesc {
case .LodTensor:
tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.lodTensor.tensor)
case .StepLodTensorArray:
tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.tensorArray.tensor);
tensorDesc = TensorDesc.init(protoTensorDesc: protoVarDesc.type.tensorArray_p.tensor);
default:
tensorDesc = .none
}
}
}
extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
extension PMVarDesc: CustomStringConvertible, CustomDebugStringConvertible {
public var description: String {
var str = ""
str += "var name \(name): \n"
......
......@@ -16,14 +16,14 @@ import Foundation
@objc public class Program: NSObject {
public let paramPath: String
public let programDesc: ProgramDesc
public let programDesc: PMProgramDesc
public let scope: Scope
init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) {
init(inProgramDesc: PMProgramDesc, inParamPath: String, inScope: Scope) {
programDesc = inProgramDesc
paramPath = inParamPath
scope = inScope
}
init(inProgramDesc: ProgramDesc, inScope: Scope) {
init(inProgramDesc: PMProgramDesc, inScope: Scope) {
programDesc = inProgramDesc
scope = inScope
paramPath = ""
......
......@@ -25,8 +25,8 @@ class Node {
var inputs: [Node] = []
var outputs: [Node] = []
var type: String
var opDesc: OpDesc?
init(inOpDesc: OpDesc) {
var opDesc: PMOpDesc?
init(inOpDesc: PMOpDesc) {
type = inOpDesc.type
opDesc = inOpDesc
}
......@@ -192,7 +192,7 @@ class ProgramOptimize<P: PrecisionType> {
ElementwiseAddPreluOp<P>.self
]
func optimize(originProgramDesc: ProgramDesc) -> ProgramDesc {
func optimize(originProgramDesc: PMProgramDesc) -> PMProgramDesc {
guard originProgramDesc.blocks.count == 1 else {
fatalError(" not support yet")
......@@ -287,13 +287,13 @@ class ProgramOptimize<P: PrecisionType> {
}
}
var ops: [OpDesc] = []
var ops: [PMOpDesc] = []
for node in nodes {
ops.append(node.opDesc!)
}
var newProgramDesc = ProgramDesc.init()
let newBlock = BlockDesc.init(inVars: block.vars, inOps: ops)
let newProgramDesc = PMProgramDesc.init()
let newBlock = PMBlockDesc.init(inVars: block.vars, inOps: ops)
newProgramDesc.blocks.append(newBlock)
return newProgramDesc
}
......
......@@ -52,9 +52,19 @@ class TensorDesc {
}
}
init(protoTensorDesc: PaddleMobile_Framework_Proto_VarType.TensorDesc) {
dims = protoTensorDesc.dims.map{ Int($0) > 0 ? Int($0) : abs(Int($0)) }
dataType = VarTypeType.init(rawValue: protoTensorDesc.dataType.rawValue) ?? .ErrorType
init(protoTensorDesc: VarType_TensorDesc) {
// dims = protoTensorDesc.dimsArray.map{ Int64($0)! > 0 ? Int64($0) : abs(Int64($0)) }
var dimsArray = [Int]()
let dimsCount = protoTensorDesc.dimsArray.count
for i in 0..<dimsCount {
let rawDim = Int(protoTensorDesc.dimsArray.value(at: i))
let dim = rawDim > 0 ? rawDim : abs(rawDim)
dimsArray.append(dim)
}
dims = dimsArray
dataType = VarTypeType.init(rawValue: Int(protoTensorDesc.dataType.rawValue)) ?? .ErrorType
}
}
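The loop above is just an absolute-value pass over the serialized dims: dynamic dims (e.g. batch) are stored as negative values and flipped positive so later shape arithmetic stays well-defined. A minimal standalone C++ sketch of that normalization, with all names ours rather than the library's:

#include <vector>

// Dynamic dims are serialized negative; the loader flips them positive.
std::vector<int> normalize_dims(const std::vector<long long>& proto_dims) {
  std::vector<int> dims;
  dims.reserve(proto_dims.size());
  for (long long d : proto_dims) {
    dims.push_back(static_cast<int>(d > 0 ? d : -d));
  }
  return dims;
}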
......@@ -21,7 +21,6 @@
//limitations under the License.
import Foundation
import SwiftProtobuf
// If the compiler emits an error on this type, it is because this file
// was generated by a version of the `protoc` Swift plug-in that is
......
......@@ -15,6 +15,7 @@
#pragma once
#import <UIKit/UIKit.h>
#import <paddle_mobile/Framework.pbobjc.h>
//! Project version number for paddle_mobile.
//FOUNDATION_EXPORT double paddle_mobileVersionNumber;
......
......@@ -151,6 +151,30 @@ void format_dwconv_filter(framework::Tensor *filter_tensor, float *scale_ptr) {
filter_tensor->reset_data_ptr(new_data);
}
void format_DWDconv_filter(framework::Tensor *filter_tensor, float *scale_ptr,
int stride) {
auto dims = filter_tensor->dims();
auto num = dims[0], height = dims[2], width = dims[3];
auto data_ptr = filter_tensor->data<float>();
size_t memory_size = num * height * width * sizeof(float);
auto new_data = (float *)fpga_malloc(memory_size); // NOLINT
fpga_copy(new_data, data_ptr, memory_size);
int hw = height * width;
deconv_filter::deconv_NC_convert(&new_data, num, 1, hw);
num = dims[1];
int channel = dims[0];
deconv_filter::DWDconv_format_filter(&new_data, num, channel, height, width,
scale_ptr, stride);
// framework::DDim dims_new =
// framework::make_ddim({num, 1, height, width});
// filter_tensor->Resize(dims_new);
filter_tensor->reset_data_ptr(new_data);
}
void format_fc_filter(framework::Tensor *filter_tensor, float max_value) {
filter_tensor->scale[0] = float(max_value / 127.0); // NOLINT
filter_tensor->scale[1] = float(127.0 / max_value); // NOLINT
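The scale pair written above encodes a symmetric int8 quantization: scale[0] dequantizes and scale[1] quantizes. A hedged sketch of the round trip (helper names are ours, assuming this is how the pair is consumed):

#include <cassert>
#include <cmath>
#include <cstdint>

// scale[1] = 127 / max_value maps floats into int8 range;
// scale[0] = max_value / 127 maps them back.
inline int8_t quantize(float v, float max_value) {
  return static_cast<int8_t>(std::lround(v * (127.0f / max_value)));
}
inline float dequantize(int8_t q, float max_value) {
  return q * (max_value / 127.0f);
}

int main() {
  float x = dequantize(quantize(0.5f, 1.0f), 1.0f);
  assert(x > 0.49f && x < 0.51f);  // round trip within quantization error
  return 0;
}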
......@@ -243,6 +267,17 @@ void format_dwconv_data(framework::Tensor *filter_tensor,
format_bias_array(bias_ptr, channel);
format_fp16_ofm(ofm_tensor);
}
void format_DWDeconv_data(framework::Tensor *filter_tensor,
framework::Tensor *ofm_tensor, float **bs_ptr,
int group, int sub_conv_n) {
int channel = ofm_tensor->dims()[1];
// dw-deconv
format_DWDconv_filter(
filter_tensor,
(reinterpret_cast<float *>(*bs_ptr) + sub_conv_n * channel), sub_conv_n);
format_bias_array(bs_ptr, channel);
format_fp16_ofm(ofm_tensor);
}
void expand_conv_arg(ConvArgs *arg) {
ConvArgs args = *arg;
......@@ -311,9 +346,9 @@ void expand_conv_arg(ConvArgs *arg) {
auto filter_pad_width_mul_channel =
args.image.pad_width * args.image.channels;
auto image_amount_per_row_multi_win_first =
image_amount_per_row * (4 * args.kernel.stride_h - args.image.pad_height);
image_amount_per_row * (2 * args.kernel.stride_h - args.image.pad_height);
auto image_amount_per_row_multi_win =
image_amount_per_row * (4 * args.kernel.stride_h);
image_amount_per_row * (2 * args.kernel.stride_h);
auto image_block_num = block_num;
auto image_block_len =
......@@ -340,7 +375,8 @@ void expand_conv_arg(ConvArgs *arg) {
(512 / (align_to_x(args.filter_num, 4) / 4 * 2) > 2)
? (512 / (align_to_x(args.filter_num, 4) / 4 * 2) - 2)
: 0;
auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
// auto cmd = 0UL | (args.relu_enabled ? USE_RELU : 0) | USE_BIAS;
auto cmd = 0UL | USE_BIAS;
auto deconv_param = ((args.deconv_tx_param.deconv_en) << 24) |
((args.deconv_tx_param.sub_conv_num) << 16) |
......@@ -378,7 +414,8 @@ void expand_conv_arg(ConvArgs *arg) {
void expand_EW_arg(EWAddArgs *arg) {
EWAddArgs args = *arg;
uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
// uint64_t cmd = args.relu_enabled ? USE_RELU : 0;
uint64_t cmd = 0;
uint64_t datalen = (uint64_t)args.image0.width *
(uint64_t)args.image0.height *
(uint64_t)args.image0.channels;
......@@ -406,8 +443,10 @@ void expand_EW_arg(EWAddArgs *arg) {
void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float *bs_ptr) {
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float *bs_ptr) {
auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>();
auto out_ptr = out->data<float>();
......@@ -453,7 +492,10 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
filter->dims()[3]));
for (int i = 0; i < n; i++) {
arg->conv_arg[i].relu_enabled = relu_enabled;
// arg->conv_arg[i].relu_enabled = relu_enabled;
arg->conv_arg[i].output.activation.activation_type = activation_enable;
arg->conv_arg[i].output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->conv_arg[i].group_num = (uint32_t)group_num;
arg->conv_arg[i].kernel.stride_h = (uint32_t)stride_h;
arg->conv_arg[i].kernel.stride_w = (uint32_t)stride_w;
......@@ -525,8 +567,9 @@ void fill_split_arg(struct SplitConvArgs *arg, framework::Tensor *input,
void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w,
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float *bs_ptr) {
auto input_ptr = input->data<float>();
auto filter_ptr = filter->data<float>();
......@@ -652,7 +695,13 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
}
for (int j = 0; j < split_num; ++j) {
arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
// arg->split_conv_args[i]->conv_arg[j].relu_enabled = relu_enabled;
arg->split_conv_args[i]->conv_arg[j].output.activation.activation_type =
activation_enable;
arg->split_conv_args[i]
->conv_arg[j]
.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->split_conv_args[i]->conv_arg[j].group_num = (uint32_t)group_num;
arg->split_conv_args[i]->conv_arg[j].kernel.width =
......@@ -765,12 +814,17 @@ void fill_deconv_arg(struct DeconvArgs *arg, framework::Tensor *input,
void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float *bias_ptr) {
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float *bias_ptr) {
auto filter_ptr = filter->data<float>();
auto input_ptr = input->data<float>();
auto output_ptr = out->mutable_data<float>();
arg->relu_enabled = relu_enabled;
arg->sub_conv_num = 1;
// arg->relu_enabled = relu_enabled;
arg->output.activation.activation_type = activation_enable;
arg->output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
arg->bias_address = bias_ptr;
arg->filter_address = filter_ptr;
arg->kernel.height = (uint32_t)filter->dims()[2];
......@@ -788,5 +842,114 @@ void fill_dwconv_arg(struct DWconvArgs *arg, framework::Tensor *input,
arg->output.scale_address = out->scale;
} // end dwconv arg fill
void fill_DWDeconv_arg(struct DWDeconvArgs *arg, framework::Tensor *input,
framework::Tensor *out, framework::Tensor *filter,
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float *bias_ptr) {
auto filter_ptr = filter->data<float>();
auto input_ptr = input->data<float>();
auto output_ptr = out->mutable_data<float>();
auto deleter = [](void *p) { fpga_free(p); };
arg->group_num = (uint32_t)filter->dims()[0];
arg->sub_conv_num = (uint32_t)stride_w;
arg->filter_num = (uint32_t)filter->dims()[0];
int sub_conv_num = stride_w;
int sub_pad =
deconv_filter::deconv_calc_sub_pad((int)filter->dims()[3], // NOLINT
padding_w, stride_w);
auto sub_filter_width = (uint32_t)deconv_filter::deconv_get_sub_filter_axis(
(int)filter->dims()[3], stride_w); // NOLINT
auto sub_output_width = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
(int)input->dims()[3], sub_pad, sub_filter_width); // NOLINT
auto sub_output_height = (uint32_t)deconv_filter::deconv_get_sub_out_axis(
(int)input->dims()[2], sub_pad, sub_filter_width); // NOLINT
arg->sub_output_width = (uint32_t)sub_output_width;
arg->sub_output_height = (uint32_t)sub_output_height;
arg->omit_size = (uint32_t)deconv_filter::deconv_get_omit(
stride_w, (int)filter->dims()[3], padding_w); // NOLINT
auto sub_channels = (int)input->dims()[1]; // NOLINT
uint32_t omit_size = arg->omit_size;
int real_out_width = sub_output_width * sub_conv_num - 2 * omit_size;
int real_out_height = sub_output_height * sub_conv_num - 2 * omit_size;
int sub_filter_num = sub_conv_num * (arg->filter_num);
framework::DDim dims_out_new = framework::make_ddim(
{1, arg->filter_num, real_out_height, real_out_width});
fpga::format_fp16_ofm(out, dims_out_new);
auto out_ptr = out->data<float>();
/*====For Addition
arg->output.address =
(half *)out_ptr + // NOLINT
omit_size * sizeof(half) *
(align_to_x(real_out_width * arg->filter_num, IMAGE_ALIGNMENT));
*/
arg->output.address = out_ptr;
arg->output.scale_address = out->scale;
int filter_offset = sub_filter_width * sub_filter_width *
align_to_x(sub_channels, FILTER_ELEMENT_ALIGNMENT) *
arg->sub_conv_num;
for (int i = 0; i < sub_conv_num; ++i) {
arg->dw_conv_args.push_back(std::make_shared<DWconvArgs>());
arg->dw_conv_args[i]->sub_conv_num = sub_conv_num;
// arg->dw_conv_args[i]->relu_enabled = relu_enabled;
arg->dw_conv_args[i]->output.activation.activation_type = activation_enable;
arg->dw_conv_args[i]->output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
arg->dw_conv_args[i]->bias_address = bias_ptr;
arg->dw_conv_args[i]->filter_address =
fpga_malloc(filter_offset * sizeof(int16_t));
memcpy(arg->dw_conv_args[i]->filter_address,
(reinterpret_cast<half *>(filter_ptr) + i * filter_offset),
filter_offset * sizeof(int16_t));
arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(arg->dw_conv_args[i]->filter_address),
deleter));
arg->dw_conv_args[i]->kernel.height = (uint32_t)sub_filter_width;
arg->dw_conv_args[i]->kernel.width = (uint32_t)sub_filter_width;
arg->dw_conv_args[i]->kernel.stride_h = (uint32_t)1;
arg->dw_conv_args[i]->kernel.stride_w = (uint32_t)1;
arg->dw_conv_args[i]->image.address = input_ptr;
arg->dw_conv_args[i]->image.channels = (uint32_t)input->dims()[1];
arg->dw_conv_args[i]->image.height = (uint32_t)input->dims()[2];
arg->dw_conv_args[i]->image.width = (uint32_t)input->dims()[3];
arg->dw_conv_args[i]->image.pad_height = sub_pad;
arg->dw_conv_args[i]->image.pad_width = sub_pad;
arg->dw_conv_args[i]->image.scale_address = input->scale;
arg->dw_conv_args[i]->output.address =
fpga_malloc(sub_output_height *
align_to_x(sub_output_width * sub_channels * sub_conv_num,
IMAGE_ALIGNMENT) *
sizeof(int16_t));
arg->dw_conv_args[i]->output.scale_address =
static_cast<float *>(fpga_malloc(2 * sizeof(float)));
arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(arg->dw_conv_args[i]->output.address),
deleter));
arg->vector_dw_conv_space.push_back(std::shared_ptr<char>(
reinterpret_cast<char *>(arg->dw_conv_args[i]->output.scale_address),
deleter));
}
// arg->output.scale_address = out->scale;
} // end DWDeconv arg fill
} // namespace fpga
} // namespace paddle_mobile
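The geometry in fill_DWDeconv_arg above decomposes a stride-s deconvolution into s sub-convolutions (sub_conv_num = stride_w) and then trims omit_size rows/columns from each edge. A standalone check of those size relations, with illustrative values:

#include <cassert>

int main() {
  // Mirrors fill_DWDeconv_arg: sub_conv_num = stride_w.
  int sub_conv_num = 2, sub_output_width = 14, sub_output_height = 14;
  int omit_size = 1;
  int real_out_width = sub_output_width * sub_conv_num - 2 * omit_size;
  int real_out_height = sub_output_height * sub_conv_num - 2 * omit_size;
  assert(real_out_width == 26 && real_out_height == 26);  // 14 * 2 - 2
  return 0;
}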
......@@ -47,16 +47,28 @@ void format_concat_output(framework::Tensor* out, int height, int width,
void fill_split_arg(struct SplitConvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float* bs_ptr);
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void fill_deconv_arg(struct DeconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int group_num, int stride_h,
int stride_w, int padding_h, int padding_w, float* bs_ptr);
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int group_num,
int stride_h, int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void fill_dwconv_arg(struct DWconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
bool relu_enabled, int stride_h, int stride_w,
int padding_h, int padding_w, float* bias_ptr);
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float* bias_ptr);
void fill_DWDeconv_arg(struct DWDeconvArgs* arg, framework::Tensor* input,
framework::Tensor* out, framework::Tensor* filter,
ActivationType activation_enable,
int16_t leaky_relu_negative_slope, int stride_h,
int stride_w, int padding_h, int padding_w,
float* bs_ptr);
void format_deconv_filter(framework::Tensor* filter_tensor, float max_value,
int group_num, int stride);
......@@ -69,6 +81,10 @@ void format_deconv_data(framework::Tensor* filter_tensor,
void format_dwconv_data(framework::Tensor* filter_tensor,
framework::Tensor* ofm_tensor, float* scale_ptr,
float** bias_ptr);
void format_DWDeconv_data(framework::Tensor* filter_tensor,
framework::Tensor* ofm_tensor, float** bs_ptr,
int group, int sub_conv_n);
template <typename Dtype>
void savefile(std::string filename, void* buffer, int dataSize, Dtype tmp) {
float data;
......
......@@ -19,16 +19,6 @@ limitations under the License. */
#include "fpga/V1/filter.h"
// #include "filter.h"
#include "fpga/V1/api.h"
// #include "fpga_api.h"
// just for test
//#include <string>
//#include "deconv.h"
//#include "deconv_api.h"
// using namespace std;
// using namespace paddle_mobile::fpga;
// using namespace baidu::fpga::deconv::api;
// namespace api = baidu::fpga::deconv::api;
namespace paddle_mobile {
namespace fpga {
......@@ -42,7 +32,8 @@ void deconv_inverse_filter(float** data_in, int num, int channel, int width,
float* tmp = *data_in;
int data_size = num * channel * width * height;
int hw_len = height * width;
auto tmp_data = (float*)fpga_malloc(data_size * sizeof(float));
auto tmp_data =
reinterpret_cast<float*>(fpga_malloc(data_size * sizeof(float)));
for (int i = 0; i < num; ++i) {
for (int j = 0; j < channel; ++j) {
for (int k = 0; k < hw_len; ++k) {
......@@ -97,9 +88,10 @@ int deconv_get_omit(int stride, int filter_width, int pad) {
return (stride - idx);
}
void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_conv_n, int kernel_num, int channel) {
char* ptr_tmp = *data_in;
template <typename T>
void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
int kernel_num, int channel) {
T* ptr_tmp = *data_in;
int sub_num = kernel_num * sub_conv_n;
int sub_h = height / sub_conv_n;
int sub_w = width / sub_conv_n;
......@@ -107,7 +99,8 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_filter_size =
kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
char* ptr_sub_filter = (char*)fpga_malloc(sub_filter_size * sizeof(char));
T* ptr_sub_filter =
reinterpret_cast<T*>(fpga_malloc(sub_filter_size * sizeof(T)));
for (int idx = 0; idx < sub_conv_n; ++idx) {
for (int nn = 0; nn < sub_num; ++nn) {
int ni = nn % kernel_num;
......@@ -124,7 +117,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
fpga_copy(
ptr_sub_filter + idx * sub_h * sub_w * channel * sub_num + sidx,
(*data_in) + kidx, channel * sizeof(char));
(*data_in) + kidx, channel * sizeof(T));
// for (int cc =0; cc < channel; ++cc) {
// ptr_sub_filter[idx*sub_h*sub_w*channel*sub_num + sidx + cc] =
// (*data_in)[kidx + cc];
......@@ -140,7 +133,7 @@ void deconv_get_sub_filter(char** data_in, int height, int width,
void deconv_NC_convert(float** filter_in, int kernel_num, int channels,
int hw) {
float* tmp = *filter_in;
float* ptr_filter = (float*)(paddle_mobile::fpga::fpga_malloc(
float* ptr_filter = reinterpret_cast<float*>(paddle_mobile::fpga::fpga_malloc(
hw * kernel_num * channels * sizeof(float)));
for (int c = 0; c < channels; ++c) {
......@@ -188,7 +181,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
result2);
}*/
deconv_get_sub_filter(quantize_data, height, width, stride, num, channel);
deconv_get_sub_filter<char>(quantize_data, height, width, stride, num,
channel);
/*{
char result2 = (char)0;
string filename = "sub_filter_filter_data";
......@@ -212,10 +206,12 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
((residual == 0) ? div_num : (div_num - 1)) +
align_to_x(residual, FILTER_NUM_ALIGNMENT);
char** ptr_ptr_data = (char**)fpga_malloc(sub_conv_n * sizeof(char*));
char** ptr_ptr_data =
reinterpret_cast<char**>(fpga_malloc(sub_conv_n * sizeof(char*)));
int origin_offset = sub_chw * sub_num;
for (int i = 0; i < sub_conv_n; ++i) {
(ptr_ptr_data)[i] = (char*)fpga_malloc(origin_offset * sizeof(char));
(ptr_ptr_data)[i] =
reinterpret_cast<char*>(fpga_malloc(origin_offset * sizeof(char)));
fpga_copy((ptr_ptr_data)[i], (*quantize_data) + origin_offset * i,
origin_offset * sizeof(char));
......@@ -233,8 +229,8 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
int align_offset =
align_to_x(sub_chw, FILTER_ELEMENT_ALIGNMENT) * num_after_alignment;
char* ptr_space = (char*)fpga_malloc(sub_conv_n * align_offset *
sizeof(char)); // continuous space
char* ptr_space = reinterpret_cast<char*>(fpga_malloc(
sub_conv_n * align_offset * sizeof(char))); // continuous space
for (int i = 0; i < sub_conv_n; ++i) {
char* ptr_tmp = (ptr_ptr_data)[i];
......@@ -251,7 +247,7 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
fpga_copy(ptr_space + i * align_offset, ptr_tmp, align_offset);
fpga_free(ptr_tmp);
}
*data_in = (float*)ptr_space;
*data_in = reinterpret_cast<float*>(ptr_space);
/* {
char result2 = (char)0;
......@@ -262,6 +258,22 @@ void deconv_format_filter(float** data_in, int num, int channel, int height,
fpga_flush(ptr_space, sub_conv_n * align_offset * sizeof(char));
}
void DWDconv_format_filter(float** data_in, int num, int channel, int height,
int width, float* scale_ptr, int stride) {
deconv_inverse_filter(data_in, num, channel, width, height);
filter::quantize_to_fp16(data_in, channel, height, width, scale_ptr);
int16_t** quantize_data = (int16_t**)data_in; // NOLINT
filter::convert_to_hwn(quantize_data, channel, height, width);
deconv_get_sub_filter<int16_t>(quantize_data, height, width, stride, num,
channel);
filter::align_element_n(quantize_data, channel, height, width);
fpga_flush(*quantize_data, align_to_x(channel, FILTER_ELEMENT_ALIGNMENT) *
height * width * sizeof(int16_t));
}
} // namespace deconv_filter
} // namespace fpga
} // namespace paddle_mobile
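deconv_get_sub_filter splits each height x width filter into sub_conv_n^2 sub-filters of (height/n) x (width/n); the sub_filter_size allocation above preserves the total element count, which a quick standalone check confirms:

#include <cassert>

int main() {
  int kernel_num = 4, channel = 3, height = 6, width = 6, sub_conv_n = 2;
  int sub_h = height / sub_conv_n, sub_w = width / sub_conv_n;
  // Same expression as the sub_filter_size computation above.
  int sub_filter_size =
      kernel_num * sub_h * sub_w * channel * sub_conv_n * sub_conv_n;
  assert(sub_filter_size == kernel_num * height * width * channel);
  return 0;
}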
......@@ -24,11 +24,15 @@ int deconv_calc_sub_pad(int filter_axis, int pad, int stride);
int deconv_get_sub_filter_axis(int filter_axis, int stride);
int deconv_get_sub_out_axis(int image_axis, int sub_pad, int sub_filter_axis);
int deconv_get_omit(int stride, int filter_width, int pad);
void deconv_get_sub_filter(char** data_in, int height, int width,
int sub_conv_n, int kernel_num, int channel);
template <typename T>
void deconv_get_sub_filter(T** data_in, int height, int width, int sub_conv_n,
int kernel_num, int channel);
void deconv_format_filter(float** data_in, int num, int channel, int height,
int width, int group_num, float max, int stride);
void deconv_NC_convert(float** filter_in, int kernel_num, int channels, int hw);
void DWDconv_format_filter(float** data_in, int num, int channel, int height,
int width, float* scale_ptr, int stride);
} // namespace deconv_filter
} // namespace fpga
......
......@@ -346,6 +346,16 @@ void format_dwconv_filter(float **data_in, int num, int height, int width,
fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) *
height * width * sizeof(int16_t));
}
void format_DWDeconv_filter(float **data_in, int num, int height, int width,
float *scale_ptr) {
quantize_to_fp16(data_in, num, height, width, scale_ptr);
int16_t **quantize_data = (int16_t **)data_in; // NOLINT
convert_to_hwn(quantize_data, num, height, width);
align_element_n(quantize_data, num, height, width);
fpga_flush(*quantize_data, align_to_x(num, FILTER_ELEMENT_ALIGNMENT) *
height * width * sizeof(int16_t));
}
} // namespace filter
} // namespace fpga
} // namespace paddle_mobile
......@@ -18,7 +18,6 @@ limitations under the License. */
#include "fpga/V1/image.h"
#include "fpga/common/config.h"
#include "fpga/common/driver.h"
#ifdef COST_TIME_PRINT
#include <sys/time.h>
#include <time.h>
......@@ -64,6 +63,7 @@ using namespace std; // NOLINT
#define REG_TIMER_COUNTER 0x070
#define REG_SCALE_PARAMETER 0x080
#define REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR 0x090
#define REG_FLASH_CMD 0x200
#define REG_FLASH_DATA 0x208
......@@ -163,6 +163,7 @@ using namespace std; // NOLINT
#define REG_DWCONV_FILTER_BASE_ADDR 0xe08
#define REG_DWCONV_FILTER_SHAPE 0xe10
#define REG_DWCONV_FILTER_N_ALIGN 0xe18
#define REG_DWCONV_FILTER_SUBNUMBER 0xe20
#define REG_DWCONV_CMD 0xe00
int ComputeFpgaConv(const struct SplitConvArgs &args) {
......@@ -189,8 +190,8 @@ int ComputeFpgaConv(const struct SplitConvArgs &args) {
int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "======Compute Basic Conv======";
DLOG << " relu_enabled:" << args.relu_enabled
<< " sb_address:" << args.sb_address
// DLOG << " relu_enabled:" << args.relu_enabled
DLOG << " sb_address:" << args.sb_address
<< " filter_address:" << args.filter_address
<< " filter_num:" << args.filter_num
<< " group_num:" << args.group_num;
......@@ -212,6 +213,25 @@ int ComputeBasicConv(const struct ConvArgs &args) {
#ifdef PADDLE_MOBILE_ZU5
int ret = 0;
uint64_t output_scale = 0;
uint64_t reg_ActivationArgs = 0;
// active function: {none, leakyrelu, sigmoid, tanh}
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_CONV]->status) {
ret = -EIO;
......@@ -219,6 +239,10 @@ int ComputeBasicConv(const struct ConvArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(
((uint64_t)args.image.height) | (((uint64_t)args.image.width) << 32),
......@@ -278,6 +302,9 @@ int ComputeBasicConv(const struct ConvArgs &args) {
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
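The reg_ActivationArgs value above packs the activation selector into the high 32 bits and the fp16-encoded leaky-ReLU slope into the low bits before it is written to REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR. A minimal sketch of that encoding (enum values as declared in fpga_common.h further down; the helper name is ours):

#include <cassert>
#include <cstdint>

enum ActivationType { NONE = 0, LEAKYRELU = 1, SIGMOID = 2, TANH = 3 };

uint64_t pack_activation(ActivationType type, uint16_t slope_fp16) {
  return (static_cast<uint64_t>(type) << 32) | slope_fp16;
}

int main() {
  // Plain ReLU is LEAKYRELU with a zero slope.
  assert(pack_activation(LEAKYRELU, 0) == (1ULL << 32));
  // NONE with zero slope leaves the register cleared.
  assert(pack_activation(NONE, 0) == 0);
  return 0;
}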
......@@ -314,6 +341,23 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
uint64_t image_physical_address = 0;
uint64_t output_physical_address = 0;
uint64_t reg_ActivationArgs = 0;
// active function: {none, leakyrelu, sigmoid, tanh}
ActivationArgs active_args;
// active_args.activation_type = LEAKYRELU;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
image_physical_address = vaddr_to_paddr_driver(args.image.address);
output_physical_address = vaddr_to_paddr_driver(args.output.address);
uint32_t output_height = (uint32_t)(
......@@ -364,6 +408,9 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
......@@ -408,6 +455,10 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
......@@ -418,8 +469,8 @@ int ComputeFpgaPool(const struct PoolingArgs &args) {
int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFpgaEWAdd===========";
DLOG << " relu_enabled:" << args.relu_enabled
<< " const0:" << fp16_2_fp32(int16_t(args.const0))
// DLOG << " relu_enabled:" << args.relu_enabled
DLOG << " const0:" << fp16_2_fp32(int16_t(args.const0))
<< " const1:" << fp16_2_fp32(int16_t(args.const1));
DLOG << " image0_address:" << args.image0.address
<< " image0_scale_address:" << args.image0.scale_address
......@@ -441,6 +492,19 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
#ifdef PADDLE_MOBILE_ZU5
int ret = 0;
uint64_t output_scale = 0;
uint64_t reg_ActivationArgs = 0;
ActivationArgs active_args;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
DLOG << " activation_type:" << active_args.activation_type
<< " leaky_relu_negative_slope:"
<< active_args.leaky_relu_negative_slope;
DLOG << " reg_ActivationArgs:" << reg_ActivationArgs;
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_EW]->status) {
ret = -EIO;
......@@ -449,6 +513,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(args.driver.image0_address_phy, REG_EW_IMAGE0_BASE_ADDR);
reg_writeq(args.driver.image1_address_phy, REG_EW_IMAGE1_BASE_ADDR);
......@@ -468,6 +535,9 @@ int ComputeFpgaEWAdd(const struct EWAddArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
active_args.activation_type = NONE;
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
#endif
......@@ -501,6 +571,17 @@ int PerformBypass(const struct BypassArgs &args) {
uint8_t data_cell_in = 0;
uint8_t data_cell_out = 0;
int ret = 0;
uint64_t reg_ActivationArgs = 0;
ActivationArgs active_args;
active_args.activation_type = args.output.activation.activation_type;
active_args.leaky_relu_negative_slope =
args.output.activation.leaky_relu_negative_slope;
reg_ActivationArgs = (uint64_t(active_args.activation_type) << 32) |
active_args.leaky_relu_negative_slope;
datalen = (uint64_t)args.image.width * (uint64_t)args.image.height *
(uint64_t)args.image.channels;
datalen = align_to_x(datalen, 16);
......@@ -559,7 +640,6 @@ int PerformBypass(const struct BypassArgs &args) {
(data_cell_out != SIZE_FP16 && data_cell_out != SIZE_FP32)) {
return -EFAULT;
}
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
if (ERROR == g_fpgainfo.pe_data->pes[PE_IDX_BYPASS]->status) {
ret = -EIO;
......@@ -567,7 +647,8 @@ int PerformBypass(const struct BypassArgs &args) {
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
}
reg_writeq(reg_ActivationArgs,
REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR); // active function
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(input_address_phy, REG_CONVERT_SRC_ADDR);
reg_writeq(output_address_phy, REG_CONVERT_DST_ADDR);
......@@ -585,12 +666,27 @@ int PerformBypass(const struct BypassArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
reg_writeq(reg_ActivationArgs, REG_ACTIVATION_MODE_AND_LEAKY_RELU_FACTOR);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
#endif
return 0;
} // PerformBypass
uint64_t FPGAVersion() {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFpgaBypass===========";
#endif
#ifdef PADDLE_MOBILE_ZU5
uint64_t fpga_ver = 0;
pthread_mutex_lock(&g_fpgainfo.pe_data->mutex);
fpga_ver = reg_readq(REG_HARDWARE_STATUS);
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return fpga_ver;
#endif
return 0;
} // FPGAVersion
int ComputeFPGAConcat(const struct ConcatArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFpgaConcat===========";
......@@ -655,6 +751,45 @@ void deconv_post_process(const struct DeconvArgs &args) {
fpga_flush(args.output.address,
num * align_deconv_row_len * deconv_h * sizeof(int16_t));
}
void DWDeconv_post_process(const struct DWDeconvArgs &args) {
int sub_conv_n = args.sub_conv_num;
int sub_height = args.sub_output_height;
int sub_width = args.sub_output_width;
int omit_size = args.omit_size;
int channel = args.filter_num;
int num = 1;
int origin_h = sub_height * sub_conv_n;
int origin_w = sub_width * sub_conv_n;
int align_origin_w = align_to_x(origin_w * channel, IMAGE_ALIGNMENT);
int deconv_h = origin_h - 2 * omit_size;
int deconv_w = origin_w - 2 * omit_size;
int deconv_row_len = deconv_w * channel;
int align_deconv_row_len = align_to_x(deconv_row_len, IMAGE_ALIGNMENT);
for (int idx = 0; idx < sub_conv_n; ++idx) {
paddle_mobile::fpga::fpga_invalidate(
args.dw_conv_args[idx]->output.address,
align_origin_w * origin_h * sizeof(int16_t));
}
int deconv_idx = 0;
for (int nn = 0; nn < num; ++nn) {
for (int hh = 0; hh < origin_h; ++hh) {
int hx = (hh % sub_conv_n);
auto sub_t = (int16_t *)(args.dw_conv_args[sub_conv_n - hx - 1] // NOLINT
->output.address);
int hi = (hh / sub_conv_n);
if ((hh < omit_size) || (hh >= (origin_h - omit_size))) continue;
int sidx = (nn * origin_h * align_origin_w + hi * align_origin_w +
omit_size * channel);
fpga_copy((int16_t *)(args.output.address) + deconv_idx, // NOLINT
sub_t + sidx, sizeof(int16_t) * deconv_row_len); // NOLINT
deconv_idx += align_deconv_row_len;
}
}
fpga_flush(args.output.address,
num * align_deconv_row_len * deconv_h * sizeof(int16_t));
}
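DWDeconv_post_process rebuilds the full-resolution output by interleaving rows from the sub-convolution outputs: output row hh comes from sub-conv (sub_conv_n - hh % sub_conv_n - 1) at local row hh / sub_conv_n, with omit_size rows trimmed from each edge. A standalone trace of that mapping:

#include <cstdio>

int main() {
  const int sub_conv_n = 2, origin_h = 8, omit_size = 1;
  for (int hh = 0; hh < origin_h; ++hh) {
    if (hh < omit_size || hh >= origin_h - omit_size) continue;  // trimmed
    int src_sub_conv = sub_conv_n - (hh % sub_conv_n) - 1;
    int src_row = hh / sub_conv_n;
    std::printf("out row %d <- sub-conv %d, local row %d\n",
                hh, src_sub_conv, src_row);
  }
  return 0;
}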
int ComputeFpgaDeconv(const struct DeconvArgs &args) {
#ifdef FPGA_PRINT_MODE
......@@ -755,7 +890,7 @@ int ComputeFPGASplit(const struct SplitArgs &args) {
int ComputeDWConv(const struct DWconvArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeDWConv===========";
DLOG << " mode:" << args.relu_enabled;
// DLOG << " mode:" << args.relu_enabled;
DLOG << " image_address:" << args.image.address
<< " image_scale_address:" << args.image.scale_address
<< " image_channels:" << args.image.channels
......@@ -778,7 +913,8 @@ int ComputeDWConv(const struct DWconvArgs &args) {
uint64_t output_scale = 0;
uint64_t timer_cnt = 0;
int ret = 0;
uint64_t cmd = args.relu_enabled;
// uint64_t cmd = args.relu_enabled;
uint64_t cmd = 0;
uint64_t image_physical_address = 0;
uint64_t output_physical_address = 0;
uint64_t filter_physical_address = 0;
......@@ -792,17 +928,21 @@ int ComputeDWConv(const struct DWconvArgs &args) {
align_to_x((uint64_t)args.image.channels, IMAGE_ALIGNMENT);
uint64_t filter_amount_per_row_align =
filter_N_align * (uint64_t)args.kernel.width;
uint64_t filter_amount_align = filter_N_align * (uint64_t)args.kernel.width *
(uint64_t)args.kernel.height;
uint64_t sub_filter_amount_align = filter_N_align *
(uint64_t)args.kernel.width *
(uint64_t)args.kernel.height;
uint64_t filter_amount_align =
sub_filter_amount_align * (uint64_t)args.sub_conv_num;
uint32_t output_height = (uint32_t)(
(args.image.height + args.image.pad_height * 2 - args.kernel.height) /
args.kernel.stride_h +
1);
uint32_t output_width = (uint32_t)(
(args.image.width + args.image.pad_width * 2 - args.kernel.width) /
args.kernel.stride_w +
1);
((args.image.width + args.image.pad_width * 2 - args.kernel.width) /
args.kernel.stride_w +
1) *
args.sub_conv_num);
uint64_t image_amount_per_row =
align_to_x((uint64_t)args.image.width * (uint64_t)args.image.channels,
......@@ -845,12 +985,15 @@ int ComputeDWConv(const struct DWconvArgs &args) {
/*restart scale*/
reg_writeq(output_scale, REG_SCALE_PARAMETER);
reg_writeq(image_physical_address, REG_POOLING_IMAGE_BASE_ADDR);
reg_writeq(output_physical_address, REG_POOLING_RESULT_BASE_ADDR);
reg_writeq((bias_physical_address << 32 | filter_physical_address),
REG_DWCONV_FILTER_BASE_ADDR);
reg_writeq(filter_amount_per_row_align | (filter_amount_align << 32),
REG_DWCONV_FILTER_SHAPE);
reg_writeq(sub_filter_amount_align | (((uint64_t)args.sub_conv_num) << 32),
REG_DWCONV_FILTER_SUBNUMBER);
reg_writeq(filter_N_align, REG_DWCONV_FILTER_N_ALIGN);
reg_writeq(
......@@ -904,10 +1047,88 @@ int ComputeDWConv(const struct DWconvArgs &args) {
output_scale = reg_readq(REG_SCALE_PARAMETER);
output_scale = (output_scale << 32) | (output_scale >> 32);
fpga_copy(args.output.scale_address, &output_scale, sizeof(float) * 2);
DLOG << "output_scale:" << output_scale;
pthread_mutex_unlock(&g_fpgainfo.pe_data->mutex);
return ret;
#endif
return 0;
}
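The reshaped output width above is the usual convolution output formula multiplied by sub_conv_num, since each sub-convolution contributes an interleaved strip in the deconv-as-sub-conv path; a quick numeric check with illustrative values:

#include <cassert>

int main() {
  int width = 14, pad_w = 1, kernel_w = 3, stride_w = 1, sub_conv_num = 2;
  int out_w = ((width + 2 * pad_w - kernel_w) / stride_w + 1) * sub_conv_num;
  assert(out_w == 28);  // (14 + 2 - 3 + 1) * 2
  return 0;
}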
int ComputeDWDeconv(const struct DWDeconvArgs &args) {
#ifdef FPGA_PRINT_MODE
DLOG << "=============ComputeFPGADeConv===========";
DLOG << " filter_num:" << args.filter_num
<< " group_num:" << args.group_num << "omit_size:" << args.omit_size
<< "sub_output_width: " << args.sub_output_width
<< "sub_output_height: " << args.sub_output_height
<< " sub_conv_num:" << args.sub_conv_num;
DLOG << "args.output.address: " << args.output.address
<< "args.output.scale_address: " << args.output.scale_address;
#endif
int sub_conv_num = args.sub_conv_num;
#ifdef COST_TIME_PRINT
timeval start, end;
long dif_sec, dif_usec; // NOLINT
#endif
for (int i = 0; i < sub_conv_num; i++) {
#ifdef COST_TIME_PRINT
gettimeofday(&start, NULL);
#endif
ComputeDWConv(*args.dw_conv_args[i]);
#ifdef COST_TIME_PRINT
gettimeofday(&end, NULL);
dif_sec = end.tv_sec - start.tv_sec;
dif_usec = end.tv_usec - start.tv_usec;
std::cout << "deconv basic_conv: " << i << " times: "
<< " cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
<< std::endl;
#endif
}
if (sub_conv_num > 1) {
float max_scale = -1.0f;
#ifdef COST_TIME_PRINT
gettimeofday(&start, NULL);
#endif
for (int i = 0; i < sub_conv_num; i++) {
paddle_mobile::fpga::fpga_invalidate(
args.dw_conv_args[i]->output.scale_address, 2 * sizeof(float));
float ptr_scale = (args.dw_conv_args[i]->output.scale_address)[0];
if (ptr_scale > max_scale) {
max_scale = ptr_scale;  // track the running maximum across sub-convs
args.output.scale_address[0] = ptr_scale;
args.output.scale_address[1] =
(args.dw_conv_args[i]->output.scale_address)[1];
}
}
#ifdef COST_TIME_PRINT
gettimeofday(&end, NULL);
dif_sec = end.tv_sec - start.tv_sec;
dif_usec = end.tv_usec - start.tv_usec;
std::cout << "deconv scale "
<< " cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
<< std::endl;
#endif
}
#ifdef COST_TIME_PRINT
gettimeofday(&start, NULL);
#endif
DWDeconv_post_process(args);
#ifdef COST_TIME_PRINT
gettimeofday(&end, NULL);
dif_sec = end.tv_sec - start.tv_sec;
dif_usec = end.tv_usec - start.tv_usec;
std::cout << "deconv_post_process "
<< " cost time: " << (dif_sec * 1000000 + dif_usec) << "us"
<< std::endl;
#endif
return 0;
} // ComputeDWDeconv
} // namespace fpga
} // namespace paddle_mobile
......@@ -154,7 +154,6 @@ int memory_request(struct fpga_memory *memory, size_t size, uint64_t *addr) {
unsigned int nr = (unsigned int)_nr;
int ret = 0;
uint64_t a_size = FPGA_PAGE_SIZE * nr;
DLOG << a_size;
pthread_mutex_lock(&memory->mutex);
......@@ -391,9 +390,6 @@ int fpga_invalidate_driver(void *address, size_t size) {
void fpga_copy_driver(void *dest, const void *src, size_t num) {
uint64_t i;
DLOG << "dest:" << dest << " src:" << src << " size:" << num;
for (i = 0; i < num; i++) {
*((int8_t *)dest + i) = *((int8_t *)src + i); // NOLINT
}
......
......@@ -29,7 +29,7 @@ namespace driver {
#define DIV_ROUND_UP(n, d) (((n) + (d)-1) / (d))
#define FPGA_REG_PHY_ADDR 0xa0000000
#define FPGA_REG_PHY_ADDR 0x80000000
#define FPGA_REG_SIZE 0x1000
#define FPGA_MEM_PHY_ADDR 0x40000000
#define FPGA_MEM_SIZE 0x80000000
......
......@@ -76,7 +76,7 @@ int32_t convertmantissa(int32_t i) {
}
float fp16_2_fp32(int16_t fp16_num) {
int16_t se_fp16 = fp16_num >> 10;
int16_t se_fp16 = (fp16_num >> 10) & 0x3f;
int16_t m_fp16 = fp16_num & 0x3ff;
int32_t e_fp32 = 0;
int16_t offset = 0;
......@@ -94,7 +94,7 @@ float fp16_2_fp32(int16_t fp16_num) {
e_fp32 = 0x80000000;
offset = 0;
} else if (se_fp16 < 63) {
e_fp32 = 0x80000000 + (se_fp16 - 32) << 23;
e_fp32 = 0x80000000 + ((se_fp16 - 32) << 23);
offset = 1024;
} else { // se_fp16 == 63
e_fp32 = 0xC7800000;
......
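The patched fp16_2_fp32 above masks the sign/exponent field and parenthesizes the exponent shift, which the old code got wrong via operator precedence. As a cross-check, here is a plain IEEE binary16 -> binary32 reference conversion, independent of the table-based path in this file (the function name is ours):

#include <cassert>
#include <cstdint>
#include <cstring>

float half_to_float_ref(uint16_t h) {
  uint32_t sign = (h >> 15) & 1u;
  uint32_t exp = (h >> 10) & 0x1Fu;
  uint32_t man = h & 0x3FFu;
  uint32_t bits;
  if (exp == 0) {
    if (man == 0) {
      bits = sign << 31;  // signed zero
    } else {
      // Subnormal half: renormalize into a normal float.
      exp = 113;  // 127 - 15 + 1
      while ((man & 0x400u) == 0) {
        man <<= 1;
        --exp;
      }
      man &= 0x3FFu;
      bits = (sign << 31) | (exp << 23) | (man << 13);
    }
  } else if (exp == 0x1Fu) {
    bits = (sign << 31) | 0x7F800000u | (man << 13);  // inf / NaN
  } else {
    bits = (sign << 31) | ((exp + 112) << 23) | (man << 13);  // normal
  }
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  assert(half_to_float_ref(0x3C00) == 1.0f);   // fp16 1.0, cf. the 0x3c00
  assert(half_to_float_ref(0xC000) == -2.0f);  // EW-add constants below
  return 0;
}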
......@@ -45,6 +45,7 @@ enum ActivationType {
LEAKYRELU = 1,
SIGMOID = 2,
TANH = 3,
SOFTMAX = 4,
};
struct ActivationArgs {
......@@ -132,7 +133,7 @@ struct DeconvTxParm {
#endif
struct ConvArgs {
bool relu_enabled;
// bool relu_enabled;
void* sb_address; // scale and bias
void* filter_address;
float* filter_scale_address;
......@@ -198,7 +199,7 @@ struct PoolingArgs {
};
struct EWAddArgs {
bool relu_enabled;
// bool relu_enabled;
uint32_t const0; // output0 = const0 x input0 + const1 x input1;
uint32_t const1;
struct ImageInputArgs image0;
......@@ -229,13 +230,27 @@ struct DeconvArgs {
std::vector<std::shared_ptr<SplitConvArgs>> split_conv_args;
};
struct DWconvArgs {
bool relu_enabled;
uint32_t sub_conv_num;
// bool relu_enabled;
void* bias_address;
void* filter_address;
struct KernelArgs kernel;
struct ImageInputArgs image;
struct ImageOutputArgs output;
};
struct DWDeconvArgs {
uint32_t sub_conv_num;
uint32_t group_num;
uint32_t filter_num;
uint32_t omit_size;
uint32_t sub_output_width;
uint32_t sub_output_height;
struct ImageOutputArgs output;
std::vector<std::shared_ptr<DWconvArgs>> dw_conv_args;
std::vector<std::shared_ptr<char>> vector_dw_conv_space;
};
// static inline int align_to_x(int num, int x) { return (num + x - 1) / x * x;
// }
static inline uint32_t align_to_x(int64_t num, int64_t x) {
......
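align_to_x rounds num up to the next multiple of x via the classic (num + x - 1) / x * x identity shown in the comment above; a standalone check:

#include <cassert>
#include <cstdint>

static inline uint32_t align_to_x(int64_t num, int64_t x) {
  return static_cast<uint32_t>((num + x - 1) / x * x);
}

int main() {
  assert(align_to_x(1, 16) == 16);
  assert(align_to_x(16, 16) == 16);
  assert(align_to_x(17, 16) == 32);
  return 0;
}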
......@@ -18,6 +18,7 @@ limitations under the License. */
namespace paddle_mobile {
namespace fpga {
uint64_t FPGAVersion();
int PerformBypass(const struct BypassArgs& args);
int ComputeBasicConv(const struct ConvArgs& args);
int ComputeFpgaPool(const struct PoolingArgs& args);
......@@ -28,5 +29,7 @@ int ComputeFPGAConcat(const struct ConcatArgs& args);
int ComputeFPGASplit(const struct SplitArgs& args);
int ComputeFpgaDeconv(const struct DeconvArgs& args);
int ComputeDWConv(const struct DWconvArgs& args);
int ComputeDWDeconv(const struct DWDeconvArgs& args);
} // namespace fpga
} // namespace paddle_mobile
......@@ -31,6 +31,10 @@ DEFINE_ACTIVATION_INFERSHAPE(Relu6);
#ifdef SIGMOID_OP
DEFINE_ACTIVATION_INFERSHAPE(Sigmoid);
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_FPGA
REGISTER_OPERATOR_FPGA(sigmoid, ops::SigmoidOp);
#endif
#endif // SIGMOID_OP
#ifdef TANH_OP
......
......@@ -22,7 +22,10 @@ namespace operators {
template <>
bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto bias = param->Bias();
......@@ -61,10 +64,10 @@ bool ConvAddBNKernel<FPGA, float>::Init(FusionConvAddBNParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
......
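The same substitution repeats in every FPGA kernel below: the boolean relu_enabled becomes an (ActivationType, fp16 slope) pair carried on the output args instead of a dedicated flag. In miniature, with the struct shapes reduced to just what the diff shows:

#include <cstdint>

enum ActivationType { NONE = 0, LEAKYRELU = 1, SIGMOID = 2, TANH = 3, SOFTMAX = 4 };

struct ActivationArgs {
  ActivationType activation_type = NONE;
  int16_t leaky_relu_negative_slope = 0;
};
struct ImageOutputArgs {
  ActivationArgs activation;
};

int main() {
  ImageOutputArgs output;
  // Was: relu_enabled = true. A plain ReLU is LEAKYRELU with slope 0.
  output.activation.activation_type = LEAKYRELU;
  output.activation.leaky_relu_negative_slope = 0;
  return output.activation.activation_type == LEAKYRELU ? 0 : 1;
}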
......@@ -23,7 +23,10 @@ namespace operators {
template <>
bool ConvAddBNReluKernel<FPGA, float>::Init(
FusionConvAddBNReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -64,16 +67,16 @@ bool ConvAddBNReluKernel<FPGA, float>::Init(
if (groups == channel) {
fpga::format_dwconv_data(filter, out, new_scale_ptr, &new_bias_ptr);
fpga::DWconvArgs dwconv_arg = {0};
fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, relu_enabled,
strides[0], strides[1], paddings[0], paddings[1],
new_bias_ptr);
fpga::fill_dwconv_arg(&dwconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, strides[0], strides[1],
paddings[0], paddings[1], new_bias_ptr);
param->SetFpgaArgs(dwconv_arg);
} else {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), strides[0], strides[1], paddings[0],
paddings[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(), strides[0],
strides[1], paddings[0], paddings[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
}
return true;
......
......@@ -21,7 +21,10 @@ namespace operators {
template <>
bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -40,10 +43,10 @@ bool ConvAddKernel<FPGA, float>::Init(FusionConvAddParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -21,7 +21,10 @@ namespace operators {
template <>
bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -40,10 +43,10 @@ bool ConvAddReluKernel<FPGA, float>::Init(FusionConvAddReluParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -22,7 +22,10 @@ namespace operators {
template <>
bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output();
......@@ -53,10 +56,10 @@ bool ConvBNKernel<FPGA, float>::Init(FusionConvBNParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -22,7 +22,10 @@ namespace operators {
template <>
bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
auto filter = const_cast<Tensor *>(param->Filter());
auto out = param->Output();
......@@ -53,10 +56,10 @@ bool ConvBNReluKernel<FPGA, float>::Init(FusionConvBNReluParam<FPGA> *param) {
fpga::format_conv_data(filter, out, &bs_ptr, param->Groups());
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
fpga::fill_split_arg(&conv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -23,7 +23,10 @@ namespace operators {
template <>
bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -49,13 +52,24 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
"filter width should be equal to filter height ");
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be a multiple of stride axis");
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
if (param->Groups() == channel) {
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
activation_enable, leaky_relu_negative_slope,
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
return true;
}
......@@ -63,7 +77,11 @@ bool DeconvAddKernel<FPGA, float>::Init(FusionDeconvAddParam<FPGA> *param) {
template <>
void DeconvAddKernel<FPGA, float>::Compute(
const FusionDeconvAddParam<FPGA> &param) {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
if (param.Groups() == param.Output()->dims()[1]) {
fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
} else {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
}
}
} // namespace operators
......
......@@ -24,7 +24,10 @@ namespace operators {
template <>
bool DeconvAddReluKernel<FPGA, float>::Init(
FusionDeconvAddReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->Input());
const Tensor *bias = param->Bias();
auto bias_ptr = bias->data<float>();
......@@ -50,20 +53,36 @@ bool DeconvAddReluKernel<FPGA, float>::Init(
"filter width should be equal to filter height ");
PADDLE_MOBILE_ENFORCE(((filter->dims()[2] % param->Strides()[0]) == 0),
"filter axis should be a multiple of stride axis");
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, relu_enabled,
param->Groups(), param->Strides()[0],
param->Strides()[1], param->Paddings()[0],
param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
if (param->Groups() == channel) {
fpga::format_DWDeconv_data(filter, out, &bs_ptr, param->Groups(),
sub_conv_n);
fpga::DWDeconvArgs DWDeconv_arg = {0};
fpga::fill_DWDeconv_arg(&DWDeconv_arg, input, out, filter,
activation_enable, leaky_relu_negative_slope,
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(DWDeconv_arg);
} else {
fpga::format_deconv_data(filter, out, &bs_ptr, param->Groups(), sub_conv_n);
fpga::DeconvArgs deconv_arg = {0};
fpga::fill_deconv_arg(&deconv_arg, input, out, filter, activation_enable,
leaky_relu_negative_slope, param->Groups(),
param->Strides()[0], param->Strides()[1],
param->Paddings()[0], param->Paddings()[1], bs_ptr);
param->SetFpgaArgs(deconv_arg);
}
return true;
}
template <>
void DeconvAddReluKernel<FPGA, float>::Compute(
const FusionDeconvAddReluParam<FPGA> &param) {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
// fpga::ComputeFpgaDeconv(param.FpgaArgs());
if (param.Groups() == param.Output()->dims()[1]) {
fpga::ComputeDWDeconv(param.FpgaDWDconvArgs());
} else {
fpga::ComputeFpgaDeconv(param.FpgaArgs());
}
}
} // namespace operators
......
......@@ -20,7 +20,10 @@ namespace operators {
template <>
bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto *input_x = const_cast<LoDTensor *>(param->InputX());
auto *input_y = const_cast<LoDTensor *>(param->InputY());
auto *out = param->Out();
......@@ -30,7 +33,10 @@ bool ElementwiseAddKernel<FPGA, float>::Init(ElementwiseAddParam<FPGA> *param) {
auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs = {0};
ewaddArgs.relu_enabled = relu_enabled;
// ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
ewaddArgs.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
  ewaddArgs.const0 = 0x3c00;  // fp16 encoding of 1.0
  ewaddArgs.const1 = 0x3c00;  // fp16 encoding of 1.0
ewaddArgs.image0.address = input_x_ptr;
......
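On the 0x3c00 constants: in IEEE-754 half precision (1 sign bit, 5 exponent bits with bias 15, 10 mantissa bits), 0x3C00 encodes exactly 1.0, matching the comments; const0 and const1 appear to be fp16 scale factors applied to the two input images, here both neutral. A quick check with a hypothetical helper:
#include <cstdint>
// fp16 layout: sign(1) | exponent(5, bias 15) | mantissa(10).
// 1.0 -> sign 0, biased exponent 15 (0b01111), mantissa 0 -> 0x3C00.
uint16_t fp16_one() { return static_cast<uint16_t>((0u << 15) | (15u << 10) | 0u); }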
......@@ -21,7 +21,10 @@ namespace operators {
template <>
bool ElementwiseAddReluKernel<FPGA, float>::Init(
ElementwiseAddReluParam<FPGA> *param) {
bool relu_enabled = true;
// bool relu_enabled = true;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::LEAKYRELU;
int16_t leaky_relu_negative_slope = 0;
auto *input_x = const_cast<LoDTensor *>(param->InputX());
auto *input_y = const_cast<LoDTensor *>(param->InputY());
auto *out = param->Out();
......@@ -31,7 +34,10 @@ bool ElementwiseAddReluKernel<FPGA, float>::Init(
auto out_ptr = out->mutable_data<float>();
fpga::EWAddArgs ewaddArgs = {0};
ewaddArgs.relu_enabled = relu_enabled;
// ewaddArgs.relu_enabled = relu_enabled;
ewaddArgs.output.activation.activation_type = activation_enable;
ewaddArgs.output.activation.leaky_relu_negative_slope =
leaky_relu_negative_slope;
  ewaddArgs.const0 = 0x3c00;  // fp16 encoding of 1.0
  ewaddArgs.const1 = 0x3c00;  // fp16 encoding of 1.0
ewaddArgs.image0.address = input_x_ptr;
......
......@@ -19,12 +19,34 @@ namespace operators {
template <>
bool FetchKernel<FPGA, float>::Init(FetchParam<FPGA> *param) {
Tensor *output = param->Out();
// fpga::format_fp16_ofm(output);
return true;
}
template <>
void FetchKernel<FPGA, float>::Compute(const FetchParam<FPGA> &param) {
param.Out()->ShareDataWith(*(param.InputX()));
/*auto input =
reinterpret_cast<Tensor *>(const_cast<Tensor *>(param.InputX()));
fpga::format_image(input);
auto input_ptr = input->data<float>();
Tensor *output = param.Out();
auto output_ptr = output->data<float>();
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP32;
args.input_layout_type = fpga::LAYOUT_CHW;
args.output_layout_type = fpga::LAYOUT_HWC;
args.image.address = reinterpret_cast<void *>(input_ptr);
args.image.channels = (uint32_t)input->dims()[1];
  args.image.height = (input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
  args.image.width = (input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
  args.image.pad_height = 0;
  args.image.pad_width = 0;
  args.output.address = output_ptr;
  args.output.scale_address = output->scale;
  fpga::PerformBypass(args);*/
}
template class FetchKernel<FPGA, float>;
......
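For context, ShareDataWith makes the fetch output alias the input buffer instead of copying or converting it; the commented-out block keeps an fp16-to-fp32 bypass around for the case where conversion is needed on the way out. Sketch of the aliasing effect, assuming Tensor behaves as elsewhere in this diff (out and in stand for the Tensor pointers in the kernel):
#include <cassert>
// Aliasing, not copying: both tensors view the same storage afterwards.
out->ShareDataWith(*in);
assert(out->data<float>() == in->data<float>());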
......@@ -20,7 +20,10 @@ namespace operators {
template <>
bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
bool relu_enabled = false;
// bool relu_enabled = false;
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::NONE;
int16_t leaky_relu_negative_slope = 0;
auto input_x = const_cast<LoDTensor *>(param->InputX());
auto filter = const_cast<Tensor *>(param->InputY());
const Tensor *input_z = param->InputZ();
......@@ -55,8 +58,8 @@ bool FusionFcKernel<FPGA, float>::Init(FusionFcParam<FPGA> *param) {
fpga::format_fp16_ofm(out);
fpga::SplitConvArgs conv_arg = {0};
fpga::fill_split_arg(&conv_arg, input_x, out, filter, relu_enabled, 1, 1, 1,
0, 0, bs_ptr);
fpga::fill_split_arg(&conv_arg, input_x, out, filter, activation_enable,
leaky_relu_negative_slope, 1, 1, 1, 0, 0, bs_ptr);
param->SetFpgaArgs(conv_arg);
return true;
}
......
......@@ -22,6 +22,12 @@ namespace operators {
template <>
bool ReshapeKernel<FPGA, float>::Init(ReshapeParam<FPGA> *param) {
param->Out()->ShareDataWith(*param->InputX());
const int in_n = param->InputX()->dims()[0];
const int in_c = param->InputX()->dims()[1];
const int in_h = param->InputX()->dims()[2];
const int in_w = param->InputX()->dims()[3];
auto out = param->Out();
out->Resize(framework::make_ddim({in_n, in_c * in_h * in_w}));
return true;
}
......
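The added Resize collapses an NCHW input to rank 2. Worked example with hypothetical shapes: an input of {2, 3, 4, 5} produces an output of {2, 60}, since 3 * 4 * 5 = 60:
// Same arithmetic as the kernel above, concrete (made-up) shapes:
// in  = {2, 3, 4, 5}  (N, C, H, W)
// out = {2, 3 * 4 * 5} = {2, 60}
out->Resize(framework::make_ddim({2, 60}));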
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SIGMOID_OP
#include "operators/kernel/activation_kernel.h"
namespace paddle_mobile {
namespace operators {
template <>
bool SigmoidKernel<FPGA, float>::Init(SigmoidParam<FPGA> *param) {
paddle_mobile::fpga::ActivationType activation_enable =
paddle_mobile::fpga::SIGMOID;
int16_t leaky_relu_negative_slope = 0;
auto input = const_cast<Tensor *>(param->InputX());
auto input_ptr = input->data<float>();
auto out = param->Out();
fpga::format_fp16_ofm(out);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_data_type = fpga::DATA_TYPE_FP16;
args.output_data_type = fpga::DATA_TYPE_FP16;
args.image.address = input_ptr;
args.image.height =
(input->dims().size() == 4) ? (uint32_t)input->dims()[2] : 1;
args.image.width =
(input->dims().size() == 4) ? (uint32_t)input->dims()[3] : 1;
args.image.channels = (uint32_t)input->dims()[1];
args.output.address = out->data<float>();
args.output.scale_address = out->scale;
args.output.activation.activation_type = activation_enable;
args.output.activation.leaky_relu_negative_slope = leaky_relu_negative_slope;
param->SetFpgaArgs(args);
return true;
}
template <>
void SigmoidKernel<FPGA, float>::Compute(const SigmoidParam<FPGA> &param) {
fpga::PerformBypass(param.FpgaArgs());
}
} // namespace operators
} // namespace paddle_mobile
#endif
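For reference, the activation this bypass is configured with computes the logistic sigmoid; a scalar reference implementation (not the FPGA code):
#include <cmath>
// sigmoid(x) = 1 / (1 + exp(-x)), applied elementwise on the device side.
float sigmoid_ref(float x) { return 1.0f / (1.0f + std::exp(-x)); }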
......@@ -26,7 +26,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
auto input_ptr = input->data<float>();
auto out = param->Out();
fpga::format_fp32_ofm(out);
auto float_input = new Tensor;
if (input->dims().size() == 2) {
float_input->mutable_data<float>({1, input->dims()[1]});
......@@ -36,7 +35,6 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
} else {
DLOG << "wrong dimension of softmax input";
}
fpga::format_fp32_ofm(float_input);
fpga::BypassArgs args = {fpga::DATA_TYPE_FP16};
args.input_layout_type = fpga::LAYOUT_HWC;
......@@ -53,6 +51,7 @@ bool SoftmaxKernel<FPGA, float>::Init(SoftmaxParam<FPGA> *param) {
args.output.scale_address = float_input->scale;
param->SetFloatInput(float_input);
param->SetFpgaArgs(args);
return true;
}
......
......@@ -1078,6 +1078,15 @@ class SigmoidParam : public OpParam {
private:
RType *input_x_;
RType *out_;
#ifdef PADDLE_MOBILE_FPGA
private:
fpga::BypassArgs fpga_bypass_args;
public:
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
};
#endif
......@@ -1200,6 +1209,20 @@ class FetchParam : public OpParam {
private:
RType *input_x_;
Tensor *out_;
#ifdef PADDLE_MOBILE_FPGA
private:
std::shared_ptr<RType> float_input_x_;
fpga::BypassArgs fpga_bypass_args;
public:
RType *FloatInput() const {
return float_input_x_ == nullptr ? input_x_ : float_input_x_.get();
}
void SetFloatInput(Tensor *input) { float_input_x_.reset(input); }
const fpga::BypassArgs &FpgaArgs() const { return fpga_bypass_args; }
void SetFpgaArgs(const fpga::BypassArgs &args) { fpga_bypass_args = args; }
#endif
};
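FloatInput() falls back to the raw input when no fp32 staging tensor has been installed via SetFloatInput(), so callers get a usable tensor either way. Hypothetical call site:
// Returns float_input_x_.get() if set, otherwise input_x_.
auto *src = param.FloatInput();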
#ifdef FILL_CONSTANT_OP
......@@ -2357,10 +2380,17 @@ class ConvTransposeParam : public OpParam {
private:
fpga::DeconvArgs fpga_conv_args;
fpga::DWDeconvArgs fpga_DWDeconv_args;
public:
const fpga::DeconvArgs &FpgaArgs() const { return fpga_conv_args; }
const fpga::DWDeconvArgs &FpgaDWDconvArgs() const {
return fpga_DWDeconv_args;
}
void SetFpgaArgs(const fpga::DeconvArgs &args) { fpga_conv_args = args; }
void SetFpgaArgs(const fpga::DWDeconvArgs &args) {
fpga_DWDeconv_args = args;
}
#endif
};
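The two SetFpgaArgs overloads let a kernel's Init() store either argument struct under a single method name; C++ overload resolution selects the matching member. Hypothetical call sites:
fpga::DWDeconvArgs dw_args = {0};
param->SetFpgaArgs(dw_args);    // stores into fpga_DWDeconv_args
fpga::DeconvArgs conv_args = {0};
param->SetFpgaArgs(conv_args);  // stores into fpga_conv_args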
......