提交 65df2628 编写于 作者: L liuruilong

merge super resolution

......@@ -96,3 +96,7 @@ metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a
metal/paddle-mobile-demo/paddle-mobile-demo/images
metal/paddle-mobile-demo/paddle-mobile-demo/models
metal/paddle-mobile-demo/paddle-mobile-demo/Resources
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/images
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/models
metal/MobileNetDemo/MobileNetDemo/Resources
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 50;
objects = {
/* Begin PBXBuildFile section */
FA37E99B9AD29A07FEE8E743 /* Pods_MobileNetDemo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD3A2E3175627EF63DACA36C /* Pods_MobileNetDemo.framework */; };
FC74BB3621DFAFEC0055232B /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC74BB3521DFAFEC0055232B /* MobileNet.swift */; };
FCB40DA221E0B7C60075EC91 /* MobilenetPreProcess.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCB40DA121E0B7C60075EC91 /* MobilenetPreProcess.metal */; };
FCB40DA421E0B85B0075EC91 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40DA321E0B85B0075EC91 /* MetalHelper.swift */; };
FCB40DE921E0B9410075EC91 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCB40DD221E0B9410075EC91 /* banana.jpeg */; };
FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */; };
FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5021E0CEBB0075EC91 /* mobilenet_params */; };
FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5321E0CEF80075EC91 /* synset.txt */; };
FCD3873821E1C31F0052F3D0 /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; };
FCD3873921E1C31F0052F3D0 /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF2870921DFAEC7009A87DA /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2870821DFAEC7009A87DA /* AppDelegate.swift */; };
FCF2870B21DFAEC7009A87DA /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2870A21DFAEC7009A87DA /* ViewController.swift */; };
FCF2870E21DFAEC7009A87DA /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCF2870C21DFAEC7009A87DA /* Main.storyboard */; };
FCF2871021DFAEC8009A87DA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FCF2870F21DFAEC8009A87DA /* Assets.xcassets */; };
FCF2871321DFAEC8009A87DA /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCF2871121DFAEC8009A87DA /* LaunchScreen.storyboard */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
FCB40DFC21E0BC360075EC91 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
FCD3873921E1C31F0052F3D0 /* paddle_mobile.framework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-MobileNetDemo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo.debug.xcconfig"; sourceTree = "<group>"; };
DD3A2E3175627EF63DACA36C /* Pods_MobileNetDemo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_MobileNetDemo.framework; sourceTree = BUILT_PRODUCTS_DIR; };
E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-MobileNetDemo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo.release.xcconfig"; sourceTree = "<group>"; };
FC74BB3521DFAFEC0055232B /* MobileNet.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FCB40DA121E0B7C60075EC91 /* MobilenetPreProcess.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = MobilenetPreProcess.metal; sourceTree = "<group>"; };
FCB40DA321E0B85B0075EC91 /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = MetalHelper.swift; path = "../../paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift"; sourceTree = "<group>"; };
FCB40DD221E0B9410075EC91 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; };
FCB40E5021E0CEBB0075EC91 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; };
FCB40E5321E0CEF80075EC91 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MobileNetDemo.app; sourceTree = BUILT_PRODUCTS_DIR; };
FCF2870821DFAEC7009A87DA /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
FCF2870A21DFAEC7009A87DA /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
FCF2870D21DFAEC7009A87DA /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
FCF2870F21DFAEC8009A87DA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
FCF2871221DFAEC8009A87DA /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
FCF2871421DFAEC8009A87DA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
FCF2870221DFAEC7009A87DA /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
FCD3873821E1C31F0052F3D0 /* paddle_mobile.framework in Frameworks */,
FA37E99B9AD29A07FEE8E743 /* Pods_MobileNetDemo.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
0DDBA47E92A64BC7B0385B0F /* Frameworks */ = {
isa = PBXGroup;
children = (
DD3A2E3175627EF63DACA36C /* Pods_MobileNetDemo.framework */,
);
name = Frameworks;
sourceTree = "<group>";
};
1EACBAAF38D9EDE0AC2B3F90 /* Pods */ = {
isa = PBXGroup;
children = (
4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */,
E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */,
);
name = Pods;
sourceTree = "<group>";
};
FCB40DCF21E0B9410075EC91 /* Resources */ = {
isa = PBXGroup;
children = (
FCB40DD021E0B9410075EC91 /* images */,
FCB40DD921E0B9410075EC91 /* models */,
);
path = Resources;
sourceTree = "<group>";
};
FCB40DD021E0B9410075EC91 /* images */ = {
isa = PBXGroup;
children = (
FCB40DD221E0B9410075EC91 /* banana.jpeg */,
);
path = images;
sourceTree = "<group>";
};
FCB40DD921E0B9410075EC91 /* models */ = {
isa = PBXGroup;
children = (
FCB40E4E21E0CEBB0075EC91 /* mobilenet_combine */,
);
path = models;
sourceTree = "<group>";
};
FCB40E4E21E0CEBB0075EC91 /* mobilenet_combine */ = {
isa = PBXGroup;
children = (
FCB40E5321E0CEF80075EC91 /* synset.txt */,
FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */,
FCB40E5021E0CEBB0075EC91 /* mobilenet_params */,
);
path = mobilenet_combine;
sourceTree = "<group>";
};
FCF286FC21DFAEC7009A87DA = {
isa = PBXGroup;
children = (
FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */,
FCF2870721DFAEC7009A87DA /* MobileNetDemo */,
FCF2870621DFAEC7009A87DA /* Products */,
1EACBAAF38D9EDE0AC2B3F90 /* Pods */,
0DDBA47E92A64BC7B0385B0F /* Frameworks */,
);
sourceTree = "<group>";
};
FCF2870621DFAEC7009A87DA /* Products */ = {
isa = PBXGroup;
children = (
FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */,
);
name = Products;
sourceTree = "<group>";
};
FCF2870721DFAEC7009A87DA /* MobileNetDemo */ = {
isa = PBXGroup;
children = (
FCB40DCF21E0B9410075EC91 /* Resources */,
FCB40DA321E0B85B0075EC91 /* MetalHelper.swift */,
FC74BB3521DFAFEC0055232B /* MobileNet.swift */,
FCF2870821DFAEC7009A87DA /* AppDelegate.swift */,
FCF2870A21DFAEC7009A87DA /* ViewController.swift */,
FCF2870C21DFAEC7009A87DA /* Main.storyboard */,
FCF2870F21DFAEC8009A87DA /* Assets.xcassets */,
FCF2871121DFAEC8009A87DA /* LaunchScreen.storyboard */,
FCF2871421DFAEC8009A87DA /* Info.plist */,
FCB40DA121E0B7C60075EC91 /* MobilenetPreProcess.metal */,
);
path = MobileNetDemo;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
FCF2870421DFAEC7009A87DA /* MobileNetDemo */ = {
isa = PBXNativeTarget;
buildConfigurationList = FCF2871721DFAEC8009A87DA /* Build configuration list for PBXNativeTarget "MobileNetDemo" */;
buildPhases = (
B4EB56AEEFF6F3965DA3D2DA /* [CP] Check Pods Manifest.lock */,
FCF2870121DFAEC7009A87DA /* Sources */,
FCF2870221DFAEC7009A87DA /* Frameworks */,
FCF2870321DFAEC7009A87DA /* Resources */,
1D801B9681ACFCA70D444D2C /* [CP] Embed Pods Frameworks */,
FCB40DFC21E0BC360075EC91 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = MobileNetDemo;
productName = MobileNetDemo;
productReference = FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
FCF286FD21DFAEC7009A87DA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1010;
LastUpgradeCheck = 1010;
ORGANIZATIONNAME = Ray;
TargetAttributes = {
FCF2870421DFAEC7009A87DA = {
CreatedOnToolsVersion = 10.1;
};
};
};
buildConfigurationList = FCF2870021DFAEC7009A87DA /* Build configuration list for PBXProject "MobileNetDemo" */;
compatibilityVersion = "Xcode 9.3";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = FCF286FC21DFAEC7009A87DA;
productRefGroup = FCF2870621DFAEC7009A87DA /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
FCF2870421DFAEC7009A87DA /* MobileNetDemo */,
);
};
/* End PBXProject section */
/* Begin PBXResourcesBuildPhase section */
FCF2870321DFAEC7009A87DA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FCF2871321DFAEC8009A87DA /* LaunchScreen.storyboard in Resources */,
FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */,
FCB40DE921E0B9410075EC91 /* banana.jpeg in Resources */,
FCF2871021DFAEC8009A87DA /* Assets.xcassets in Resources */,
FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */,
FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */,
FCF2870E21DFAEC7009A87DA /* Main.storyboard in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXShellScriptBuildPhase section */
1D801B9681ACFCA70D444D2C /* [CP] Embed Pods Frameworks */ = {
isa = PBXShellScriptBuildPhase;
buildActionMask = 2147483647;
files = (
);
inputFileListPaths = (
);
inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
);
name = "[CP] Embed Pods Frameworks";
outputFileListPaths = (
);
outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
shellScript = "\"${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh\"\n";
showEnvVarsInLog = 0;
};
B4EB56AEEFF6F3965DA3D2DA /* [CP] Check Pods Manifest.lock */ = {
isa = PBXShellScriptBuildPhase;
buildActionMask = 2147483647;
files = (
);
inputFileListPaths = (
);
inputPaths = (
"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
"${PODS_ROOT}/Manifest.lock",
);
name = "[CP] Check Pods Manifest.lock";
outputFileListPaths = (
);
outputPaths = (
"$(DERIVED_FILE_DIR)/Pods-MobileNetDemo-checkManifestLockResult.txt",
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
showEnvVarsInLog = 0;
};
/* End PBXShellScriptBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
FCF2870121DFAEC7009A87DA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FC74BB3621DFAFEC0055232B /* MobileNet.swift in Sources */,
FCB40DA421E0B85B0075EC91 /* MetalHelper.swift in Sources */,
FCB40DA221E0B7C60075EC91 /* MobilenetPreProcess.metal in Sources */,
FCF2870B21DFAEC7009A87DA /* ViewController.swift in Sources */,
FCF2870921DFAEC7009A87DA /* AppDelegate.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXVariantGroup section */
FCF2870C21DFAEC7009A87DA /* Main.storyboard */ = {
isa = PBXVariantGroup;
children = (
FCF2870D21DFAEC7009A87DA /* Base */,
);
name = Main.storyboard;
sourceTree = "<group>";
};
FCF2871121DFAEC8009A87DA /* LaunchScreen.storyboard */ = {
isa = PBXVariantGroup;
children = (
FCF2871221DFAEC8009A87DA /* Base */,
);
name = LaunchScreen.storyboard;
sourceTree = "<group>";
};
/* End PBXVariantGroup section */
/* Begin XCBuildConfiguration section */
FCF2871521DFAEC8009A87DA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "iPhone Developer";
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 12.1;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = iphoneos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
FCF2871621DFAEC8009A87DA /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "iPhone Developer";
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 12.1;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = iphoneos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
};
name = Release;
};
FCF2871821DFAEC8009A87DA /* Debug */ = {
isa = XCBuildConfiguration;
baseConfigurationReference = 4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
FCF2871921DFAEC8009A87DA /* Release */ = {
isa = XCBuildConfiguration;
baseConfigurationReference = E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
FCF2870021DFAEC7009A87DA /* Build configuration list for PBXProject "MobileNetDemo" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCF2871521DFAEC8009A87DA /* Debug */,
FCF2871621DFAEC8009A87DA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
FCF2871721DFAEC8009A87DA /* Build configuration list for PBXNativeTarget "MobileNetDemo" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCF2871821DFAEC8009A87DA /* Debug */,
FCF2871921DFAEC8009A87DA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = FCF286FD21DFAEC7009A87DA /* Project object */;
}
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:MobileNetDemo.xcodeproj">
</FileRef>
</Workspace>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>
//
// AppDelegate.swift
// MobileNetDemo
//
// Created by liuRuiLong on 2019/1/4.
// Copyright © 2019 Ray. All rights reserved.
//
import UIKit
@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {
var window: UIWindow?
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
// Override point for customization after application launch.
return true
}
func applicationWillResignActive(_ application: UIApplication) {
// Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state.
// Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game.
}
func applicationDidEnterBackground(_ application: UIApplication) {
// Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later.
// If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits.
}
func applicationWillEnterForeground(_ application: UIApplication) {
// Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background.
}
func applicationDidBecomeActive(_ application: UIApplication) {
// Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface.
}
func applicationWillTerminate(_ application: UIApplication) {
// Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:.
}
}
{
"images" : [
{
"idiom" : "iphone",
"size" : "20x20",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "20x20",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "29x29",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "29x29",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "40x40",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "40x40",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "60x60",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "60x60",
"scale" : "3x"
},
{
"idiom" : "ipad",
"size" : "20x20",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "20x20",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "29x29",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "29x29",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "40x40",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "40x40",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "76x76",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "76x76",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "83.5x83.5",
"scale" : "2x"
},
{
"idiom" : "ios-marketing",
"size" : "1024x1024",
"scale" : "1x"
}
],
"info" : {
"version" : 1,
"author" : "xcode"
}
}
\ No newline at end of file
{
"info" : {
"version" : 1,
"author" : "xcode"
}
}
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
<dependencies>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="EHf-IW-A2E">
<objects>
<viewController id="01J-lp-oVM" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
</view>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="53" y="375"/>
</scene>
</scenes>
</document>
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="hKf-0C-qAk">
<device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/>
</device>
<dependencies>
<deployment identifier="iOS"/>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14460.20"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="Me8-c9-Oox">
<objects>
<viewController id="hKf-0C-qAk" customClass="ViewController" customModule="MobileNetDemo" customModuleProvider="target" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="Yst-rK-Wk7">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="bDP-xQ-JgS">
<rect key="frame" x="0.0" y="20" width="375" height="271"/>
</imageView>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="HLo-2k-dr7">
<rect key="frame" x="16" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" secondItem="HLo-2k-dr7" secondAttribute="height" multiplier="21:10" id="xlA-qq-ubI"/>
</constraints>
<state key="normal" title="Image">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="selectImageAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="rJB-ZK-jTR"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="Aa7-KR-JhB">
<rect key="frame" x="109.5" y="597" width="63" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Load">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="loadAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="Lkj-aW-8vj"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="2dy-Ya-PJY">
<rect key="frame" x="202.5" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Predict">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="predictAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="iw4-E7-3br"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="Bac-eY-xPP">
<rect key="frame" x="296" y="597" width="63" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Clear">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="clearAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="QgH-jd-cR1"/>
</connections>
</button>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="vhI-WH-WKF">
<rect key="frame" x="79.5" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" constant="30" id="ffB-31-3Iy"/>
<constraint firstAttribute="height" constant="30" id="nbx-3B-EW0"/>
</constraints>
</view>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="ZoT-1q-tgf">
<rect key="frame" x="266" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="height" constant="30" id="Iu3-ig-lYv"/>
<constraint firstAttribute="width" constant="30" id="Jic-6I-7ch"/>
</constraints>
</view>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="Zvo-dq-f6D">
<rect key="frame" x="172.5" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" constant="30" id="Zgu-c6-rPT"/>
<constraint firstAttribute="height" constant="30" id="c8V-Gd-hiK"/>
</constraints>
</view>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="耗时:" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="Jox-rT-ieC">
<rect key="frame" x="15" y="301" width="350" height="38"/>
<constraints>
<constraint firstAttribute="height" constant="38" id="8TB-w5-hbk"/>
</constraints>
<fontDescription key="fontDescription" type="system" pointSize="15"/>
<nil key="textColor"/>
<nil key="highlightedColor"/>
</label>
<imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="paddle-mobile.png" translatesAutoresizingMaskIntoConstraints="NO" id="PZO-kk-MVS">
<rect key="frame" x="90" y="637" width="195" height="30"/>
<constraints>
<constraint firstAttribute="width" secondItem="PZO-kk-MVS" secondAttribute="height" multiplier="6.5:1" id="9DJ-Rj-4ex"/>
</constraints>
</imageView>
<textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" editable="NO" text="结果:" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="efW-gP-E3g">
<rect key="frame" x="10" y="347" width="355" height="150"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="height" constant="150" id="whC-NW-nhZ"/>
</constraints>
<fontDescription key="fontDescription" type="system" pointSize="15"/>
<textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
</textView>
</subviews>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<constraints>
<constraint firstItem="PZO-kk-MVS" firstAttribute="centerX" secondItem="Yst-rK-Wk7" secondAttribute="centerX" id="2ET-tq-zfh"/>
<constraint firstItem="Zvo-dq-f6D" firstAttribute="leading" secondItem="Aa7-KR-JhB" secondAttribute="trailing" id="368-Sl-KgC"/>
<constraint firstItem="bDP-xQ-JgS" firstAttribute="top" secondItem="hlK-mk-uEU" secondAttribute="top" id="3HC-Tb-qff"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="trailing" secondItem="efW-gP-E3g" secondAttribute="trailing" constant="10" id="8QW-BB-dry"/>
<constraint firstItem="ZoT-1q-tgf" firstAttribute="leading" secondItem="2dy-Ya-PJY" secondAttribute="trailing" id="AhB-vB-1aW"/>
<constraint firstItem="Bac-eY-xPP" firstAttribute="leading" secondItem="ZoT-1q-tgf" secondAttribute="trailing" id="BhE-d9-7Sf"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" constant="16" id="BuX-zw-HOG"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="width" secondItem="Aa7-KR-JhB" secondAttribute="width" id="Dbs-xF-8in"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="width" secondItem="Bac-eY-xPP" secondAttribute="width" id="Dov-mA-K38"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="trailing" secondItem="Jox-rT-ieC" secondAttribute="trailing" constant="10" id="LfU-MA-UTb"/>
<constraint firstItem="Aa7-KR-JhB" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="OMl-f7-5CL"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="top" secondItem="efW-gP-E3g" secondAttribute="bottom" constant="100" id="P2f-lC-F02"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="bottom" secondItem="HLo-2k-dr7" secondAttribute="bottom" constant="40" id="Po9-43-AFd"/>
<constraint firstItem="bDP-xQ-JgS" firstAttribute="trailing" secondItem="hlK-mk-uEU" secondAttribute="trailing" id="Pqb-0o-qjh"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="trailing" secondItem="Bac-eY-xPP" secondAttribute="trailing" constant="16" id="VOE-fl-N71"/>
<constraint firstItem="vhI-WH-WKF" firstAttribute="leading" secondItem="HLo-2k-dr7" secondAttribute="trailing" id="Vlg-FW-uEQ"/>
<constraint firstItem="ZoT-1q-tgf" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="Wpv-Ck-8l3"/>
<constraint firstItem="efW-gP-E3g" firstAttribute="top" secondItem="Jox-rT-ieC" secondAttribute="bottom" constant="8" id="Z8f-Rs-QDZ"/>
<constraint firstItem="vhI-WH-WKF" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="af8-Qd-iQN"/>
<constraint firstItem="bDP-xQ-JgS" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" id="bZr-fF-a2S"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="width" secondItem="2dy-Ya-PJY" secondAttribute="width" id="c0U-4X-uIO"/>
<constraint firstItem="2dy-Ya-PJY" firstAttribute="leading" secondItem="Zvo-dq-f6D" secondAttribute="trailing" id="cRa-pW-xi8"/>
<constraint firstItem="2dy-Ya-PJY" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="f2B-zG-wXC"/>
<constraint firstItem="PZO-kk-MVS" firstAttribute="top" secondItem="HLo-2k-dr7" secondAttribute="bottom" constant="10" id="hAy-La-Eeh"/>
<constraint firstItem="Zvo-dq-f6D" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="hUc-wn-Ua1"/>
<constraint firstItem="Bac-eY-xPP" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="jDC-ag-kL6"/>
<constraint firstItem="Aa7-KR-JhB" firstAttribute="leading" secondItem="vhI-WH-WKF" secondAttribute="trailing" id="jgU-OM-v1G"/>
<constraint firstItem="PZO-kk-MVS" firstAttribute="bottom" secondItem="hlK-mk-uEU" secondAttribute="bottom" id="lkS-rk-Ap8"/>
<constraint firstItem="efW-gP-E3g" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" constant="10" id="q2g-4E-mgJ"/>
<constraint firstItem="Jox-rT-ieC" firstAttribute="top" secondItem="bDP-xQ-JgS" secondAttribute="bottom" constant="10" id="rqK-Pv-SXt"/>
<constraint firstItem="Jox-rT-ieC" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" constant="15" id="sP3-ym-vhH"/>
</constraints>
<viewLayoutGuide key="safeArea" id="hlK-mk-uEU"/>
</view>
<connections>
<outlet property="elapsedTimeLabel" destination="Jox-rT-ieC" id="QdK-sY-xmq"/>
<outlet property="resultTextView" destination="efW-gP-E3g" id="Vnl-XG-D8E"/>
<outlet property="selectImageView" destination="bDP-xQ-JgS" id="dMV-Wh-YsW"/>
</connections>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="ShQ-yg-7s0" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="-1558" y="-14"/>
</scene>
</scenes>
<resources>
<image name="paddle-mobile.png" width="16" height="16"/>
</resources>
</document>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>$(DEVELOPMENT_LANGUAGE)</string>
<key>CFBundleExecutable</key>
<string>$(EXECUTABLE_NAME)</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>$(PRODUCT_NAME)</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>NSCameraUsageDescription</key>
<string>use camera</string>
<key>UILaunchStoryboardName</key>
<string>LaunchScreen</string>
<key>UIMainStoryboardFile</key>
<string>Main</string>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>armv7</string>
</array>
<key>UISupportedInterfaceOrientations</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
<string>UIInterfaceOrientationLandscapeLeft</string>
<string>UIInterfaceOrientationLandscapeRight</string>
</array>
<key>UISupportedInterfaceOrientations~ipad</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
<string>UIInterfaceOrientationPortraitUpsideDown</string>
<string>UIInterfaceOrientationLandscapeLeft</string>
<string>UIInterfaceOrientationLandscapeRight</string>
</array>
</dict>
</plist>
......@@ -13,13 +13,13 @@
limitations under the License. */
import Foundation
import paddle_mobile
class MobileNet: Net{
public class MobileNet: Net{
class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, usePaddleMobileLib: false)
let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
}
}
......@@ -43,9 +43,7 @@ class MobileNet: Net{
let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: ResultHolder) -> String {
guard let resPointer = res.result else {
fatalError()
}
let resPointer = res.result
var s: [String] = []
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
......@@ -53,18 +51,13 @@ class MobileNet: Net{
return s.joined(separator: "\n")
}
override init(device: MTLDevice) {
override public init(device: MTLDevice) {
super.init(device: device)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 0
modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null"
modelDir = ""
modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
preprocessKernel = MobilenetPreProccess.init(device: device)
dim = (n: 1, h: 224, w: 224, c: 3)
inputDim = Dim.init(inDim: [1, 224, 224, 3])
}
}
//
// MobilenetProcess.metal
// MobileNetDemo
//
// Created by liuRuiLong on 2019/1/5.
// Copyright © 2019 Ray. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void mobilenet_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilenet_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
//
// ViewController.swift
// MobileNetDemo
//
// Created by liuRuiLong on 2019/1/4.
// Copyright © 2019 Ray. All rights reserved.
//
import UIKit
import paddle_mobile
class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView!
@IBOutlet weak var elapsedTimeLabel: UILabel!
var net: MobileNet!
var runner: Runner!
var toPredictTexture: MTLTexture?
override func viewDidLoad() {
super.viewDidLoad()
GlobalConfig.shared.computePrecision = .Float16
net = MobileNet.init(device: MetalHelper.shared.device)
runner = Runner.init(inNet: net, commandQueue: MetalHelper.shared.queue)
if let selectImage = UIImage.init(named: "banana.jpeg") {
selectImageView.image = selectImage
runner.getTexture(image: selectImage.cgImage!) {[weak self] (texture) in
self?.toPredictTexture = texture
}
}
}
@IBAction func loadAct(_ sender: Any) {
if runner.load() {
let resutText = " load success ! "
print(resutText)
self.resultTextView.text = resutText
} else {
fatalError(" load error ")
}
}
@IBAction func selectImageAct(_ sender: Any) {
let imagePicker = UIImagePickerController()
imagePicker.sourceType = .camera
imagePicker.delegate = self
self.present(imagePicker, animated: true, completion: nil)
}
@IBAction func clearAct(_ sender: Any) {
runner.clear()
}
@IBAction func predictAct(_ sender: Any) {
if let texture = toPredictTexture {
let beginDate = Date.init()
runner.predict(texture: texture) { [weak self] (success, resultHolder) in
if success, let inResultHolder = resultHolder {
let timeUse = Date.init().timeIntervalSince(beginDate)
DispatchQueue.main.async {
self?.elapsedTimeLabel.text = "\(timeUse * 1000)ms"
self?.resultTextView.text = self?.net.resultStr(res: inResultHolder)
}
} else {
print(" predict fail ")
}
}
} else {
print(" toPredictTexture is nil ")
}
}
}
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
picker.dismiss(animated: true){[weak self] in
guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else {
fatalError("no image")
}
sSelf.selectImageView.image = image
sSelf.runner.getTexture(image: image.cgImage!, getTexture: { (texture) in
sSelf.toPredictTexture = texture
})
}
}
}
......@@ -17,3 +17,9 @@ target 'paddle-mobile-unit-test' do
project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0'
end
target 'MobileNetDemo' do
project 'MobileNetDemo/MobileNetDemo.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0'
end
......@@ -10,20 +10,42 @@
30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; };
C2CBB49021B778EA0020DC6C /* libc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97B2140EE250073E130 /* libc++.tbd */; };
C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = C2E67E5D21524E460013F575 /* LoadPointerViewController.m */; };
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; };
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; };
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; };
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; };
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; };
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; };
FC203FB221CBFDBA00B37166 /* test.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC203FA921CBFDBA00B37166 /* test.jpg */; };
FC203FB321CBFDBA00B37166 /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC203FAD21CBFDBA00B37166 /* combined_mobilenet_params */; };
FC203FB421CBFDBA00B37166 /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC203FAE21CBFDBA00B37166 /* combined_mobilenet_model */; };
FC203FB521CBFDBA00B37166 /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC203FB021CBFDBA00B37166 /* yolo_params */; };
FC203FB621CBFDBA00B37166 /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC203FB121CBFDBA00B37166 /* yolo_model */; };
FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */; };
FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC2BFCBD21DF15D900C262B2 /* 123.jpg */; };
FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */ = {isa = PBXBuildFile; fileRef = FC2BFCBF21DF279900C262B2 /* classify-img-output.png */; };
FC2BFD3021DF3FEA00C262B2 /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2A21DF3FE900C262B2 /* MobilenetSSD_AR.swift */; };
FC2BFD3121DF3FEA00C262B2 /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2B21DF3FE900C262B2 /* Genet.swift */; };
FC2BFD3221DF3FEA00C262B2 /* MobileNetSSD.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2C21DF3FE900C262B2 /* MobileNetSSD.swift */; };
FC2BFD3321DF3FEA00C262B2 /* YoloNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2D21DF3FE900C262B2 /* YoloNet.swift */; };
FC2BFD3421DF3FEA00C262B2 /* MobileNetCombined.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2E21DF3FEA00C262B2 /* MobileNetCombined.swift */; };
FC2BFD3521DF3FEA00C262B2 /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2F21DF3FEA00C262B2 /* MobileNet.swift */; };
FC2BFD3821DF46DE00C262B2 /* OCDemoViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD3721DF46DE00C262B2 /* OCDemoViewController.m */; };
FC2BFD3C21DF480400C262B2 /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD3B21DF480400C262B2 /* CPUCompute.mm */; };
FC2BFD3E21DF5CE800C262B2 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD3D21DF5CE800C262B2 /* PreProcessKernel.metal */; };
FC2BFD4321DF5E1E00C262B2 /* PaddleMobileGPU.m in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4021DF5E1E00C262B2 /* PaddleMobileGPU.m */; };
FC2BFD4421DF5E1E00C262B2 /* SuperResolutionNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4221DF5E1E00C262B2 /* SuperResolutionNet.swift */; };
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */ = {isa = PBXBuildFile; fileRef = FC5E03B121DCE8D90016C137 /* mingren_input_data */; };
FC704C1921D2375300F98BAB /* super_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1721D2375300F98BAB /* super_params */; };
FC704C1A21D2375300F98BAB /* super_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1821D2375300F98BAB /* super_model */; };
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */; };
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */; };
FC704C2421D237FC00F98BAB /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2021D237FC00F98BAB /* yolo_params */; };
FC704C2521D237FC00F98BAB /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2121D237FC00F98BAB /* yolo_model */; };
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; };
FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; };
FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FC9797BD21D6045B00F2FD90 /* banana.jpeg */; };
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C021D608DF00F2FD90 /* mobilenet_model */; };
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C121D608DF00F2FD90 /* mobilenet_params */; };
FC9797C721D609FB00F2FD90 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C621D609FB00F2FD90 /* synset.txt */; };
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC9797CE21D6506F00F2FD90 /* mingren.jpg */; };
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */ = {isa = PBXBuildFile; fileRef = FCCED60421D7646E00BE8D5F /* test_image_super */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; };
......@@ -49,7 +71,6 @@
878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = "<group>"; };
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LoadPointerViewController.h; sourceTree = "<group>"; };
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = LoadPointerViewController.m; sourceTree = "<group>"; };
FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; };
FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; };
FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
FC039B8320E11C550081E9F8 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
......@@ -58,15 +79,41 @@
FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
FC203FA921CBFDBA00B37166 /* test.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = test.jpg; sourceTree = "<group>"; };
FC203FAD21CBFDBA00B37166 /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FC203FAE21CBFDBA00B37166 /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FC203FB021CBFDBA00B37166 /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; };
FC203FB121CBFDBA00B37166 /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; };
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = "<group>"; };
FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = 00001.jpg; sourceTree = "<group>"; };
FC2BFCBD21DF15D900C262B2 /* 123.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = 123.jpg; sourceTree = "<group>"; };
FC2BFCBF21DF279900C262B2 /* classify-img-output.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "classify-img-output.png"; sourceTree = "<group>"; };
FC2BFD2A21DF3FE900C262B2 /* MobilenetSSD_AR.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = "<group>"; };
FC2BFD2B21DF3FE900C262B2 /* Genet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = "<group>"; };
FC2BFD2C21DF3FE900C262B2 /* MobileNetSSD.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = "<group>"; };
FC2BFD2D21DF3FE900C262B2 /* YoloNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = YoloNet.swift; sourceTree = "<group>"; };
FC2BFD2E21DF3FEA00C262B2 /* MobileNetCombined.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNetCombined.swift; sourceTree = "<group>"; };
FC2BFD2F21DF3FEA00C262B2 /* MobileNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FC2BFD3621DF46DE00C262B2 /* OCDemoViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = OCDemoViewController.h; sourceTree = "<group>"; };
FC2BFD3721DF46DE00C262B2 /* OCDemoViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = OCDemoViewController.m; sourceTree = "<group>"; };
FC2BFD3A21DF480300C262B2 /* CPUCompute.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = "<group>"; };
FC2BFD3B21DF480400C262B2 /* CPUCompute.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CPUCompute.mm; sourceTree = "<group>"; };
FC2BFD3D21DF5CE800C262B2 /* PreProcessKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; };
FC2BFD4021DF5E1E00C262B2 /* PaddleMobileGPU.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = PaddleMobileGPU.m; sourceTree = "<group>"; };
FC2BFD4121DF5E1E00C262B2 /* PaddleMobileGPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileGPU.h; sourceTree = "<group>"; };
FC2BFD4221DF5E1E00C262B2 /* SuperResolutionNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SuperResolutionNet.swift; sourceTree = "<group>"; };
FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; };
FC5E03B121DCE8D90016C137 /* mingren_input_data */ = {isa = PBXFileReference; lastKnownFileType = file; path = mingren_input_data; sourceTree = "<group>"; };
FC704C1721D2375300F98BAB /* super_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_params; sourceTree = "<group>"; };
FC704C1821D2375300F98BAB /* super_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_model; sourceTree = "<group>"; };
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FC704C2021D237FC00F98BAB /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; };
FC704C2121D237FC00F98BAB /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; };
FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; };
FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = "<group>"; };
FC9797BD21D6045B00F2FD90 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
FC9797C021D608DF00F2FD90 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; };
FC9797C121D608DF00F2FD90 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; };
FC9797C621D609FB00F2FD90 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FC9797CE21D6506F00F2FD90 /* mingren.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = mingren.jpg; sourceTree = "<group>"; };
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCCED60421D7646E00BE8D5F /* test_image_super */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_image_super; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */
......@@ -125,8 +172,11 @@
FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = {
isa = PBXGroup;
children = (
FC203FA821CBFDBA00B37166 /* images */,
FC203FAA21CBFDBA00B37166 /* models */,
FC2BFD4F21DF892500C262B2 /* Resources */,
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */,
FC2BFD3F21DF5DDF00C262B2 /* OCInterface */,
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */,
FC2BFD3921DF46F000C262B2 /* OCDemo */,
FC803BCA214D27920094B8E5 /* VideoCapture */,
FC8CFED2213519540094D569 /* Net */,
FC039B8120E11C550081E9F8 /* AppDelegate.swift */,
......@@ -135,10 +185,7 @@
FC039B8820E11C560081E9F8 /* Assets.xcassets */,
FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */,
FC039B8D20E11C560081E9F8 /* Info.plist */,
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */,
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */,
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */,
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */,
);
path = "paddle-mobile-demo";
sourceTree = "<group>";
......@@ -146,44 +193,90 @@
FC203FA821CBFDBA00B37166 /* images */ = {
isa = PBXGroup;
children = (
FC2BFCBF21DF279900C262B2 /* classify-img-output.png */,
FC2BFCBD21DF15D900C262B2 /* 123.jpg */,
FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */,
FC5E03B121DCE8D90016C137 /* mingren_input_data */,
FCCED60421D7646E00BE8D5F /* test_image_super */,
FC9797CE21D6506F00F2FD90 /* mingren.jpg */,
FC9797BD21D6045B00F2FD90 /* banana.jpeg */,
FC203FA921CBFDBA00B37166 /* test.jpg */,
);
name = images;
path = ../../images;
path = images;
sourceTree = "<group>";
};
FC203FAA21CBFDBA00B37166 /* models */ = {
isa = PBXGroup;
children = (
FC203FAB21CBFDBA00B37166 /* vision_model */,
FC9797BF21D608DF00F2FD90 /* mobilenet */,
FC704C1B21D237FC00F98BAB /* vision_model */,
FC704C1621D2375300F98BAB /* superresoltion */,
);
path = models;
sourceTree = "<group>";
};
FC2BFD3921DF46F000C262B2 /* OCDemo */ = {
isa = PBXGroup;
children = (
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */,
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */,
FC2BFD3621DF46DE00C262B2 /* OCDemoViewController.h */,
FC2BFD3721DF46DE00C262B2 /* OCDemoViewController.m */,
);
path = OCDemo;
sourceTree = "<group>";
};
FC2BFD3F21DF5DDF00C262B2 /* OCInterface */ = {
isa = PBXGroup;
children = (
FC2BFD4121DF5E1E00C262B2 /* PaddleMobileGPU.h */,
FC2BFD4021DF5E1E00C262B2 /* PaddleMobileGPU.m */,
FC2BFD4221DF5E1E00C262B2 /* SuperResolutionNet.swift */,
);
name = models;
path = ../../models;
path = OCInterface;
sourceTree = "<group>";
};
FC203FAB21CBFDBA00B37166 /* vision_model */ = {
FC2BFD4F21DF892500C262B2 /* Resources */ = {
isa = PBXGroup;
children = (
FC203FAC21CBFDBA00B37166 /* mobilenet */,
FC203FAF21CBFDBA00B37166 /* yolo */,
FC203FA821CBFDBA00B37166 /* images */,
FC203FAA21CBFDBA00B37166 /* models */,
);
path = Resources;
sourceTree = "<group>";
};
FC704C1621D2375300F98BAB /* superresoltion */ = {
isa = PBXGroup;
children = (
FC704C1721D2375300F98BAB /* super_params */,
FC704C1821D2375300F98BAB /* super_model */,
);
path = superresoltion;
sourceTree = "<group>";
};
FC704C1B21D237FC00F98BAB /* vision_model */ = {
isa = PBXGroup;
children = (
FC704C1C21D237FC00F98BAB /* mobilenet */,
FC704C1F21D237FC00F98BAB /* yolo */,
);
path = vision_model;
sourceTree = "<group>";
};
FC203FAC21CBFDBA00B37166 /* mobilenet */ = {
FC704C1C21D237FC00F98BAB /* mobilenet */ = {
isa = PBXGroup;
children = (
FC203FAD21CBFDBA00B37166 /* combined_mobilenet_params */,
FC203FAE21CBFDBA00B37166 /* combined_mobilenet_model */,
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */,
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */,
);
path = mobilenet;
sourceTree = "<group>";
};
FC203FAF21CBFDBA00B37166 /* yolo */ = {
FC704C1F21D237FC00F98BAB /* yolo */ = {
isa = PBXGroup;
children = (
FC203FB021CBFDBA00B37166 /* yolo_params */,
FC203FB121CBFDBA00B37166 /* yolo_model */,
FC704C2021D237FC00F98BAB /* yolo_params */,
FC704C2121D237FC00F98BAB /* yolo_model */,
);
path = yolo;
sourceTree = "<group>";
......@@ -200,12 +293,29 @@
FC8CFED2213519540094D569 /* Net */ = {
isa = PBXGroup;
children = (
FC013927210204A3008100E3 /* PreProcessKernel.metal */,
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */,
FC2BFD3A21DF480300C262B2 /* CPUCompute.h */,
FC2BFD3B21DF480400C262B2 /* CPUCompute.mm */,
FC2BFD3D21DF5CE800C262B2 /* PreProcessKernel.metal */,
FC2BFD2B21DF3FE900C262B2 /* Genet.swift */,
FC2BFD2F21DF3FEA00C262B2 /* MobileNet.swift */,
FC2BFD2E21DF3FEA00C262B2 /* MobileNetCombined.swift */,
FC2BFD2A21DF3FE900C262B2 /* MobilenetSSD_AR.swift */,
FC2BFD2C21DF3FE900C262B2 /* MobileNetSSD.swift */,
FC2BFD2D21DF3FE900C262B2 /* YoloNet.swift */,
);
path = Net;
sourceTree = "<group>";
};
FC9797BF21D608DF00F2FD90 /* mobilenet */ = {
isa = PBXGroup;
children = (
FC9797C621D609FB00F2FD90 /* synset.txt */,
FC9797C021D608DF00F2FD90 /* mobilenet_model */,
FC9797C121D608DF00F2FD90 /* mobilenet_params */,
);
path = mobilenet;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
......@@ -268,14 +378,26 @@
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */,
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC203FB421CBFDBA00B37166 /* combined_mobilenet_model in Resources */,
FC203FB321CBFDBA00B37166 /* combined_mobilenet_params in Resources */,
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */,
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */,
FC704C1921D2375300F98BAB /* super_params in Resources */,
FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
FC203FB521CBFDBA00B37166 /* yolo_params in Resources */,
FC203FB621CBFDBA00B37166 /* yolo_model in Resources */,
FC9797C721D609FB00F2FD90 /* synset.txt in Resources */,
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */,
FC704C1A21D2375300F98BAB /* super_model in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */,
FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */,
FC203FB221CBFDBA00B37166 /* test.jpg in Resources */,
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */,
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */,
FC704C2421D237FC00F98BAB /* yolo_params in Resources */,
FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */,
FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */,
FC704C2521D237FC00F98BAB /* yolo_model in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
......@@ -325,14 +447,24 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FC2BFD3221DF3FEA00C262B2 /* MobileNetSSD.swift in Sources */,
FC2BFD3C21DF480400C262B2 /* CPUCompute.mm in Sources */,
FC2BFD4321DF5E1E00C262B2 /* PaddleMobileGPU.m in Sources */,
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */,
FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */,
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */,
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */,
FC2BFD3021DF3FEA00C262B2 /* MobilenetSSD_AR.swift in Sources */,
FC2BFD3321DF3FEA00C262B2 /* YoloNet.swift in Sources */,
FC2BFD3421DF3FEA00C262B2 /* MobileNetCombined.swift in Sources */,
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */,
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */,
FC2BFD3521DF3FEA00C262B2 /* MobileNet.swift in Sources */,
C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */,
FC2BFD3121DF3FEA00C262B2 /* Genet.swift in Sources */,
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
FC2BFD4421DF5E1E00C262B2 /* SuperResolutionNet.swift in Sources */,
FC2BFD3E21DF5CE800C262B2 /* PreProcessKernel.metal in Sources */,
FC2BFD3821DF46DE00C262B2 /* OCDemoViewController.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
......
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1010"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/>
</device>
<dependencies>
<deployment identifier="iOS"/>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14088"/>
<capability name="Aspect ratio constraints" minToolsVersion="5.1"/>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14460.20"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
......@@ -20,7 +19,7 @@
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="TQt-X9-PdF">
<rect key="frame" x="164" y="318" width="46" height="30"/>
<rect key="frame" x="164.5" y="318.5" width="46" height="30"/>
<state key="normal" title="Button"/>
<connections>
<action selector="predictAct:" destination="Vwd-lt-764" eventType="touchUpInside" id="d4z-Cv-6jY"/>
......@@ -60,7 +59,7 @@
<nil key="highlightedColor"/>
</label>
<pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr">
<rect key="frame" x="55" y="510.5" width="320" height="80"/>
<rect key="frame" x="55" y="510" width="320" height="80"/>
<constraints>
<constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/>
</constraints>
......@@ -83,6 +82,9 @@
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="wUL-9N-u1V">
<rect key="frame" x="16" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" secondItem="wUL-9N-u1V" secondAttribute="height" multiplier="21:10" id="cp7-bd-CvU"/>
</constraints>
<state key="normal" title="Image">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
......
//
// LoadPointerViewController.h
// paddle-mobile-demo
//
// Created by Xiao,Haichun on 2018/9/19.
// Copyright © 2018年 orange. All rights reserved.
//
#import <UIKit/UIKit.h>
@interface LoadPointerViewController : UIViewController
@end
......@@ -27,7 +27,4 @@ public class MetalHelper {
queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device)
}
}
......@@ -22,10 +22,10 @@ class MultiPredictViewController: UIViewController {
super.viewDidLoad()
let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device)
let genet = Genet.init(device: MetalHelper.shared.device)
runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue)
let queue2 = MetalHelper.shared.device.makeCommandQueue()
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU)
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue)
}
@IBAction func predictAct(_ sender: Any) {
......
//
// RGBToYCrCb_Y.metal
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/12/28.
// Copyright © 2018 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void buffer_to_texture_kernel( const device float *input [[buffer(0)]],
texture2d<float, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid);
}
kernel void buffer_to_texture_kernel_half( const device float *input [[buffer(0)]],
texture2d<half, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(half4(y, 0.0f, 0.0f, 0.0f), gid);
}
......@@ -16,7 +16,6 @@
#import <Foundation/Foundation.h>
@interface CPUResult: NSObject
@property (assign, nonatomic) float *output;
@property (assign, nonatomic) int outputSize;
......
......@@ -12,7 +12,6 @@
See the License for the specific language governing permissions and
limitations under the License. */
#import "CPUCompute.h"
#import <map>
......@@ -20,9 +19,6 @@
#import <utility>
#import <algorithm>
struct NMSParam {
float *score_data;
......
......@@ -13,42 +13,36 @@
limitations under the License. */
import Foundation
import paddle_mobile
public class Genet: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [128.0, 128.0, 128.0]
scale = 0.017
except = 0
modelPath = Bundle.main.path(forResource: "genet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 128, w: 128, c: 3)
inputDim = Dim.init(inDim: [1, 128, 128, 3])
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [128.0, 128.0, 128.0]
scale = 0.017
except = 0
modelPath = ""
paramPath = ""
modelDir = ""
@objc override public init(device: MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device: device,
paramPointer: paramPointer,
paramSize: paramSize,
modePointer: modePointer,
modelSize: modelSize)
preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 128, w: 128, c: 3)
inputDim = Dim.init(inDim: [1, 128, 128, 3])
}
class GenetPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false)
let s = Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
}
}
override public func resultStr(res: ResultHolder) -> String {
// fatalError()
return " \(res.result![0]) ... "
return " \(res.result[0]) ... "
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import paddle_mobile
public class MobileNet: Net{
class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
}
}
class PreWords {
var contents: [String] = []
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
}
}else{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String {
return contents[index]
}
}
let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: ResultHolder) -> String {
let resPointer = res.result
var s: [String] = []
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
}
override public init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 224, 224, 3])
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import paddle_mobile
public class MobileNetCombined: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
inputDim = Dim.init(inDim: [1, 224, 224, 3])
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
}
override public func resultStr(res: ResultHolder) -> String {
return " \(res.result[0]) ... "
}
}
......@@ -13,36 +13,35 @@
limitations under the License. */
import Foundation
import paddle_mobile
public class MobileNet_ssd_hand: Net{
public class MobileNet_ssd_hand: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 2
modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null"
modelDir = ""
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 300, w: 300, c: 3)
inputDim = Dim.init(inDim: [1, 300, 300, 3])
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 2
modelPath = ""
paramPath = ""
modelDir = ""
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 300, w: 300, c: 3)
inputDim = Dim.init(inDim: [1, 300, 300, 3])
}
class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false)
let s = Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
}
}
......@@ -50,7 +49,7 @@ public class MobileNet_ssd_hand: Net{
return " \(res)"
}
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
override public func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
// guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ")
......
......@@ -13,55 +13,49 @@
limitations under the License. */
import Foundation
import paddle_mobile
public class MobileNet_ssd_AR: Net{
public class MobileNet_ssd_AR: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [103.94, 116.78, 123.68]
scale = 1
except = 2
modelPath = Bundle.main.path(forResource: "ar_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 160, w: 160, c: 3)
inputDim = Dim.init(inDim: [1, 160, 160, 3])
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [103.94, 116.78, 123.68]
scale = 1
except = 2
modelPath = ""
paramPath = ""
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 160, w: 160, c: 3)
inputDim = Dim.init(inDim: [1, 160, 160, 3])
}
class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 160, inHeight: 160, inChannel: 3)
super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, usePaddleMobileLib: false)
init(device: MTLDevice) {
let s = Shape.init(inWidth: 160, inHeight: 160, inChannel: 3)
super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
}
}
override public func resultStr(res: ResultHolder) -> String {
return " \(res.result![0])"
return " \(res.result[0])"
}
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
guard let interRes = paddleMobileRes.intermediateResults else {
fatalError(" need have inter result ")
}
guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
fatalError(" need score ")
}
guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
fatalError()
}
override public func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
fatalError()
// guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ")
// }
//
// guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
// fatalError(" need score ")
// }
//
// guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
// fatalError()
// }
// let startDate = Date.init()
......@@ -72,19 +66,19 @@ public class MobileNet_ssd_AR: Net{
//
// print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
let nmsCompute = NMSCompute.init()
nmsCompute.scoreThredshold = 0.25
nmsCompute.nmsTopK = 100
nmsCompute.keepTopK = 100
nmsCompute.nmsEta = 1.0
nmsCompute.nmsThreshold = 0.449999988
nmsCompute.background_label = 0;
nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
fatalError( " result error " )
}
let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
// let nmsCompute = NMSCompute.init()
// nmsCompute.scoreThredshold = 0.25
// nmsCompute.nmsTopK = 100
// nmsCompute.keepTopK = 100
// nmsCompute.nmsEta = 1.0
// nmsCompute.nmsThreshold = 0.449999988
// nmsCompute.background_label = 0;
// nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
// nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
// guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
// fatalError( " result error " )
// }
// let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
// for i in 0..<Int(result.outputSize) {
//
// print("i \(i) : \(result.output[i])")
......@@ -92,62 +86,63 @@ public class MobileNet_ssd_AR: Net{
// print(Date.init().timeIntervalSince(startDate))
// print(resultHolder.result![0])
return resultHolder
// return resultHolder
}
override func updateProgram(program: Program) {
for i in [56, 66, 76, 86, 93, 99] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
program.scope[output] = originTexture
if i == 99 {
opDesc.attrs["axis"] = 0
} else {
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
}
}
for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
}
for i in [60, 101, 90, 97, 70, 80] {
let opDesc = program.programDesc.blocks[0].ops[i]
let output = opDesc.outputs["Out"]!.first!
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
}
for i in [102] {
let opDesc = program.programDesc.blocks[0].ops[i]
for output in opDesc.outputs["Out"]! {
let v = program.scope[output]!
let originTexture = v as! Texture<Float32>
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
}
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
print(" split axis \(opDesc.attrs["axis"])")
}
// override func updateProgram(program: Program) {
// for i in [56, 66, 76, 86, 93, 99] {
// let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]!
// let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
//
// originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
//
// originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
//
// program.scope[output] = originTexture
//
// if i == 99 {
// opDesc.attrs["axis"] = 0
// } else {
// opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
// }
// }
//
// for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
// let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]!
//
//
//
// let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
// }
//
// for i in [60, 101, 90, 97, 70, 80] {
// let opDesc = program.programDesc.blocks[0].ops[i]
// let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]!
// let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
// }
//
// for i in [102] {
// let opDesc = program.programDesc.blocks[0].ops[i]
// for output in opDesc.outputs["Out"]! {
// let v = program.scope[output]!
// let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
// }
// opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
// print(" split axis \(opDesc.attrs["axis"])")
// }
// 99
}
// }
}
//
// PaddleMobile.swift
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/9/5.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
......@@ -115,23 +115,3 @@ kernel void mobilent_ar_preprocess_half(texture2d<half, access::read> inTexture
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void scale(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(input, gid);
}
kernel void scale_half(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<half, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(half4(input), gid);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import Foundation
import paddle_mobile
public class YoloNet: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null"
inputDim = Dim.init(inDim: [1, 416, 416, 3])
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
}
override public func resultStr(res: ResultHolder) -> String {
return " \(res.result[0]) ... "
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import <UIKit/UIKit.h>
@interface LoadPointerViewController : UIViewController
@end
//
// LoadPointerViewController.m
// paddle-mobile-demo
//
// Created by Xiao,Haichun on 2018/9/19.
// Copyright © 2018年 orange. All rights reserved.
//
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import "PaddleMobileGPU.h"
#import "LoadPointerViewController.h"
#import <Metal/Metal.h>
#import "paddle-mobile-demo-Bridging-Header.h"
#import <Metal/Metal.h>
@interface LoadPointerViewController ()
@property (strong, nonatomic) id<MTLDevice> device;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
@interface OCDemoViewController : NSObject
@end
NS_ASSUME_NONNULL_END
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import "OCDemoViewController.h"
@implementation OCDemoViewController
@end
......@@ -16,9 +16,8 @@
#import <Foundation/Foundation.h>
typedef enum : NSUInteger {
MobileNetType,
MobileNetSSDType,
GenetType,
SuperResolutionNetType,
MobileNetSSDType
} NetType;
@interface PaddleMobileGPUResult: NSObject
......
......@@ -12,10 +12,10 @@
See the License for the specific language governing permissions and
limitations under the License. */
#import <Foundation/Foundation.h>
#import "PaddleMobileGPU.h"
#import "paddle_mobile.h"
#import <paddle_mobile/paddle_mobile-Swift.h>
#import <Foundation/Foundation.h>
#import <paddle_mobile_demo-Swift.h>
@implementation ModelConfig
@end
......@@ -52,14 +52,12 @@
self = [super init];
if (self) {
Net *net = nil;
if (netType == GenetType) {
net = [[Genet alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize];
if (netType == SuperResolutionNetType) {
net = [[SuperResolutionNet alloc] initWithDevice:queue.device];
} else if (netType == MobileNetSSDType) {
net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize];
} else if (netType == MobileNetType) {
}
runner = [[Runner alloc] initInNet:net commandQueue:queue inPlatform:PlatformGPU];
runner = [[Runner alloc] initInNet:net commandQueue:queue];
}
return self;
}
......@@ -69,6 +67,7 @@
}
-(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSNumber *> *))completion {
[runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) {
NSMutableArray<NSNumber *> *resultArray = [NSMutableArray arrayWithCapacity:result.capacity];
for (int i = 0; i < result.capacity; ++i) {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import paddle_mobile
@objc public class SuperResolutionNet: Net{
override public func resultStr(res: ResultHolder) -> String {
return "未实现"
}
@objc override public init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "super_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "super_params", ofType: nil) ?! "para null"
preprocessKernel = nil
inputDim = Dim.init(inDim: [1, 224, 224, 1])
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
}
override public func updateProgram(program: Program) {
// n h w c
for block in program.programDesc.blocks {
for varDesc in block.vars {
if !varDesc.persistable {
if varDesc.type == .LodTensor {
let varEle = program.scope.vars[varDesc.name]
if let texture = varEle as? Texture {
let newDim = Dim.init(inDim: [texture.dim[0], inputDim[1], inputDim[2], texture.tensorDim[1]])
print(" var desc name " + varDesc.name + " new dim" + "\(newDim)")
texture.updateDims(inTensorDim: Dim.init(inDim: [texture.tensorDim[0], texture.tensorDim[1], inputDim[1], inputDim[2]]), inDim: newDim)
texture.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: GlobalConfig.shared.computePrecision)
let output: FetchHolder = program.scope.output() as! FetchHolder
output.dim = newDim
output.capacity = newDim.numel()
output.paddedCapacity = newDim.numel() * 4
output.initBuffer(device: device)
}
}
}
}
}
}
}
......@@ -18,31 +18,56 @@ import CoreMedia
import paddle_mobile
import MetalPerformanceShaders
var platform: Platform = .GPU
let threadSupport: [(Platform, String)] = [(.GPU, "GPU"), (.CPU, "CPU")]
class FileReader {
let file: UnsafeMutablePointer<FILE>
let fileSize: Int
init(paramPath: String) throws {
guard let tmpFile = fopen(paramPath, "rb") else {
throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
}
file = tmpFile
fseek(file, 0, SEEK_END)
fileSize = ftell(file)
guard fileSize > 0 else {
throw PaddleMobileError.loaderError(message: "param file size is too small")
}
rewind(file)
}
func read<T>() -> UnsafeMutablePointer<T> {
let ptr = UnsafeMutablePointer<T>.allocate(capacity: MemoryLayout<T>.size * fileSize)
fread(ptr, fileSize, 1, file)
return ptr
}
deinit {
fclose(file)
}
}
//.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform),
let modelHelperMap: [SupportModel : Runner] = [
.yolo : Runner.init(inNet: YoloNet.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform),
.mobilenet_combined : Runner.init(inNet: MobileNetCombined.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform)]
//, .genet : Genet.init()
//let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()]
enum Platform {
case GPU
}
let netSupport: [SupportModel : Net] = [.yolo : YoloNet.init(device: MetalHelper.shared.device), .mobilenet_combined : MobileNetCombined.init(device: MetalHelper.shared.device)]
let platformSupport: [(Platform, String)] = [(.GPU, "GPU")]
enum SupportModel: String{
// case mobilenet = "mobilenet"
// case mobilenet_ssd = "mobilenetssd"
case yolo = "yolo"
case yolo = "yolo"
case mobilenet_combined = "mobilenet_combined"
case super_resolution = "superresoltion"
case mobilenet = "mobilenet"
static func supportedModels() -> [SupportModel] {
// .mobilenet,
// .mobilenet_ssd,
return [.yolo, .mobilenet_combined]
return [.super_resolution, .yolo, .mobilenet_combined, .mobilenet]
}
}
let netSupport: [SupportModel : Net] = [
.super_resolution : SuperResolutionNet.init(device: MetalHelper.shared.device),
.yolo : YoloNet.init(device: MetalHelper.shared.device),
.mobilenet_combined : MobileNetCombined.init(device: MetalHelper.shared.device),
.mobilenet : MobileNet.init(device: MetalHelper.shared.device)]
class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView!
......@@ -50,28 +75,37 @@ class ViewController: UIViewController {
@IBOutlet weak var modelPickerView: UIPickerView!
@IBOutlet weak var threadPickerView: UIPickerView!
@IBOutlet weak var videoView: UIView!
// var videoCapture: VideoCapture!
// var videoCapture: VideoCapture!
var selectImage: UIImage?
var inputPointer: UnsafeMutablePointer<Float32>?
var modelType: SupportModel = SupportModel.supportedModels()[0]
var toPredictTexture: MTLTexture?
var runner: Runner!
var platform: Platform = .GPU
var threadNum = 1
@IBAction func loadAct(_ sender: Any) {
runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue, inPlatform: platform)
if platform == .CPU {
if inputPointer == nil {
inputPointer = runner.preproccess(image: selectImage!.cgImage!)
}
} else if platform == .GPU {
runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue)
if platform == .GPU {
// let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil)
// let fileReader = try! FileReader.init(paramPath: filePath!)
// let pointer: UnsafeMutablePointer<Float32> = fileReader.read()
//
//
// let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared)
//
// buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize)
if self.toPredictTexture == nil {
runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// runner.getTexture(inBuffer: buffer!) { [weak self] (texture) in
// self?.toPredictTexture = texture
// }
runner.getTexture(image: selectImage!.cgImage!) { [weak self] (texture) in
self?.toPredictTexture = texture
}
}
......@@ -106,27 +140,21 @@ class ViewController: UIViewController {
return
}
// for _ in 0..<1{
// runner.predict(texture: inTexture) { (success, resultHolder) in
// resultHolder?.releasePointer()
// }
// }
let startDate = Date.init()
for i in 0..<max {
runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
self.runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
guard let sSelf = self else {
fatalError()
}
if success {
if success, let inResultHolder = resultHolder {
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
print(inResultHolder.result.floatArr(count: inResultHolder.capacity).strideArray())
DispatchQueue.main.async {
// print(resultHolder!.result![0])
sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: resultHolder!)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
}
}
}
......@@ -134,39 +162,6 @@ class ViewController: UIViewController {
DispatchQueue.main.async {
resultHolder?.releasePointer()
}
// print("释放")
}
// print("sleep before ")
// usleep(33000)
// print("sleep after ")
}
case .CPU:
guard let inInputPointer = inputPointer else {
fatalError( " need input pointer " )
}
for _ in 0..<10 {
runner.predict(inputPointer: inInputPointer) { (success, res) in
res?.releaseOutput()
}
}
let startDate = Date.init()
for i in 0..<max {
runner.predict(inputPointer: inInputPointer) { [weak self](success, res) in
guard let sSelf = self else {
fatalError()
}
if success {
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
// sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: res)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
}
}
}
res?.releaseOutput()
}
}
}
......@@ -179,37 +174,38 @@ class ViewController: UIViewController {
modelPickerView.dataSource = self
threadPickerView.delegate = self
threadPickerView.dataSource = self
if let image = UIImage.init(named: "test.jpg") {
selectImage = image
selectImageView.image = image
if let image = UIImage.init(named: "classify-img-output.png") {
selectImage = image
selectImageView.image = image
} else {
print("请添加测试图片")
print("请添加测试图片")
}
GlobalConfig.shared.computePrecision = .Float32
// if platform == .CPU {
// inputPointer = runner.preproccess(image: selectImage!.cgImage!)
// } else if platform == .GPU {
// runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// self?.toPredictTexture = texture
// }
// } else {
// fatalError( " unsupport " )
// }
// if platform == .CPU {
// inputPointer = runner.preproccess(image: selectImage!.cgImage!)
// } else if platform == .GPU {
// runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// self?.toPredictTexture = texture
// }
// } else {
// fatalError( " unsupport " )
// }
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
}
}
......@@ -228,7 +224,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
if pickerView == modelPickerView {
return SupportModel.supportedModels().count
} else if pickerView == threadPickerView {
return threadSupport.count
return platformSupport.count
} else {
fatalError()
}
......@@ -238,7 +234,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
if pickerView == modelPickerView {
return SupportModel.supportedModels()[row].rawValue
} else if pickerView == threadPickerView {
return threadSupport[row].1
return platformSupport[row].1
} else {
fatalError()
}
......@@ -248,8 +244,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
if pickerView == modelPickerView {
self.modelType = SupportModel.supportedModels()[row]
} else if pickerView == threadPickerView {
platform = threadSupport[row].0
platform = platformSupport[row].0
} else {
fatalError()
}
......@@ -276,25 +271,11 @@ extension ViewController: VideoCaptureDelegate{
func predictTexture(texture: MTLTexture){
runner.scaleTexture(input: texture) { (scaledTexture) in
self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in
// print(resultHolder!.result![0])
// print(resultHolder!.result![0])
resultHolder?.releasePointer()
})
}
}
// @available(iOS 10.0, *)
// func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) {
//// if !bool1 {
//// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) {
// self.predictTexture(texture: texture!)
//// }
//
//
//// bool1 = true
//// }
//
// }
}
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT2(a, b) a ## b
#define FUNC(m, n, q) CONCAT3_(m, n, q)
#define FUNC_T(m, n) CONCAT2_(m, n)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC_T(fetch, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const VECTOR(P, 4) input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define P float
#include "FetchKernel.inc.metal"
#undef P
#define P half
#include "FetchKernel.inc.metal"
#undef P
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
......@@ -24,6 +24,6 @@ using namespace metal;
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC2_(a, b) CONCAT2_(a, b)
#define FUNC3_(a, b, c) CONCAT3_(a, b, c)
//
// PoolKernel.inc.metal
// paddle-mobile
//
// Created by liuRuiLong on 2018/12/29.
// Copyright © 2018 orange. All rights reserved.
//
#ifdef P
kernel void FUNC2_(pool, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
VECTOR(P, 4) r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= (xmax - xmin) * (ymax - ymin);
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
struct PoolParam {
int ksizeX;
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
#define P float
#import "PoolKernel.inc.metal"
#undef P
#define P half
#import "PoolKernel.inc.metal"
#undef P
......@@ -16,7 +16,6 @@
4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA91214665D700D0F791 /* ShapeOp.swift */; };
4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA932146661500D0F791 /* ShapeKernel.swift */; };
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA972146666500D0F791 /* FlattenOp.swift */; };
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */ = {isa = PBXBuildFile; fileRef = 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */; };
4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */; };
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */; };
4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */; };
......@@ -29,8 +28,6 @@
4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; };
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928812135673D005B6C3A /* ConcatKernel.metal */; };
4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9288321357BE3005B6C3A /* Elementwise.metal */; };
C28FDF8421B7858F0054EFAC /* MobileNetCombined.swift in Sources */ = {isa = PBXBuildFile; fileRef = C28FDF8221B7858F0054EFAC /* MobileNetCombined.swift */; };
C28FDF8521B7858F0054EFAC /* YoloNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = C28FDF8321B7858F0054EFAC /* YoloNet.swift */; };
C28FE02F21BA68C00054EFAC /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02C21BA68C00054EFAC /* Metal.framework */; };
C28FE03021BA68C00054EFAC /* MetalPerformanceShaders.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02D21BA68C00054EFAC /* MetalPerformanceShaders.framework */; };
C28FE03121BA68C00054EFAC /* MetalKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02E21BA68C00054EFAC /* MetalKit.framework */; };
......@@ -64,19 +61,15 @@
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; };
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; };
FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */ = {isa = PBXBuildFile; fileRef = FC292C5521421B4600CF622F /* PaddleMobileGPU.m */; };
FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7C214255BC00CF622F /* CPUCompute.mm */; };
FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7E214255BC00CF622F /* MobileNetSSD.swift */; };
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C7D214255BC00CF622F /* CPUCompute.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC292C872142624800CF622F /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C862142624800CF622F /* Genet.swift */; };
FC33B0F02147659000714A93 /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC33B0EF2147659000714A93 /* MobileNet.swift */; };
FC1CF3F721D4B4C400F7392E /* Runner.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1CF3F621D4B4C400F7392E /* Runner.swift */; };
FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFCC121DF2F9100C262B2 /* GlobalConfig.swift */; };
FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4521DF685F00C262B2 /* Scale.swift */; };
FC2BFD4A21DF81DE00C262B2 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4921DF81DE00C262B2 /* Kernel.swift */; };
FC2BFD4E21DF820B00C262B2 /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4D21DF820A00C262B2 /* ConvAddBatchNormReluOp.swift */; };
FC2BFD5121DF8E0400C262B2 /* Scale.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD5021DF8E0400C262B2 /* Scale.metal */; };
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; };
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */; };
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; };
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; };
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */; };
......@@ -86,7 +79,9 @@
FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC6214CBA820094B8E5 /* Macro.metal */; };
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */; };
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; };
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */; };
FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9797C821D6101D00F2FD90 /* ResizeBilinearOp.swift */; };
FC9797CB21D6102D00F2FD90 /* ResizeBilinearKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9797CA21D6102D00F2FD90 /* ResizeBilinearKernel.swift */; };
FC9C2A0D21D3D185005856C6 /* FetchKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC9C2A0C21D3D185005856C6 /* FetchKernel.inc.metal */; };
FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; };
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; };
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; };
......@@ -97,6 +92,7 @@
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD42138272900BD58AA /* ConvAddMetal.metal */; };
FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; };
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; };
FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */; };
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
......@@ -111,6 +107,7 @@
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; };
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; };
FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; };
FCCED5E121D71FC000BE8D5F /* PoolKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */; };
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; };
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; };
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; };
......@@ -136,9 +133,7 @@
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; };
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
......@@ -164,8 +159,6 @@
4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = "<group>"; };
4AF928812135673D005B6C3A /* ConcatKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = ConcatKernel.metal; sourceTree = "<group>"; };
4AF9288321357BE3005B6C3A /* Elementwise.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Elementwise.metal; sourceTree = "<group>"; };
C28FDF8221B7858F0054EFAC /* MobileNetCombined.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNetCombined.swift; sourceTree = "<group>"; };
C28FDF8321B7858F0054EFAC /* YoloNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = YoloNet.swift; sourceTree = "<group>"; };
C28FE02C21BA68C00054EFAC /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
C28FE02D21BA68C00054EFAC /* MetalPerformanceShaders.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShaders.framework; path = System/Library/Frameworks/MetalPerformanceShaders.framework; sourceTree = SDKROOT; };
C28FE02E21BA68C00054EFAC /* MetalKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalKit.framework; path = System/Library/Frameworks/MetalKit.framework; sourceTree = SDKROOT; };
......@@ -203,19 +196,15 @@
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; };
FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; };
FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileGPU.h; sourceTree = "<group>"; };
FC292C5521421B4600CF622F /* PaddleMobileGPU.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = PaddleMobileGPU.m; sourceTree = "<group>"; };
FC292C7C214255BC00CF622F /* CPUCompute.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CPUCompute.mm; sourceTree = "<group>"; };
FC292C7D214255BC00CF622F /* CPUCompute.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = "<group>"; };
FC292C7E214255BC00CF622F /* MobileNetSSD.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = "<group>"; };
FC292C862142624800CF622F /* Genet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = "<group>"; };
FC33B0EF2147659000714A93 /* MobileNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FC1CF3F621D4B4C400F7392E /* Runner.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Runner.swift; sourceTree = "<group>"; };
FC2BFCC121DF2F9100C262B2 /* GlobalConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GlobalConfig.swift; sourceTree = "<group>"; };
FC2BFD4521DF685F00C262B2 /* Scale.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Scale.swift; sourceTree = "<group>"; };
FC2BFD4921DF81DE00C262B2 /* Kernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Kernel.swift; sourceTree = "<group>"; };
FC2BFD4D21DF820A00C262B2 /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluOp.swift; sourceTree = "<group>"; };
FC2BFD5021DF8E0400C262B2 /* Scale.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Scale.metal; sourceTree = "<group>"; };
FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; };
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = "<group>"; };
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileCPU.h; sourceTree = "<group>"; };
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = "<group>"; };
FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; };
......@@ -226,7 +215,9 @@
FC803BC6214CBA820094B8E5 /* Macro.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Macro.metal; sourceTree = "<group>"; };
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.metal; sourceTree = "<group>"; };
FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; };
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = "<group>"; };
FC9797C821D6101D00F2FD90 /* ResizeBilinearOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ResizeBilinearOp.swift; sourceTree = "<group>"; };
FC9797CA21D6102D00F2FD90 /* ResizeBilinearKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ResizeBilinearKernel.swift; sourceTree = "<group>"; };
FC9C2A0C21D3D185005856C6 /* FetchKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.inc.metal; sourceTree = "<group>"; };
FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; };
FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; };
FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; };
......@@ -237,6 +228,7 @@
FCA67CD42138272900BD58AA /* ConvAddMetal.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddMetal.metal; sourceTree = "<group>"; };
FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; };
FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; };
FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchKernel.swift; sourceTree = "<group>"; };
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
......@@ -251,6 +243,7 @@
FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = "<group>"; };
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = "<group>"; };
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = "<group>"; };
FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PoolKernel.inc.metal; sourceTree = "<group>"; };
FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; };
FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; };
FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; };
......@@ -276,9 +269,7 @@
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = "<group>"; };
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
......@@ -290,7 +281,6 @@
C28FE03021BA68C00054EFAC /* MetalPerformanceShaders.framework in Frameworks */,
C28FE03121BA68C00054EFAC /* MetalKit.framework in Frameworks */,
D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */,
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
......@@ -339,23 +329,8 @@
FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = {
isa = PBXGroup;
children = (
C28FDF8221B7858F0054EFAC /* MobileNetCombined.swift */,
C28FDF8321B7858F0054EFAC /* YoloNet.swift */,
FCE9D7B6214F869000B520C3 /* Net.swift */,
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */,
FC33B0EF2147659000714A93 /* MobileNet.swift */,
FC292C862142624800CF622F /* Genet.swift */,
FC292C7E214255BC00CF622F /* MobileNetSSD.swift */,
FC292C7C214255BC00CF622F /* CPUCompute.mm */,
FC292C7D214255BC00CF622F /* CPUCompute.h */,
FC292C5521421B4600CF622F /* PaddleMobileGPU.m */,
FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */,
FC4FD9762140E4920073E130 /* CPU */,
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */,
FC039BAE20E11CC20081E9F8 /* Program */,
FC039BA320E11CBC0081E9F8 /* Operators */,
FC039B9C20E11CB20081E9F8 /* framework */,
FC039B9320E11C9A0081E9F8 /* Common */,
FC2BFD4721DF818000C262B2 /* API */,
FC2BFD4821DF818000C262B2 /* Src */,
FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */,
FC039B6E20E11C3C0081E9F8 /* Info.plist */,
);
......@@ -375,7 +350,7 @@
path = Common;
sourceTree = "<group>";
};
FC039B9C20E11CB20081E9F8 /* framework */ = {
FC039B9C20E11CB20081E9F8 /* Framework */ = {
isa = PBXGroup;
children = (
FC039BA120E11CB70081E9F8 /* Loader.swift */,
......@@ -384,7 +359,7 @@
FC039B9E20E11CB20081E9F8 /* Dim.swift */,
FC9D038320E23B01000F735A /* Texture.swift */,
);
path = framework;
path = Framework;
sourceTree = "<group>";
};
FC039BA320E11CBC0081E9F8 /* Operators */ = {
......@@ -392,7 +367,8 @@
children = (
FC086BA520E67E8500D85EF7 /* Kernels */,
FCD592FA20E248EC00252966 /* Base */,
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */,
FC9797C821D6101D00F2FD90 /* ResizeBilinearOp.swift */,
FC2BFD4D21DF820A00C262B2 /* ConvAddBatchNormReluOp.swift */,
FC039BA420E11CBC0081E9F8 /* ConvOp.swift */,
FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */,
FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */,
......@@ -446,6 +422,7 @@
children = (
FCDDC6CD212FE02100E5EF74 /* Base */,
FCEB6837212F00B100D2448E /* metal */,
FC9797CA21D6102D00F2FD90 /* ResizeBilinearKernel.swift */,
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */,
FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */,
FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */,
......@@ -471,17 +448,31 @@
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */,
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */,
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */,
FC2BFD4521DF685F00C262B2 /* Scale.swift */,
FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */,
);
path = Kernels;
sourceTree = "<group>";
};
FC4FD9762140E4920073E130 /* CPU */ = {
FC2BFD4721DF818000C262B2 /* API */ = {
isa = PBXGroup;
children = (
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */,
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */,
FCE9D7B6214F869000B520C3 /* Net.swift */,
FC1CF3F621D4B4C400F7392E /* Runner.swift */,
FC2BFCC121DF2F9100C262B2 /* GlobalConfig.swift */,
);
path = API;
sourceTree = "<group>";
};
FC2BFD4821DF818000C262B2 /* Src */ = {
isa = PBXGroup;
children = (
FC039BAE20E11CC20081E9F8 /* Program */,
FC039BA320E11CBC0081E9F8 /* Operators */,
FC039B9C20E11CB20081E9F8 /* Framework */,
FC039B9320E11C9A0081E9F8 /* Common */,
);
path = CPU;
path = Src;
sourceTree = "<group>";
};
FCD592FA20E248EC00252966 /* Base */ = {
......@@ -497,7 +488,7 @@
FCDDC6CD212FE02100E5EF74 /* Base */ = {
isa = PBXGroup;
children = (
FCF2D73720E64E70007AC5F5 /* Kernel.swift */,
FC2BFD4921DF81DE00C262B2 /* Kernel.swift */,
);
path = Base;
sourceTree = "<group>";
......@@ -533,13 +524,16 @@
FC0226552138F33800F395E2 /* TransposeKernel.metal */,
4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */,
FC0226572138F38D00F395E2 /* PoolKernel.metal */,
FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */,
FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */,
FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */,
FC803BC6214CBA820094B8E5 /* Macro.metal */,
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */,
FC9C2A0C21D3D185005856C6 /* FetchKernel.inc.metal */,
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */,
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */,
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */,
FC2BFD5021DF8E0400C262B2 /* Scale.metal */,
);
path = metal;
sourceTree = "<group>";
......@@ -551,10 +545,6 @@
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */,
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */,
FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */,
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */,
FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
......@@ -650,6 +640,7 @@
buildActionMask = 2147483647;
files = (
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */,
FC9C2A0D21D3D185005856C6 /* FetchKernel.inc.metal in Sources */,
4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */,
FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */,
......@@ -672,20 +663,17 @@
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */,
FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */,
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */,
FC9797CB21D6102D00F2FD90 /* ResizeBilinearKernel.swift in Sources */,
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */,
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */,
FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */,
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */,
FC33B0F02147659000714A93 /* MobileNet.swift in Sources */,
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */,
4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */,
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */,
......@@ -697,17 +685,19 @@
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
C28FDF8521B7858F0054EFAC /* YoloNet.swift in Sources */,
FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */,
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */,
FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
FCCED5E121D71FC000BE8D5F /* PoolKernel.inc.metal in Sources */,
FC2BFD4A21DF81DE00C262B2 /* Kernel.swift in Sources */,
FC9D038420E23B01000F735A /* Texture.swift in Sources */,
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */,
FC2BFD4E21DF820B00C262B2 /* ConvAddBatchNormReluOp.swift in Sources */,
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */,
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */,
FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
......@@ -715,14 +705,13 @@
FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */,
4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */,
FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */,
FC1CF3F721D4B4C400F7392E /* Runner.swift in Sources */,
FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */,
FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */,
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */,
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */,
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
FC292C872142624800CF622F /* Genet.swift in Sources */,
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */,
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */,
FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */,
......@@ -731,6 +720,7 @@
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */,
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */,
FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */,
FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */,
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */,
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
......@@ -742,20 +732,18 @@
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
C28FDF8421B7858F0054EFAC /* MobileNetCombined.swift in Sources */,
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */,
FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */,
FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */,
FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */,
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */,
FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */,
FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */,
FC2BFD5121DF8E0400C262B2 /* Scale.metal in Sources */,
FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */,
FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */,
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */,
4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */,
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */,
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */,
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
......@@ -769,7 +757,9 @@
FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */,
FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */,
FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */,
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */,
FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */,
FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
......@@ -909,7 +899,7 @@
DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_BITCODE = NO;
ENABLE_BITCODE = YES;
INFOPLIST_FILE = "paddle-mobile/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
......@@ -946,7 +936,7 @@
DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_BITCODE = NO;
ENABLE_BITCODE = YES;
INFOPLIST_FILE = "paddle-mobile/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
......
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1010"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</MacroExpansion>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
@objc public enum MetalLoadMode: Int {
case
LoadMetalInPaddleMobile = 1, // 使用 paddle-mobile 中的 metal 代码
LoadMetalInDefaultLib = 2, // 使用 main bundle 中的 metal 代码
LoadMetalInCustomMetalLib = 3 // 使用 metal 库文件
}
@objc public enum ComputePrecision: Int {
case
Float32 = 1,
Float16 = 2
}
@objc public class GlobalConfig: NSObject {
/// 单例
@objc public static let shared: GlobalConfig = GlobalConfig.init()
/// 运算精度, runner 生命周期中不可变
@objc public var computePrecision: ComputePrecision = .Float16
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import Foundation
/// 网络的基类, 参数已经给了默认值,请在子类实现中修改需要改的参数
@objc open class Net: NSObject {
/// 默认为0, 如果指定个数, 后边 except 个op不使用 GPU 运算, 中间结果会通过 fetchResult 传参过来
@objc public var except: Int = 0
/// 预处理 kernel, 如果输入图像需要预处理, 则指定预处理 kernel
@objc public var preprocessKernel: CusomKernel? = nil
// 以下四个参数为从内存中读取模型时用到的参数
/// 模型在内存中的指针
@objc public var modelPointer: UnsafeMutableRawPointer? = nil
/// 模型大小 单位: 字节
@objc public var modelSize: Int = 0
/// 权重参数在内存中的指针
@objc public var paramPointer: UnsafeMutableRawPointer? = nil
/// 权重大小 单位: 字节
@objc public var paramSize: Int = 0
// 以下两个为从文件中读取模型时用到的参数
/// 模型文件路径
@objc public var modelPath: String? = nil
/// 权重文件路径
@objc public var paramPath: String? = nil
/// 代表着 GPU 处理器
@objc public let device: MTLDevice
/// metal 代码加载方式 注意: 如果静态库只能使用 LoadMetalInDefaultLib LoadMetalInCustomMetalLib 进行 load metal 代码
@objc public var metalLoadMode: MetalLoadMode = .LoadMetalInPaddleMobile
/// 当 metalLoadMode 为 LoadMetalInCustomMetalLib 时, metal library 路径不能为空
@objc public var metalLibPath: String? = nil
/// 输入维度,按照 n h w c 方式传入
@objc public var inputDim: Dim = Dim.init(inDim: [])
@objc public init(device: MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
self.paramPointer = paramPointer
self.paramSize = paramSize
self.modelPointer = modePointer
self.modelSize = modelSize
self.device = device
super.init()
}
@objc public init(device: MTLDevice) {
self.device = device
super.init()
}
@objc open func resultStr(res: ResultHolder) -> String {
fatalError()
}
@objc open func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
guard let inResPointer = paddleMobileRes.resultPointer else {
fatalError()
}
return ResultHolder.init(inResult: inResPointer, inCapacity: paddleMobileRes.capacity)
}
open func updateProgram(program: Program) {
}
}
......@@ -12,25 +12,22 @@
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import MetalKit
import Foundation
@objc public enum Platform: Int{
case CPU, GPU
}
class ScaleKernel: CusomKernel {
init(device: MTLDevice, shape: Shape) {
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
} else {
fatalError(" unsupport ")
}
@objc public class ResultHolder: NSObject {
@objc public let result: UnsafeMutablePointer<Float32>
@objc public let capacity: Int
init(inResult: UnsafeMutablePointer<Float32>, inCapacity: Int) {
result = inResult
capacity = inCapacity
}
@objc public func releasePointer() {
result.deinitialize(count: capacity)
result.deallocate()
}
}
@objc public class Runner: NSObject {
......@@ -40,86 +37,77 @@ class ScaleKernel: CusomKernel {
var textureLoader: MTKTextureLoader?
public let net: Net
let device: MTLDevice?
let platform: Platform
var cpuPaddleMobile: PaddleMobileCPU?
let numel: Int
let meansNumber: [NSNumber]
// dims num nchw
let dimsNum: [NSNumber]
/**
* inNet: 需要运行的网络
* commandQueue: GPU 是需要传入
* inPlatform: 需要使用的平台, GPU or CPU
*/
@objc public init(inNet: Net, commandQueue: MTLCommandQueue?, inPlatform: Platform) {
/// 初始化函数
///
/// - Parameters:
/// - inNet: 传入自定义的网络
/// - commandQueue: commandQueue
@objc public init(inNet: Net, commandQueue: MTLCommandQueue?) {
guard inNet.inputDim.cout() == 4 else {
fatalError(" input dim count must 4 ")
}
net = inNet
queue = commandQueue
device = queue?.device
platform = inPlatform
if let inDevice = device {
textureLoader = MTKTextureLoader.init(device: inDevice)
}
if platform == .CPU {
cpuPaddleMobile = PaddleMobileCPU.init()
}
numel = net.dim.n * net.dim.c * net.dim.h * net.dim.w
meansNumber = net.means.map { NSNumber.init(value: $0) }
dimsNum = [NSNumber.init(value: net.dim.n),
NSNumber.init(value: net.dim.c),
NSNumber.init(value: net.dim.h),
NSNumber.init(value: net.dim.w)]
numel = net.inputDim.numel()
}
/**
* load 模型, 返回 true 可进行预测
*/
/// load 模型, 返回 true 可进行预测
///
/// - Returns: load 成功或失败
@objc public func load() -> Bool {
if platform == .GPU {
guard let inDevice = device, let inQueue = queue else {
print(" paddle mobile gpu load error, need MTLCommandQueue")
return false
}
let loader = Loader<Float32>.init()
do {
// program = try loader.load(device: inDevice, paramPointer: net.paramPointer!, paramSize: net.paramSize,modePointer:net.modelPointer!,modelSize:net.modelSize)
program = try loader.load(device: inDevice, modelPath: net.modelPath, paraPath: net.paramPath)
if let inParamPointer = net.paramPointer, let inModelPointer = net.modelPointer {
guard net.paramSize > 0 && net.modelSize > 0 else {
print(" load from memory param size or model size can't 0 ")
return false
}
program = try loader.load(device: inDevice, paramPointer: inParamPointer, paramSize: net.paramSize,modePointer:inModelPointer,modelSize:net.modelSize)
} else if let inModelPath = net.modelPath, let inParamPath = net.paramPath {
program = try loader.load(device: inDevice, modelPath: inModelPath, paraPath: inParamPath)
} else {
print(" model pointer or model file path need be specified")
return false
}
let initContext: InitContext = InitContext.init()
initContext.metalLoadMode = net.metalLoadMode
initContext.metalLibPath = net.metalLibPath
executor = try Executor<Float32>.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!, initContext: initContext)
net.updateProgram(program: program!)
executor = try Executor<Float32>.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!)
} catch let error {
print(error)
return false
}
} else {
return cpuPaddleMobile?.load(net.modelPath, andWeightsPath: net.paramPath) ?? false
}
return true
}
@objc public func predict(inputPointer: UnsafeMutablePointer<Float32>, completion: @escaping ( _ success: Bool, _ result: PaddleMobileCPUResult?) -> Void) {
guard let res = cpuPaddleMobile?.predictInput(inputPointer, dim: dimsNum) else {
completion(false, nil)
return
}
completion(true, res)
}
/**
* GPU 版本 predict
* texture: 需要预测的 texture 需要做过预处理
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
/// 预测
///
/// - Parameters:
/// - texture: 输入 texture 需要使用 getTexture 获得
/// - completion: 结果回调, 当 success 为 true 时 result 不为 nil
@objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) -> Void) {
do {
try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in
try self.executor?.predict(input: texture, dim: self.net.inputDim, completionHandle: { [weak self] (res) in
guard let SSelf = self else {
fatalError( " self nil " )
}
let result = SSelf.net.fetchResult(paddleMobileRes: res)
completion(true, result)
}, preProcessKernle: self.net.preprocessKernel, except: self.net.except)
}, preProcessKernle: self.net.preprocessKernel, except: self.net.except)
} catch let error {
print(error)
completion(false, nil)
......@@ -127,59 +115,64 @@ class ScaleKernel: CusomKernel {
}
}
/**
* CPU GPU 通用版本 predict
* cgImage: 需要预测的图片
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
// @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) {
// if platform == .GPU {
// getTexture(image: cgImage) { [weak self] (texture) in
// guard let SSelf = self else {
// fatalError( "" )
// }
// SSelf.predict(texture: texture, completion: completion)
// }
// } else if platform == .CPU {
// let input = preproccess(image: cgImage)
// predict(inputPointer: input, completion: completion)
// input.deinitialize(count: numel)
// input.deallocate()
// }
// }
/*
* 清理内存, 调用此函数后, 不能再使用, 需重新 load
*/
/// 清理内存, 调用此函数后, 不能再使用, 需重新 load
@objc public func clear() {
if platform == .GPU {
executor?.clear()
executor = nil
program = nil
} else if platform == .CPU {
cpuPaddleMobile?.clear()
}
executor?.clear()
executor = nil
program = nil
}
@objc public func preproccess(image: CGImage) -> UnsafeMutablePointer<Float> {
let output = UnsafeMutablePointer<Float>.allocate(capacity: numel)
let means = net.means.map { NSNumber.init(value: $0) }
let dims = [NSNumber.init(value: net.dim.n),
NSNumber.init(value: net.dim.c),
NSNumber.init(value: net.dim.h),
NSNumber.init(value: net.dim.w)]
cpuPaddleMobile?.preprocess(image, output: output, means: means, scale: net.scale, dim: dims)
return output
}
/*
* 获取 texture, 对 texture 进行预处理, GPU 预测时使用
*/
/// 获取 texture, 对 texture 进行预处理, 预测时使用
///
/// - Parameters:
/// - image: 输入图像
/// - getTexture: 获取 texture 回调
@objc public func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
scaleTexture(input: texture!, complete: getTexture)
}
/// 通过 buffer 获取 texture, 内部会使用GPU进行转换操作
///
/// - Parameters:
/// - inBuffer: 输入buffer
/// - getTexture: 结果回调
@objc public func getTexture(inBuffer: MTLBuffer, getTexture: @escaping (MTLTexture) -> Void) {
guard let inQueue = queue, let inDevice = device else {
fatalError( " queue or devcie nil " )
}
guard let buffer = inQueue.makeCommandBuffer() else {
fatalError( " make buffer error" )
}
let bufferToTextureKernel = BufferToTextureKernel.init(device: inDevice, outputDim: Shape.init(inWidth: net.inputDim[2], inHeight: net.inputDim[1], inChannel: net.inputDim[3]), metalLoadMode: net.metalLoadMode, metalLibPath: net.metalLibPath)
do {
try bufferToTextureKernel.compute(inputBuffer: inBuffer, commandBuffer: buffer)
} catch {
fatalError(" bufferToTextureKernel error ")
}
buffer.addCompletedHandler { (buffer) in
getTexture(bufferToTextureKernel.outputTexture)
}
buffer.commit()
}
/// 更新输入维度, 针对可变长输入模型
///
/// - Parameter inDim: 输入维度
@objc public func updateInputDim(inDim: Dim) {
if net.inputDim != inDim {
guard let inProgram = program else {
fatalError(" need load first ")
}
net.inputDim = inDim
net.updateProgram(program: inProgram)
}
}
public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) {
guard let inQueue = queue, let inDevice = device else {
......@@ -190,7 +183,7 @@ class ScaleKernel: CusomKernel {
fatalError( " make buffer error" )
}
let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3))
let scaleKernel = ScaleKernel.init(device: inDevice, shape: Shape.init(inWidth: net.inputDim[2], inHeight: net.inputDim[1], inChannel: 3), metalLoadMode: net.metalLoadMode, metalLibPath: net.metalLibPath)
do {
try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer)
......@@ -205,5 +198,3 @@ class ScaleKernel: CusomKernel {
buffer.commit()
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#import <CoreImage/CoreImage.h>
#import <Foundation/Foundation.h>
@interface PaddleMobileCPUResult: NSObject
@property (assign, nonatomic, readonly) float *output;
@property (assign, nonatomic, readonly) int outputSize;
-(void)releaseOutput;
@end
@interface PaddleMobileCPU : NSObject
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
加载散开形式的模型, 需传入模型的目录
*/
- (BOOL)load:(NSString *)modelAndWeightPath;
/*
* 从内存中加载模型
* */
- (BOOL)LoadCombinedMemory:(size_t)modelLen
andModelBuf:(const uint8_t *)modelBuf
andModelParamsLen:(size_t)combinedParamsLen
andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf;
/*
* 对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存
* */
-(void)preprocess:(CGImageRef)image
output:(float *)output
means:(NSArray<NSNumber *> *)means
scale:(float)scale
dim:(NSArray<NSNumber *> *)dim;
/*
* 预测预处理后的数据, 返回结果使用结束需要调用其 realseOutput 函数进行释放
* */
- (PaddleMobileCPUResult *)predictInput:(float *)input
dim:(NSArray<NSNumber *> *)dim;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测, 默认 means 为 0, scale 为 1.0
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
@end
//
// MobileNetCombined.swift
// paddle-mobile
//
// Created by Xiao,Haichun on 2018/12/5.
// Copyright © 2018 orange. All rights reserved.
//
import Foundation
public class MobileNetCombined: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 416, w: 416, c: 3)
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = ""
paramPath = ""
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 416, w: 416, c: 3)
}
// class GenetPreProccess: CusomKernel {
// init(device: MTLDevice) {
// let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
// super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false)
// }
// }
override public func resultStr(res: ResultHolder) -> String {
// fatalError()
return " \(res.result![0]) ... "
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import Metal
public class ResultHolder: NSObject {
@objc public let result: UnsafeMutablePointer<Float32>?
@objc public let capacity: Int
init(inResult: UnsafeMutablePointer<Float32>?, inCapacity: Int) {
result = inResult
capacity = inCapacity
}
@objc public func releasePointer() {
result?.deinitialize(count: capacity)
result?.deallocate()
}
}
public class Net: NSObject {
var except: Int = 0
var means: [Float] = []
var scale: Float = 0.0
var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0)
var preprocessKernel: CusomKernel? = nil
var paramPointer: UnsafeMutableRawPointer? = nil
var paramSize: Int = 0
var modelPointer: UnsafeMutableRawPointer? = nil
var modelSize: Int = 0
var modelPath: String = ""
var paramPath: String = ""
var modelDir: String = ""
@objc public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
self.paramPointer = paramPointer
self.paramSize = paramSize
self.modelPointer = modePointer
self.modelSize = modelSize
super.init()
}
public func resultStr(res: ResultHolder) -> String {
fatalError()
}
func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
return ResultHolder.init(inResult: paddleMobileRes.resultPointer, inCapacity: paddleMobileRes.capacity)
}
@objc public init(device: MTLDevice) {
super.init()
}
func updateProgram(program: Program) {
}
}
......@@ -110,9 +110,27 @@ extension Array {
return newArray
}
}
public static func floatArrWithBuffer(floatArrBuffer: UnsafeMutablePointer<Float32>, count: Int) -> [Float32] {
var arr: [Float32] = []
for i in 0..<count {
arr.append(floatArrBuffer[i])
}
return arr
}
}
extension UnsafeMutablePointer {
public func floatArr(count: Int) -> [Pointee]{
var arr: [Pointee] = []
for i in 0..<count {
arr.append(self[i])
}
return arr
}
}
extension String{
extension String {
func cStr() -> UnsafePointer<Int8>? {
return (self as NSString).utf8String
}
......
......@@ -18,6 +18,7 @@ import CoreMedia
fileprivate var defaultMetalLibrary: MTLLibrary?
fileprivate var paddleMobileMetalLibrary: MTLLibrary?
fileprivate var customMetalLibrary: MTLLibrary?
extension MTLDevice {
func defaultLibrary() -> MTLLibrary {
......@@ -31,6 +32,22 @@ extension MTLDevice {
}
}
func customLibrary(metalLibPath: String) -> MTLLibrary {
if customMetalLibrary == nil {
do {
customMetalLibrary = try makeLibrary(filepath: metalLibPath)
} catch let error {
fatalError("\(error)")
}
}
if let inMetalLib = customMetalLibrary {
return inMetalLib
} else {
fatalError(" customlib is nil ")
}
}
func paddleMobileLibrary() -> MTLLibrary {
if paddleMobileMetalLibrary == nil {
guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else {
......@@ -50,8 +67,19 @@ extension MTLDevice {
}
}
func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState {
let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary()
func pipeLine(funcName: String, metalLoadMode: MetalLoadMode, metalLibPath: String?) -> MTLComputePipelineState {
let useLib: MTLLibrary
switch metalLoadMode {
case .LoadMetalInDefaultLib:
useLib = defaultLibrary()
case .LoadMetalInPaddleMobile:
useLib = paddleMobileLibrary()
case .LoadMetalInCustomMetalLib:
useLib = customLibrary(metalLibPath: metalLibPath ?! " can't be nil ")
default:
fatalError()
}
guard let function = useLib.makeFunction(name: funcName) else {
fatalError(" function " + funcName + " not found")
}
......@@ -501,7 +529,7 @@ public extension MTLTexture {
} else {
fatalError(" 目前还不支持其他类型 ")
}
print(textureArray.count)
var output: [Float32] = []
for s in 0..<arrayLength {
for c in 0..<4{
......
......@@ -324,9 +324,10 @@ public class PaddleMobileUnitTest {
let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize)
let initContext = InitContext.init()
initContext.metalLoadMode = .LoadMetalInDefaultLib
let convAddBnReluKernel = ConvAddBatchNormReluKernel<Float32>.init(device: device, testParam: param)
let convAddBnReluKernel = ConvAddBatchNormReluKernel<Float32>.init(device: device, testParam: param, initContext: initContext)
convAddBnReluKernel.test(commandBuffer: buffer, param: param)
......
......@@ -250,39 +250,40 @@ extension InputTexture: Variant {
}
extension MTLTexture where Self: Variant {
}
class FetchHolder: Variant {
public class FetchHolder: Variant {
var resultBuffer: MTLBuffer?
var dim: [Int]
var capacity: Int
public var dim: Dim
public var capacity: Int
public var paddedCapacity: Int
init(inCapacity: Int, inDim: [Int]) {
capacity = inCapacity
init(inPaddedCapacity: Int, inDim: Dim) {
paddedCapacity = inPaddedCapacity
capacity = inDim.numel()
dim = inDim
}
func initBuffer(device: MTLDevice) {
resultBuffer = device.makeBuffer(length: capacity * 4, options: [])
public func initBuffer(device: MTLDevice) {
resultBuffer = device.makeBuffer(length: paddedCapacity * 4, options: [])
}
var result: UnsafeMutablePointer<Float32> {
guard let inResultBuffer = resultBuffer else {
fatalError()
}
return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: capacity)
return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: paddedCapacity)
}
}
extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
public var description: String {
fatalError()
// return "\(result)"
}
var debugDescription: String {
public var debugDescription: String {
fatalError()
// return "\(result)"
}
......
......@@ -14,39 +14,42 @@
import Foundation
public struct Dim {
public init(inDim: [Int]) {
dims = inDim
}
mutating func swapeDimAt(index1: Int, index2: Int) {
dims.swapAt(index1, index2)
}
func cout() -> Int {
return dims.count
}
func numel() -> Int {
return dims.reduce(1) { $0 * $1 }
}
public static func ==(left: Dim, right: Dim) -> Bool {
return left.dims == right.dims;
}
public subscript(index: Int) -> Int {
return dims[index];
}
private(set) var dims: [Int]
private init(){
fatalError()
}
}
extension Dim: CustomStringConvertible {
public var description: String {
return "\(dims)"
}
@objc public class Dim: NSObject {
private(set) var dims: [Int]
@objc public init(inDim: [Int]) {
dims = inDim
}
public func cout() -> Int {
return dims.count
}
public func numel() -> Int {
return dims.reduce(1) { $0 * $1 }
}
public static func ==(left: Dim, right: Dim) -> Bool {
return left.dims == right.dims;
}
public static func !=(left: Dim, right: Dim) -> Bool {
return left.dims != right.dims;
}
public subscript(index: Int) -> Int {
return dims[index];
}
public override var description: String {
return "\(dims)"
}
func swapeDimAt(index1: Int, index2: Int) {
dims.swapAt(index1, index2)
}
private override init(){
fatalError()
}
}
......@@ -15,19 +15,16 @@
import Foundation
let testTo = 81
let testTo = 5
var isTest = false
let computePrecision: ComputePrecision = .Float16
public class GPUResultHolder {
public let dim: [Int]
public let capacity: Int
public var resultPointer: UnsafeMutablePointer<Float32>?
public var intermediateResults: [String : [Variant]]?
public let elapsedTime: Double
public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) {
@objc public class GPUResultHolder: NSObject{
@objc public let dim: [Int]
@objc public let capacity: Int
@objc public var resultPointer: UnsafeMutablePointer<Float32>?
@objc public var intermediateResults: [String : [MTLBuffer]]?
public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inIntermediateResults: [String : [MTLBuffer]]? = nil) {
dim = inDim
capacity = inCapacity
......@@ -36,64 +33,34 @@ public class GPUResultHolder {
resultPointer?.initialize(from: inInPointer, count: inCapacity)
}
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults
}
}
extension GPUResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
public var debugDescription: String {
// var str = ""
// str += "Dim: \(dim) \n value:[ "
// if resultArr.count < 20 {
// for d in resultArr {
// str += " \(d) "
// }
// } else {
// for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
// str += " \(resultArr[d]) "
// }
// }
// str += " ]"
// return str
public override var description: String {
fatalError()
}
public var description: String {
return debugDescription
}
}
public class Executor<P: PrecisionType> {
var ops: [Runable & InferShaperable] = []
var preInputDim: Dim = Dim.init(inDim: [])
let program: Program
let device: MTLDevice
let inflightSemaphore: DispatchSemaphore
let queue: MTLCommandQueue
public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws {
self.inflightSemaphore = DispatchSemaphore(value: 3)
init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program, initContext: InitContext) throws {
self.inflightSemaphore = DispatchSemaphore(value: 1)
program = inProgram
device = inDevice
queue = inQueue
// print("before for ")
//print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
for block in inProgram.programDesc.blocks {
//block.ops.count
for i in 0..<block.ops.count {
let opDesc = block.ops[i]
do {
// print("in for i \(i): ")
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
//
// if i == 56 {
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
//
// }
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope)
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope, initContext: initContext)
ops.append(op)
} catch let error {
throw error
......@@ -102,11 +69,12 @@ public class Executor<P: PrecisionType> {
}
}
public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws {
public func predict(input: MTLTexture, dim: Dim, completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws {
inflightSemaphore.wait()
guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
}
inflightSemaphore.wait()
let resInput: MTLTexture
if let inPre = preProcessKernle {
......@@ -120,8 +88,7 @@ public class Executor<P: PrecisionType> {
resInput = input
}
let beforeDate = Date.init()
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: dim))
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: dim)
program.scope.setInput(input: inputTexture)
//(ops.count - except)
for i in 0..<(ops.count - except) {
......@@ -133,64 +100,45 @@ public class Executor<P: PrecisionType> {
}
}
var outputTextures: [String : [Variant]]?
var outputTextures: [String : [MTLBuffer]]?
if except > 0 {
ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer)
outputTextures = ops[ops.count - except].inputVariant()
}
buffer.addCompletedHandler { [weak self] (commandbuffer) in
// let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
// print(inputArr.strideArray())
//
//// print(dim)
// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
// print(" write done ")
// print("write to library done")
// return
// print(inputArr)
//
// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray()
// print(stridableInput)
//
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
// for i in 0..<self!.ops.count {
// let op = self!.ops[i]
// print(" 第 \(i) 个 op: ")
// op.delogOutput()
// }
// return;
// self!.ops[testTo - 2].delogOutput()
// self!.ops[testTo - 1].delogOutput()
// self!.ops[5].delogOutput()
// return
guard let SSelf = self else {
// return
fatalError()
}
//将输入写进文件
/*
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(dim)
writeToLibrary(fileName: "test_image_super", array: inputArr)
print(" write done ")
return
*/
/* 输出 op 计算结果
for op in SSelf.ops {
op.delogOutput()
}
*/
let afterDate = Date.init()
var resultHolder: GPUResultHolder
if except > 0 {
resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures)
resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inIntermediateResults: outputTextures)
} else {
let outputVar: Variant = SSelf.program.scope.output()!
let output: FetchHolder = outputVar as! FetchHolder
// let beforeToTensorDate = Date.init()
resultHolder = GPUResultHolder.init(inDim: output.dim, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate))
// let timeToTensor = Date.init().timeIntervalSince(beforeToTensorDate)
// print(timeToTensor)
resultHolder = GPUResultHolder.init(inDim: output.dim.dims, inPointer: output.result, inCapacity: output.capacity)
}
completionHandle(resultHolder)
SSelf.inflightSemaphore.signal()
}
buffer.commit()
}
......
......@@ -150,6 +150,9 @@ public class Loader<P: PrecisionType> {
let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
// let programDesc = ProgramDesc.init(protoProgram: protoProgram)
print(programDesc)
guard programDesc.blocks.count > 0 else {
......@@ -210,7 +213,7 @@ public class Loader<P: PrecisionType> {
scope[varDesc.name] = tensor
} else {
let dim = Dim.init(inDim: tensorDesc.dims)
scope[varDesc.name] = Texture<P>.init(device: device, inDim: dim)
scope[varDesc.name] = Texture.init(device: device, inDim: dim)
}
} else {
if varDesc.name == fetchKey {
......
......@@ -28,9 +28,7 @@ extension Tensorial {
}
}
public enum ComputePrecision {
case Float32, Float16
}
class Tensor<P: PrecisionType>: Tensorial {
......@@ -97,7 +95,7 @@ class Tensor<P: PrecisionType>: Tensorial {
func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, convertToNHWC: Bool = true, withTranspose: Bool = false) {
func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, padWhenOneC: Bool = false, convertToNHWC: Bool = true, withTranspose: Bool = false) {
if convertToNHWC {
// print(layout)
convert(to: DataLayout.NHWC())
......@@ -145,7 +143,7 @@ class Tensor<P: PrecisionType>: Tensorial {
case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count)
}
} else if C == 1 {
} else if C == 1 && !padWhenOneC {
buffer = device.makeBuffer(length: numel() * precisionSize)
switch precision {
case .Float32:
......@@ -238,10 +236,32 @@ class Tensor<P: PrecisionType>: Tensorial {
data.release()
}
var n: Int {
get {
if dim.cout() == 4 {
if layout == DataLayout.NCHW() {
return dim[0]
} else if layout == DataLayout.NHWC() {
return dim[0]
} else {
fatalError(" unsupport ")
}
} else {
fatalError()
}
}
}
var width: Int {
get {
if dim.cout() == 4 {
return dim[1]
if layout == DataLayout.NHWC() {
return dim[2]
} else if layout == DataLayout.NCHW() {
return dim[3]
} else {
fatalError(" unsupport ")
}
} else {
fatalError()
}
......@@ -251,7 +271,13 @@ class Tensor<P: PrecisionType>: Tensorial {
var height: Int {
get {
if dim.cout() == 4 {
return dim[2]
if layout == DataLayout.NHWC() {
return dim[1]
} else if layout == DataLayout.NCHW() {
return dim[2]
} else {
fatalError(" unsupport ")
}
} else {
fatalError()
}
......@@ -261,7 +287,13 @@ class Tensor<P: PrecisionType>: Tensorial {
var channel: Int {
get {
if dim.cout() == 4 {
return dim[3]
if layout == DataLayout.NHWC() {
return dim[3]
} else if layout == DataLayout.NCHW() {
return dim[1]
} else {
fatalError(" unsupport ")
}
} else {
fatalError()
}
......
......@@ -68,16 +68,20 @@ extension InputTexture {
.height = 1
.len = 1
*/
public class Texture<P: PrecisionType>: Tensorial {
var dim: Dim
public class Texture: Tensorial {
public var dim: Dim
public var tensorDim: Dim
/// tensor dim pad to four
public var padToFourDim: Dim
private var textureDesc: MTLTextureDescriptor!
public var metalTexture: MTLTexture!
var transpose: [Int] = [0, 1, 2, 3]
func elementCount() -> Int {
return metalTexture.width * metalTexture.height * metalTexture.arrayLength * 4
}
func toTensor() -> [Float32] {
guard padToFourDim.cout() == 4 else {
fatalError("- not support -")
......@@ -92,15 +96,15 @@ public class Texture<P: PrecisionType>: Tensorial {
return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
}
func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) {
public func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) {
transpose = inTranspose
for i in 0..<(4 - tensorDim.cout()) {
if i != inTranspose[i] {
fatalError()
}
}
let newDim = transpose.map { padToFourDim[$0] }
let newDim = transpose.map { padToFourDim[$0] }
let newLayout = transpose.map { layout.layoutWithDim[$0] }
layout = DataLayout.init(newLayout)
......@@ -139,7 +143,29 @@ public class Texture<P: PrecisionType>: Tensorial {
metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! " texture nil "
}
public func updateDims(inTensorDim: Dim, inDim: Dim) {
var fourDim: Dim
if inDim.cout() == 4 {
fourDim = inDim
} else if inDim.cout() < 4 {
var fourDimNum: [Int] = []
for _ in 0..<(4 - inDim.cout()) {
fourDimNum.append(1)
}
fourDimNum.append(contentsOf: inDim.dims)
fourDim = Dim.init(inDim: fourDimNum)
} else {
fatalError(" not support ")
}
tensorDim = inTensorDim
dim = fourDim
padToFourDim = fourDim
}
// 初始化时 dim padToFourDim 模型中的维度(一般来说 nchw),前面补全0
init(device: MTLDevice, inDim: Dim) {
print(" in dim > \(inDim)")
var fourDim: Dim
if inDim.cout() == 4 {
fourDim = inDim
......
......@@ -27,19 +27,19 @@ class OpCreator<P: PrecisionType> {
}
}
func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope) throws -> Runable & InferShaperable {
func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable {
guard let opCreator = opCreators[opDesc.type] else {
throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet")
}
do {
return try opCreator(device, opDesc, scope)
return try opCreator(device, opDesc, scope, initContext)
} catch let error {
throw error
}
}
let opCreators: [String : (MTLDevice, OpDesc, Scope) throws -> Runable & InferShaperable] =
let opCreators: [String : (MTLDevice, OpDesc, Scope, InitContext) throws -> Runable & InferShaperable] =
[gConvType : ConvOp<P>.creat,
gBatchNormType : BatchNormOp<P>.creat,
gReluType : ReluOp<P>.creat,
......
......@@ -31,7 +31,7 @@ protocol Runable {
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws
func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
func delogOutput()
func inputVariant() -> [String : [Variant]]
func inputVariant() -> [String : [MTLBuffer]]
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer)
}
......@@ -44,7 +44,7 @@ extension Runable where Self: OperatorProtocol{
}
}
func inputVariant() -> [String : [Variant]] {
func inputVariant() -> [String : [MTLBuffer]] {
// return [:]
fatalError(" op \(type) need implement inputVariant")
}
......@@ -59,15 +59,26 @@ extension Runable where Self: OperatorProtocol{
}
}
public class InitContext {
/// metal 代码加载方式
var metalLoadMode: MetalLoadMode = .LoadMetalInDefaultLib
/// 当 metalLoadMode 为 LoadMetalInCustomMetalLib 时, metal library 路径不能为空
var metalLibPath: String? = nil
init() {
metalLoadMode = .LoadMetalInDefaultLib
metalLibPath = nil
}
}
protocol Creator where Self: OperatorProtocol{
associatedtype OpType: OperatorProtocol & Runable & InferShaperable
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType
}
extension Creator where Self: OperatorProtocol {
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType {
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType {
do {
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope)
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error {
throw error
}
......@@ -89,13 +100,13 @@ protocol OperatorProtocol {
var attrs: [String : Attr] { get }
var para: ParamType { get }
var kernel: KerType { get }
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws
}
extension OperatorProtocol {
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self {
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> Self {
do {
return try Self.init(device: device, opDesc: opDesc, inScope: inScope)
return try Self.init(device: device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error {
throw error
}
......@@ -103,18 +114,7 @@ extension OperatorProtocol {
}
class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
typealias ParamType = ParameterType
typealias KerType = KernelType
let type: String
let inputs: [String : [String]]
var paraInputs: [String : [String]]
let outpus: [String : [String]]
let attrs: [String : Attr]
let para: ParamType
let scope: Scope
var kernel: KerType
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
// print("create op: \(opDesc.type)")
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws {
type = opDesc.type
scope = inScope
inputs = opDesc.inputs
......@@ -126,8 +126,19 @@ class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where
} catch let error {
throw error
}
kernel = KernelType.init(device: device, param: para)
kernel = KernelType.init(device: device, param: para, initContext: initContext)
}
typealias ParamType = ParameterType
typealias KerType = KernelType
let type: String
let inputs: [String : [String]]
var paraInputs: [String : [String]]
let outpus: [String : [String]]
let attrs: [String : Attr]
let para: ParamType
let scope: Scope
var kernel: KerType
}
// op infos
......
......@@ -34,8 +34,8 @@ class BatchNormParam<P: PrecisionType>: OpParam {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
let bias: Tensor<P>
let mean: Tensor<P>
let scale: Tensor<P>
......
......@@ -30,8 +30,8 @@ class BilinearInterpParam<P: PrecisionType>: OpParam {
fatalError()
}
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
let out_h: Int
let out_w: Int
}
......
......@@ -37,10 +37,10 @@ class BoxcoderParam<P: PrecisionType>: OpParam {
assert(codeType == "decode_center_size") // encode_center_size is not implemented
assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1)
}
let priorBox: Texture<P>
let priorBoxVar: Texture<P>
let targetBox: Texture<P>
var output: Texture<P>
let priorBox: Texture
let priorBoxVar: Texture
let targetBox: Texture
var output: Texture
let codeType: String
let boxNormalized: Bool
}
......
......@@ -22,7 +22,7 @@ class ConcatParam<P: PrecisionType>: OpParam {
fatalError()
}
for x in xlist {
guard let variant = inScope[x], let v = variant as? Texture<P> else {
guard let variant = inScope[x], let v = variant as? Texture else {
fatalError()
}
if transpose.count == 0 {
......@@ -40,8 +40,8 @@ class ConcatParam<P: PrecisionType>: OpParam {
throw error
}
}
var input: [Texture<P>] = []
var output: Texture<P>
var input: [Texture] = []
var output: Texture
var transpose: [Int] = []
let axis: Int
}
......
......@@ -34,12 +34,12 @@ class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
}
}
let input: Texture<P>
let input: Texture
let y: Tensor<P>
let filter: Tensor<P>
let mode: String
let alpha: Tensor<P>
var output: Texture<P>
var output: Texture
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
......
......@@ -40,7 +40,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
}
}
let input: Texture<P>
let input: Texture
let variance: Tensor<P>
let bias: Tensor<P>
......@@ -52,7 +52,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
var output: Texture
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
......
......@@ -32,11 +32,11 @@ class ConvAddParam<P: PrecisionType>: OpParam {
}
}
let input: Texture<P>
let input: Texture
let y: Tensor<P>
let filter: Tensor<P>
var output: Texture<P>
var output: Texture
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
......@@ -111,6 +111,7 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
// print(biase)
print(" \(type) output: ")
print(para.output.metalTexture)
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
}
}
......@@ -33,12 +33,12 @@ class ConvAddPreluParam<P: PrecisionType>: OpParam {
}
}
let input: Texture<P>
let input: Texture
let y: Tensor<P>
let filter: Tensor<P>
let mode: String
let alpha: Tensor<P>
var output: Texture<P>
var output: Texture
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
......
......@@ -36,8 +36,7 @@ class ConvBNReluParam<P: PrecisionType>: OpParam {
}
}
let input: Texture<P>
let input: Texture
let variance: Tensor<P>
let bias: Tensor<P>
let mean: Tensor<P>
......@@ -47,7 +46,7 @@ class ConvBNReluParam<P: PrecisionType>: OpParam {
var newScale: MTLBuffer?
var newBiase: MTLBuffer?
var output: Texture<P>
var output: Texture
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
......
......@@ -31,9 +31,9 @@ class ConvParam<P: PrecisionType>: OpParam {
}
}
let input: Texture<P>
let input: Texture
let filter: Tensor<P>
var output: Texture<P>
var output: Texture
let stride: [Int32]
let paddings: [Int32]
let dilations: [Int32]
......
......@@ -17,14 +17,6 @@ import Foundation
class DepthConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = DepthConvOp<P>
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
}
}
func inferShape() {
let inDims = para.input.dim
......
......@@ -32,7 +32,7 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam {
let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision)
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: GlobalConfig.shared.computePrecision)
}
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
......@@ -55,9 +55,9 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam {
}
}
var inputX: Texture<P>
var inputY: Texture<P>
var output: Texture<P>
var inputX: Texture
var inputY: Texture
var output: Texture
var axis: Int
}
......
......@@ -34,7 +34,7 @@ class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision)
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: GlobalConfig.shared.computePrecision)
}
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
......@@ -59,9 +59,9 @@ class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
let mode: String
let alpha: Tensor<P>
var inputX: Texture<P>
var inputY: Texture<P>
var output: Texture<P>
var inputX: Texture
var inputY: Texture
var output: Texture
var axis: Int
}
......
......@@ -17,7 +17,7 @@ import MetalKit
import CoreMedia
class FeedParam<P: PrecisionType>: OpParam{
var output: Texture<P>
var output: Texture
var input: InputTexture {
return scope.input() as! InputTexture
}
......@@ -63,7 +63,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray())
print(para.output.metalTexture)
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[3], h: para.output.padToFourDim[2], w: para.output.padToFourDim[1])).strideArray())
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import Metal
class FetchParam<P: PrecisionType>: OpParam{
var output: FetchHolder
let input: Texture
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = FetchHolder.init(inPaddedCapacity: input.elementCount(), inDim: input.tensorDim)
scope.setOutput(output: output)
} catch let error {
throw error
}
}
//typealias ParamPrecisionType = P
}
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = FetchOp<P>
func inferShape() {
print(para.input.dim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print("fetch output: ")
let resArr = self.para.output.result.floatArr(count: self.para.output.capacity)
print(resArr.strideArray())
}
}
......@@ -25,8 +25,8 @@ class FlattenParam<P: PrecisionType>: OpParam {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
let axis: Int
}
......
......@@ -21,14 +21,14 @@ public protocol TestParam {
public protocol Testable {
associatedtype TestParamType: TestParam
func test(commandBuffer: MTLCommandBuffer, param: TestParamType)
init(device: MTLDevice, testParam: TestParamType)
init(device: MTLDevice, testParam: TestParamType, initContext: InitContext)
}
protocol Computable {
associatedtype ParamType: OpParam
func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws
init(device: MTLDevice, param: ParamType)
init(device: MTLDevice, param: ParamType, initContext: InitContext)
}
protocol KernelProtocol {
......@@ -37,37 +37,83 @@ protocol KernelProtocol {
}
open class Kernel {
@objc open class Kernel: NSObject{
let pipline: MTLComputePipelineState
let functionName: String
public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) {
pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib)
public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = false, initContext: InitContext) {
pipline = device.pipeLine(funcName: inFunctionName, metalLoadMode: initContext.metalLoadMode, metalLibPath: initContext.metalLibPath)
functionName = inFunctionName
}
}
open class CusomKernel: Kernel {
public struct Shape {
public let width: Int
public let height: Int
public let channel: Int
public init(inWidth: Int, inHeight: Int, inChannel: Int){
width = inWidth
height = inHeight
channel = inChannel
@objc public class Shape: NSObject {
public let width: Int
public let height: Int
public let channel: Int
@objc public init(inWidth: Int, inHeight: Int, inChannel: Int){
width = inWidth
height = inHeight
channel = inChannel
}
}
open class BufferToTextureKernel: Kernel {
public let outputTexture: MTLTexture
public init(device: MTLDevice, outputDim: Shape, metalLoadMode: MetalLoadMode, metalLibPath: String?) {
let textureDesc = MTLTextureDescriptor.init()
textureDesc.textureType = .type2D
textureDesc.width = outputDim.width
textureDesc.height = outputDim.height
textureDesc.depth = (outputDim.channel + 3) / 4
if GlobalConfig.shared.computePrecision == .Float16 {
textureDesc.pixelFormat = .rgba16Float
} else if GlobalConfig.shared.computePrecision == .Float32 {
textureDesc.pixelFormat = .rgba32Float
} else {
fatalError()
}
textureDesc.usage = [.shaderRead, .shaderWrite]
textureDesc.storageMode = .shared
outputTexture = device.makeTexture(descriptor: textureDesc) ?! " make texture error "
let initContext = InitContext.init()
initContext.metalLibPath = metalLibPath
initContext.metalLoadMode = metalLoadMode
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "buffer_to_texture_kernel", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "buffer_to_texture_kernel_half", initContext: initContext)
}
}
public func compute(inputBuffer: MTLBuffer , commandBuffer: MTLCommandBuffer) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setBuffer(inputBuffer, offset: 0, index: 0)
encoder.setTexture(outputTexture, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: outputTexture)
encoder.endEncoding()
}
}
@objc open class CusomKernel: Kernel {
public let outputTexture: MTLTexture
public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) {
public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, metalLoadModel: MetalLoadMode, metalLibPath: String?) {
let textureDesc = MTLTextureDescriptor.init()
textureDesc.textureType = .type2D
textureDesc.width = outputDim.width
textureDesc.height = outputDim.height
textureDesc.depth = (outputDim.channel + 3) / 4
if computePrecision == .Float16 {
if GlobalConfig.shared.computePrecision == .Float16 {
textureDesc.pixelFormat = .rgba16Float
} else if computePrecision == .Float32 {
} else if GlobalConfig.shared.computePrecision == .Float32 {
textureDesc.pixelFormat = .rgba32Float
} else {
fatalError()
......@@ -77,7 +123,10 @@ open class CusomKernel: Kernel {
textureDesc.storageMode = .shared
outputTexture = device.makeTexture(descriptor: textureDesc) ?! " make texture error "
super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib)
let context = InitContext.init()
context.metalLoadMode = metalLoadModel
context.metalLibPath = metalLibPath
super.init(device: device, inFunctionName: inFunctionName, initContext: context)
}
public func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
......
......@@ -15,7 +15,7 @@
import Foundation
class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
required init(device: MTLDevice, param: BatchNormParam<P>) {
required init(device: MTLDevice, param: BatchNormParam<P>, initContext: InitContext) {
let count = param.variance.dim.numel()
let varianceP = param.variance.data.pointer
let meanP = param.mean.data.pointer
......@@ -27,13 +27,13 @@ class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
scaleP[i] = invStd * scaleP[i]
}
param.bias.initBuffer(device: device, precision: computePrecision)
param.scale.initBuffer(device: device, precision: computePrecision)
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "batchnorm")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "batchnorm_half")
param.bias.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.scale.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "batchnorm", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "batchnorm_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -41,12 +41,12 @@ class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding()
}
required init(device: MTLDevice, param: BilinearInterpParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "bilinear_interp_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "bilinear_interp_half")
required init(device: MTLDevice, param: BilinearInterpParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "bilinear_interp_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "bilinear_interp_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -32,12 +32,12 @@ class BoxcoderKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding()
}
required init(device: MTLDevice, param: BoxcoderParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2], computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "boxcoder_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "boxcoder_half")
required init(device: MTLDevice, param: BoxcoderParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2], computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "boxcoder_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "boxcoder_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -52,8 +52,8 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding()
}
required init(device: MTLDevice, param: ConcatParam<P>) {
param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: computePrecision)
required init(device: MTLDevice, param: ConcatParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: GlobalConfig.shared.computePrecision)
let orank = param.output.tensorDim.cout()
let num = param.input.count
assert(num <= 6)
......@@ -133,16 +133,16 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{
}
}
pm.vdim = (Int32(vdim[0]), Int32(vdim[1]), Int32(vdim[2]), Int32(vdim[3]), Int32(vdim[4]), Int32(vdim[5]))
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_half")
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_half", initContext: initContext)
} else {
fatalError()
}
}
required init(device: MTLDevice, testParam: ConcatTestParam) {
super.init(device: device, inFunctionName: "concat")
required init(device: MTLDevice, testParam: ConcatTestParam, initContext: InitContext) {
super.init(device: device, inFunctionName: "concat", initContext: initContext)
}
}
......@@ -16,99 +16,99 @@ import Foundation
class ConvAddAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
required init(device: MTLDevice, param: ConvAddAddPreluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float16 {
if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half", initContext: initContext)
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half", initContext: initContext)
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half", initContext: initContext)
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half", initContext: initContext)
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half", initContext: initContext)
}
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
} else if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float", initContext: initContext)
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float", initContext: initContext)
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float", initContext: initContext)
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float", initContext: initContext)
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float", initContext: initContext)
}
} else {
fatalError(" unsupport yet ")
......
......@@ -37,44 +37,44 @@ struct ConvAddBatchNormReluTestParam: TestParam {
}
class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) {
required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam, initContext: InitContext) {
if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1", initContext: initContext)
} else if testParam.filterSize.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3", initContext: initContext)
}
}
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.variance.initBuffer(device: device, precision: .Float32)
param.mean.initBuffer(device: device, precision: .Float32)
param.scale.initBuffer(device: device, precision: .Float32)
param.bias.initBuffer(device: device, precision: .Float32)
if computePrecision == .Float32 {
if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1", initContext: initContext)
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3", initContext: initContext)
} else {
fatalError(" unsupport ")
}
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1_half")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1_half", initContext: initContext)
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3_half")
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3_half", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3_half")
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3_half", initContext: initContext)
} else {
fatalError(" unsupport ")
}
......@@ -120,10 +120,10 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
var newBiaseBuffer: MTLBuffer
var newScaleBuffer: MTLBuffer
if computePrecision == .Float32 {
if GlobalConfig.shared.computePrecision == .Float32 {
newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
......
......@@ -16,36 +16,37 @@ import Foundation
class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
required init(device: MTLDevice, param: ConvAddParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
let padWhenOneC = !(param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1])
param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision, padWhenOneC: padWhenOneC)
param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float16 {
if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x1_half")
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half")
super.init(device: device, inFunctionName: "conv_add_1x1_half", initContext: initContext)
} else if param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1] {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_3x3_half")
super.init(device: device, inFunctionName: "conv_add_3x3_half", initContext: initContext)
} else if param.filter.width == 1 && param.filter.height == 5 {
super.init(device: device, inFunctionName: "conv_add_5x1_half")
super.init(device: device, inFunctionName: "conv_add_5x1_half", initContext: initContext)
} else if param.filter.width == 5 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x5_half")
super.init(device: device, inFunctionName: "conv_add_1x5_half", initContext: initContext)
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
} else if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x1")
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3")
super.init(device: device, inFunctionName: "conv_add_1x1", initContext: initContext)
} else if param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1] {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3", initContext: initContext)
} else if param.filter.width == 1 && param.filter.height == 5 {
super.init(device: device, inFunctionName: "conv_add_5x1")
super.init(device: device, inFunctionName: "conv_add_5x1", initContext: initContext)
} else if param.filter.width == 5 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x5")
super.init(device: device, inFunctionName: "conv_add_1x5", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_3x3")
super.init(device: device, inFunctionName: "conv_add_3x3", initContext: initContext)
} else {
fatalError(" unsupport yet ")
}
......
......@@ -16,99 +16,99 @@ import Foundation
class ConvAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.y.initBuffer(device: device, precision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
required init(device: MTLDevice, param: ConvAddPreluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float16 {
if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half", initContext: initContext)
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half", initContext: initContext)
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half", initContext: initContext)
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half", initContext: initContext)
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half", initContext: initContext)
}
} else {
fatalError(" unsupport yet ")
}
} else if computePrecision == .Float32 {
} else if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float", initContext: initContext)
}
} else if param.filter.channel == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float")
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float", initContext: initContext)
}
} else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float", initContext: initContext)
}
} else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float", initContext: initContext)
}
} else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float")
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float", initContext: initContext)
}
} else {
fatalError(" unsupport yet ")
......
......@@ -38,44 +38,44 @@ struct ConvBNReluTestParam: TestParam {
}
class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
required init(device: MTLDevice, testParam: ConvBNReluTestParam) {
required init(device: MTLDevice, testParam: ConvBNReluTestParam, initContext: InitContext) {
if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1")
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1", initContext: initContext)
} else if testParam.filterSize.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3", initContext: initContext)
}
}
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvBNReluParam<P>) {
required init(device: MTLDevice, param: ConvBNReluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision)
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.variance.initBuffer(device: device, precision: .Float32)
param.mean.initBuffer(device: device, precision: .Float32)
param.scale.initBuffer(device: device, precision: .Float32)
param.bias.initBuffer(device: device, precision: .Float32)
if computePrecision == .Float32 {
if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1")
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1", initContext: initContext)
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3")
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3", initContext: initContext)
} else {
fatalError(" unsupport ")
}
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half")
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half", initContext: initContext)
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half")
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half")
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half", initContext: initContext)
} else {
fatalError(" unsupport ")
}
......@@ -122,10 +122,10 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
var newBiaseBuffer: MTLBuffer
var newScaleBuffer: MTLBuffer
if computePrecision == .Float32 {
if GlobalConfig.shared.computePrecision == .Float32 {
newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
......
......@@ -26,14 +26,14 @@ public struct MetalConvParam {
class ConvKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvParam<P>) {
required init(device: MTLDevice, param: ConvParam<P>, initContext: InitContext) {
param.filter.initBuffer(device: device, precision: ComputePrecision.Float32)
if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_1x1")
super.init(device: device, inFunctionName: "conv_1x1", initContext: initContext)
} else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_3x3")
super.init(device: device, inFunctionName: "depthwise_conv_3x3", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_3x3")
super.init(device: device, inFunctionName: "conv_3x3", initContext: initContext)
} else {
fatalError(" unsupport ")
}
......
......@@ -30,18 +30,18 @@ struct MetalConvTransposeParam {
class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: MetalConvTransposeParam!
required init(device: MTLDevice, param: ConvTransposeParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision, convertToNHWC: false, withTranspose: true)
if computePrecision == .Float32 {
required init(device: MTLDevice, param: ConvTransposeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision, convertToNHWC: false, withTranspose: true)
if GlobalConfig.shared.computePrecision == .Float32 {
if param.stride == [2, 2] && param.stride == [2, 2] {
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2")
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2", initContext: initContext)
} else {
fatalError(" -- conv transpose unsupported yet -- ")
}
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
if param.stride == [2, 2] && param.stride == [2, 2] {
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2_half")
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2_half", initContext: initContext)
} else {
fatalError(" -- conv transpose unsupported yet -- ")
}
......
......@@ -26,8 +26,8 @@ struct ElementwiseAddMetalParam {
class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
required init(device: MTLDevice, param: ElementwiseAddParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: GlobalConfig.shared.computePrecision)
metalParam = ElementwiseAddMetalParam.init()
......@@ -50,10 +50,10 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
// print("===> elementwise_add fast!!!")
metalParam.fast = 1
}
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "elementwise_add")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "elementwise_add_half")
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "elementwise_add", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "elementwise_add_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -17,9 +17,9 @@ import Foundation
class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddPreluParam<P>) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision)
required init(device: MTLDevice, param: ElementwiseAddPreluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: GlobalConfig.shared.computePrecision)
param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
metalParam = ElementwiseAddMetalParam.init()
......@@ -43,21 +43,21 @@ class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable {
metalParam.fast = 1
}
if computePrecision == .Float32 {
if GlobalConfig.shared.computePrecision == .Float32 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_float")
super.init(device: device, inFunctionName: "elementwise_add_channel_float", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_element_float")
super.init(device: device, inFunctionName: "elementwise_add_element_float", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "elementwise_add_prelu_float")
super.init(device: device, inFunctionName: "elementwise_add_prelu_float", initContext: initContext)
}
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
super.init(device: device, inFunctionName: "elementwise_add_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
super.init(device: device, inFunctionName: "elementwise_add_channel_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "elementwise_add_channel_half")
super.init(device: device, inFunctionName: "elementwise_add_channel_half", initContext: initContext)
}
} else {
fatalError()
......
......@@ -13,76 +13,49 @@
limitations under the License. */
import Foundation
import Metal
class FetchParam<P: PrecisionType>: OpParam{
var output: FetchHolder
let input: Texture<P>
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = FetchHolder.init(inCapacity: input.numel(), inDim: input.tensorDim.dims)
scope.setOutput(output: output)
} catch let error {
throw error
}
}
//typealias ParamPrecisionType = P
}
class FetchKernel<P: PrecisionType>: Kernel, Computable {
func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: FetchParam<P>) {
required init(device: MTLDevice, param: FetchParam<P>, initContext: InitContext) {
param.output.initBuffer(device: device)
if computePrecision == .Float16 {
if GlobalConfig.shared.computePrecision == .Float16 {
if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch_half")
super.init(device: device, inFunctionName: "fetch_half", initContext: initContext)
} else if param.input.transpose == [0, 1, 2, 3] {
switch param.input.tensorDim.cout() {
case 1, 2:
super.init(device: device, inFunctionName: "fetch_1or2_half", initContext: initContext)
default:
fatalError(" not support ")
}
} else {
// fatalError(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder_half")
print(" not support ")
fatalError(" not support ")
}
} else if computePrecision == .Float32 {
} else if GlobalConfig.shared.computePrecision == .Float32 {
if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch")
super.init(device: device, inFunctionName: "fetch_float", initContext: initContext)
} else if param.input.transpose == [0, 1, 2, 3] {
switch param.input.tensorDim.cout() {
case 1, 2:
super.init(device: device, inFunctionName: "fetch_1or2_float", initContext: initContext)
default:
fatalError(" not support ")
}
} else {
print(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder")
// fatalError(" not support ")
fatalError(" not support ")
}
} else {
fatalError(" not support ")
}
}
}
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = FetchOp<P>
func inferShape() {
print(para.input.dim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture)
encoder.endEncoding()
}
}
......@@ -26,8 +26,8 @@ class FlattenKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: FlattenMetalParam
required init(device: MTLDevice, param: FlattenParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
required init(device: MTLDevice, param: FlattenParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
......@@ -47,10 +47,10 @@ class FlattenKernel<P: PrecisionType>: Kernel, Computable{
let irank = param.input.tensorDim.cout()
let orank = param.output.tensorDim.cout()
assert(orank == 2)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_half")
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -17,16 +17,16 @@ import Foundation
class MulticlassNMSKernel<P: PrecisionType>: Kernel, Computable{
let pipline1: MTLComputePipelineState
required init(device: MTLDevice, param: MulticlassNMSParam<P>) {
required init(device: MTLDevice, param: MulticlassNMSParam<P>, initContext: InitContext) {
param.middleOutput.initBuffer(device: device)
param.bboxOutput.initBuffer(device: device)
if computePrecision == .Float32 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", inPaddleMobileLib: true)
super.init(device: device, inFunctionName: "nms_fetch_result")
} else if computePrecision == .Float16 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", inPaddleMobileLib: true)
super.init(device: device, inFunctionName: "nms_fetch_result_half")
if GlobalConfig.shared.computePrecision == .Float32 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", metalLoadMode: initContext.metalLoadMode, metalLibPath: initContext.metalLibPath)
super.init(device: device, inFunctionName: "nms_fetch_result", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", metalLoadMode: initContext.metalLoadMode, metalLibPath: initContext.metalLibPath)
super.init(device: device, inFunctionName: "nms_fetch_result_half", initContext: initContext)
} else {
fatalError( " unsupport precision " )
}
......
......@@ -26,8 +26,8 @@ struct PoolMetalParam {
class PoolKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PoolMetalParam
required init(device: MTLDevice, param: PoolParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
required init(device: MTLDevice, param: PoolParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
var poolType: Int32
switch param.poolType {
......@@ -48,10 +48,10 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
poolType: poolType
)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "pool")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "pool_half")
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "pool_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "pool_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -15,24 +15,24 @@
import Foundation
class PreluKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: PreluParam<P>) {
param.alpha.initBuffer(device: device, precision: computePrecision)
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
required init(device: MTLDevice, param: PreluParam<P>, initContext: InitContext) {
param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "prelu_channel")
super.init(device: device, inFunctionName: "prelu_channel", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "prelu_element")
super.init(device: device, inFunctionName: "prelu_element", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "prelu_other")
super.init(device: device, inFunctionName: "prelu_other", initContext: initContext)
}
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
if param.mode == "channel" {
super.init(device: device, inFunctionName: "prelu_channel_half")
super.init(device: device, inFunctionName: "prelu_channel_half", initContext: initContext)
} else if param.mode == "element" {
super.init(device: device, inFunctionName: "prelu_element_half")
super.init(device: device, inFunctionName: "prelu_element_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "prelu_other_half")
super.init(device: device, inFunctionName: "prelu_other_half", initContext: initContext)
}
} else {
fatalError()
......
......@@ -32,29 +32,28 @@ struct PriorBoxMetalParam {
class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PriorBoxMetalParam!
required init(device: MTLDevice, param: PriorBoxParam<P>) {
required init(device: MTLDevice, param: PriorBoxParam<P>, initContext: InitContext) {
let originDim = param.output.tensorDim;
param.output.tensorDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]])
param.output.padToFourDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]])
param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: computePrecision)
param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision)
param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: GlobalConfig.shared.computePrecision)
param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float32 {
if GlobalConfig.shared.computePrecision == .Float32 {
if param.min_max_aspect_ratios_order {
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder")
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "prior_box")
super.init(device: device, inFunctionName: "prior_box", initContext: initContext)
}
} else if computePrecision == .Float16 {
} else if GlobalConfig.shared.computePrecision == .Float16 {
if param.min_max_aspect_ratios_order {
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half")
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "prior_box_half")
super.init(device: device, inFunctionName: "prior_box_half", initContext: initContext)
}
} else {
fatalError()
......@@ -105,12 +104,12 @@ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
}
}
if computePrecision == .Float16 {
if GlobalConfig.shared.computePrecision == .Float16 {
let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout<Float16>.size)
float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count)
param.newAspectRatios = buffer
} else if computePrecision == .Float32 {
} else if GlobalConfig.shared.computePrecision == .Float32 {
let buffer = device.makeBuffer(bytes: outputAspectRatior, length: outputAspectRatior.count * MemoryLayout<Float32>.size, options: [])
param.newAspectRatios = buffer
} else {
......
......@@ -25,11 +25,11 @@ class ReluKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding()
}
required init(device: MTLDevice, param: ReluParam<P>) {
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "relu")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "relu_half")
required init(device: MTLDevice, param: ReluParam<P>, initContext: InitContext) {
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "relu", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "relu_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -31,8 +31,8 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: ReshapeMetalParam
required init(device: MTLDevice, param: ReshapeParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
required init(device: MTLDevice, param: ReshapeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
......@@ -51,23 +51,23 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
)
let irank = param.input.tensorDim.cout()
let orank = param.output.tensorDim.cout()
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_half")
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_half", initContext: initContext)
} else {
fatalError()
}
}
required init(device: MTLDevice, testParam: ReshapeTestParam) {
required init(device: MTLDevice, testParam: ReshapeTestParam, initContext: InitContext) {
metalParam = ReshapeMetalParam.init(
idim: (0, 0, 0, 0),
itrans: (0, 0, 0, 0),
odim: (0, 0, 0, 0),
otrans: (0, 0, 0, 0)
)
super.init(device: device, inFunctionName: "reshape")
super.init(device: device, inFunctionName: "reshape", initContext: initContext)
}
func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws {
......
......@@ -20,6 +20,17 @@ struct ResizeBilinearMetalParam {
}
class ResizeBilinearKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: ResizeBilinearParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "resize_bilinear", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "resize_bilinear_half", initContext: initContext)
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: ResizeBilinearParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
......@@ -35,15 +46,6 @@ class ResizeBilinearKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding()
}
required init(device: MTLDevice, param: ResizeBilinearParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "resize_bilinear")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "resize_bilinear_half")
} else {
fatalError()
}
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ScaleKernel: CusomKernel {
init(device: MTLDevice, shape: Shape, metalLoadMode: MetalLoadMode, metalLibPath: String?) {
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "scale", outputDim: shape, metalLoadModel: metalLoadMode, metalLibPath: metalLibPath)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "scale_half", outputDim: shape, metalLoadModel: metalLoadMode, metalLibPath: metalLibPath)
} else {
fatalError(" unsupport ")
}
}
}
......@@ -28,12 +28,12 @@ class ShapeKernel<P: PrecisionType>: Kernel, Computable{
// encoder.endEncoding()
}
required init(device: MTLDevice, param: ShapeParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "shape")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "shape_half")
required init(device: MTLDevice, param: ShapeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "shape", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "shape_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -22,16 +22,16 @@ struct SoftmaxMetalParam {
class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: SoftmaxMetalParam
required init(device: MTLDevice, param: SoftmaxParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
required init(device: MTLDevice, param: SoftmaxParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
metalParam = SoftmaxMetalParam.init(
N: Int32(param.input.tensorDim[0]),
K: Int32(param.input.tensorDim[1])
)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "softmax_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "softmax_half")
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "softmax_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "softmax_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -37,13 +37,13 @@ class SplitKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding()
}
required init(device: MTLDevice, param: SplitParam<P>) {
required init(device: MTLDevice, param: SplitParam<P>, initContext: InitContext) {
// param.output.initTexture(device: device, computePrecision: computePrecision)
let num = param.outputList.count
let rank = param.input.tensorDim.cout()
assert(num >= 2 && num <= 4)
for output in param.outputList {
output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
}
smp = SplitMetalParam.init()
smp.idim = (Int32(param.input.dim[0]), Int32(param.input.dim[1]), Int32(param.input.dim[2]), Int32(param.input.dim[3]))
......@@ -81,10 +81,10 @@ class SplitKernel<P: PrecisionType>: Kernel, Computable{
if v == "normal" {
fatalError("split unsupported")
}
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_float")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_half")
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_float", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_half", initContext: initContext)
} else {
fatalError()
}
......
......@@ -33,12 +33,12 @@ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding()
}
required init(device: MTLDevice, param: FeedParam<P>) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision)
if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array_half")
} else if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array")
required init(device: MTLDevice, param: FeedParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array_half", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array", initContext: initContext)
} else {
fatalError()
}
......
......@@ -22,8 +22,8 @@ struct TransposeMetalParam {
class TransposeKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: TransposeMetalParam = TransposeMetalParam.init()
required init(device: MTLDevice, param: TransposeParam<P>) {
param.output.initTexture(device: device, computePrecision: computePrecision)
required init(device: MTLDevice, param: TransposeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
let rank = param.input.tensorDim.cout()
var axis: [Int] = [0, 1, 2, 3]
for i in 0..<param.axis.count {
......@@ -43,13 +43,13 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable {
metalParam.oC = Int32(param.output.dim[3])
metalParam.axis = (Int32(naxis[0]), Int32(naxis[1]), Int32(naxis[2]), Int32(naxis[3]))
var kernelFunc = "transpose_undefined"
if computePrecision == .Float16 {
if GlobalConfig.shared.computePrecision == .Float16 {
if param.input.transpose == axis {
kernelFunc = "transpose_copy_half"
} else {
kernelFunc = "transpose_\(rank)_half"
}
} else if computePrecision == .Float32 {
} else if GlobalConfig.shared.computePrecision == .Float32 {
if param.input.transpose == axis {
kernelFunc = "transpose_copy_float"
} else {
......@@ -60,7 +60,7 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable {
}
print("===========>", kernelFunc)
print(metalParam)
super.init(device: device, inFunctionName: kernelFunc)
super.init(device: device, inFunctionName: kernelFunc, initContext: initContext)
}
func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 * nscale [[buffer(0)]],
const device float4 * nbias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * nscale[gid.z] + nbias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 * newScale [[buffer(0)]],
const device half4 * newBias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const half4 input = inTexture.read(gid.xy, gid.z);
half4 output = input * newScale[gid.z] + newBias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
//
// BatchNormRelu.metal
// paddle-mobile
//
#include <metal_stdlib>
using namespace metal;
struct MetalConvParam {
short offsetX;
short offsetY;
short offsetZ;
ushort strideX;
ushort strideY;
};
kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *new_scale [[buffer(0)]],
const device float4 *new_biase [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
float4 input;
float4 output;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
input = inTexture.sample(sample, gid.x, gid.y, gid.z);
output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> output [[texture(1)]],
constant bilinear_interp_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
P w = gid.x * pm.ratio_w;
P h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
P w1lambda = w - w0, h1lambda = h - h0;
P w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
VECTOR(P, 4) r0 = input.read(uint2(w0, h0), gid.z);
VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1)
+ h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct bilinear_interp_param {
float ratio_h;
float ratio_w;
};
#define P float
#include "BilinearInterp.inc.metal"
#undef P
#define P half
#include "BilinearInterp.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(boxcoder, P)(texture2d_array<P, access::read> priorBox [[texture(0)]],
texture2d_array<P, access::read> priorBoxVar [[texture(1)]],
texture2d_array<P, access::read> targetBox [[texture(2)]],
texture2d_array<P, access::write> output[[texture(3)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) p = priorBox.read(uint2(0, gid.x), gid.z);
VECTOR(P, 4) pv = priorBoxVar.read(uint2(0, gid.x), gid.z);
VECTOR(P, 4) t;
t[0] = targetBox.read(uint2(0, gid.x), gid.z)[0];
t[1] = targetBox.read(uint2(1, gid.x), gid.z)[0];
t[2] = targetBox.read(uint2(2, gid.x), gid.z)[0];
t[3] = targetBox.read(uint2(3, gid.x), gid.z)[0];
P px = (p.x + p.z) / 2;
P py = (p.y + p.w) / 2;
P pw = p.z - p.x;
P ph = p.w - p.y;
P tx = pv.x * t.x * pw + px;
P ty = pv.y * t.y * ph + py;
P tw = exp(pv.z * t.z) * pw;
P th = exp(pv.w * t.w) * ph;
VECTOR(P, 4) r;
r.x = tx - tw / 2;
r.y = ty - th / 2;
r.z = tx + tw / 2;
r.w = ty + th / 2;
output.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define P float
#include "BoxCoder.inc.metal"
#undef P
#define P half
#include "BoxCoder.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = abcd[2] = 0;
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = 0;
abcd[2] = xyzn[1];
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
abcd[0] = 0;
abcd[3] = xyzn[0];
abcd[2] = xyzn[1];
abcd[1] = xyzn[2] * 4 + xyzn[3];
}
inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
uint t = xyzn[2] * 4 + xyzn[3];
abcd[0] = t / C;
abcd[3] = t % C;
}
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
xyzn[1] = xyzn[2] = 0;
xyzn[0] = abcd[3] / 4;
xyzn[1] = abcd[3] % 4;
}
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
xyzn[2] = 0;
xyzn[1] = abcd[2];
xyzn[0] = abcd[3] / 4;
xyzn[3] = abcd[3] % 4;
}
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[3];
xyzn[1] = abcd[2];
xyzn[2] = abcd[1] / 4;
xyzn[3] = abcd[1] % 4;
}
inline void abcd2xyzn_4(int C, int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[2];
xyzn[1] = abcd[1];
uint t = abcd[0] * C + abcd[3];
xyzn[2] = t / 4;
xyzn[3] = t % 4;
}
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
uint t = xyzn[2] * 4 + xyzn[3];
abcd[0] = t / C;
abcd[3] = t % C;
}
inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[2];
xyzn[1] = abcd[1];
uint t = abcd[0] * C + abcd[3];
xyzn[2] = t / 4;
xyzn[3] = t % 4;
}
inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
int32_t r = abcd[0];
r = r * dim[1] + abcd[1];
r = r * dim[2] + abcd[2];
r = r * dim[3] + abcd[3];
return r;
}
inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
abcd[3] = ind % dim[3]; ind /= dim[3];
abcd[2] = ind % dim[2]; ind /= dim[2];
abcd[1] = ind % dim[1]; ind /= dim[1];
abcd[0] = ind;
}
inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[i] = ipos[trans[i]];
}
}
inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[trans[i]] = ipos[i];
}
}
struct MetalConvParam {
short offsetX;
short offsetY;
short offsetZ;
ushort strideX;
ushort strideY;
ushort dilationX;
ushort dilationY;
};
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
#if V == VX
#define VV x
#elif V == VY
#define VV y
#elif V == VZ
#define VV z
#else
#define VV normal
#endif
#if V == VNORMAL
//kernel void FUNC(concat, R, N, normal, P)(array<texture2d_array<P, access::read>, N> in [[texture(0)]],
// texture2d_array<P, access::read> out_x [[texture(N)]],
// texture2d_array<P, access::write> out [[texture(N+1)]],
// constant ConcatParam & pm [[buffer(0)]],
// uint3 gid [[thread_position_in_grid]]) {
//}
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif
texture2d_array<P, access::read> inx [[texture(N)]],
texture2d_array<P, access::write> out [[texture(N+1)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
ConcatParam cp = pm;
int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4];
VECTOR(P, 4) r = inx.read(gid.xy, gid.z);
for (int i = 0; i < 4; i++) {
xyzn[3] = i;
#if R == 4
xyzn2abcd_4(cp.odim[3], xyzn, abcd);
#else
FUNC_R(xyzn2abcd, R)(xyzn, abcd);
#endif
int k = abcd[cp.axis] - cp.offset;
if (k < 0) continue;
int j = 0;
for (; j < N; j++) {
if (k < cp.vdim[j]) {
break;
}
k -= cp.vdim[j];
}
if (j == N) {
continue;
}
int ta = cp.odim[cp.axis];
abcd[cp.axis] = k;
cp.odim[cp.axis] = cp.vdim[j];
#if R == 4
abcd2xyzn_4(cp.odim[3], abcd, oxyzn);
#else
FUNC_R(abcd2xyzn, R)(abcd, oxyzn);
#endif
cp.odim[cp.axis] = ta;
switch (j) {
case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#if N >= 3
case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 4
case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 5
case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 6
case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
}
}
out.write(r, gid.xy, gid.z);
}
#endif // V == NORMAL
#if V == VX
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int x = gid.x - pm.offset;
if (x < 0) return;
if (x < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
x -= pm.vdim[0];
if (x < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
x -= pm.vdim[1];
if (x < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
x -= pm.vdim[2];
if (x < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
x -= pm.vdim[3];
if (x < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
x -= pm.vdim[4];
if (x < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VX
#if V == VY
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int y = gid.y - pm.offset;
if (y < 0) return;
if (y < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[0];
if (y < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
y -= pm.vdim[1];
if (y < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
y -= pm.vdim[2];
if (y < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
y -= pm.vdim[3];
if (y < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
y -= pm.vdim[4];
if (y < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VY
#if V == VZ
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int z = gid.z - pm.offset;
if (z < 0) return;
if (z < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[0];
if (z < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
z -= pm.vdim[1];
if (z < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
z -= pm.vdim[2];
if (z < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
z -= pm.vdim[3];
if (z < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
z -= pm.vdim[4];
if (z < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VZ
#undef VV
#endif // #ifdef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ConcatParam {
int32_t odim[4];
int32_t axis;
int32_t offset;
int32_t trans[4];
int32_t vdim[6];
};
#define VNORMAL 1
#define VX 2
#define VY 3
#define VZ 4
// >> fast mode
// only support concat_{2,3,4}_{2,3,4,5,6}_y_{float,half}
// only support concat_{3,4}_{2,3,4,5,6}_x_{float,half}
// only support concat_{1,2,3,4}_{2,3,4,5,6}_z_{float,half}
// >> normal mode (loop mode)
// ssd-ar: (R=4, N=3, V=z), (R=3, N=2, V=y), (R=2, N=5, V=x), (R=3, N=5, V=x)
// ssd: (R=2, N=6, V=y), (R=3, N=6, V=y)
// genet: (R=4, N=2, V=normal)
// ssd-ar: (R=3, N=5, V=x)
#define V VX
#define R 3
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=2, N=5, V=x)
#define V VX
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=4, N=3, V=z)
#define V VZ
#define R 4
#define N 3
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=2, N=6, V=y)
#define V VY
#define R 2
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=3, N=6, V=y)
#define V VY
#define R 3
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VNORMAL
#define R 4
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
kernel void conv_add_batch_norm_relu_1x1_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
/*---------------------------------------------*/
kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#pragma mark - convAdd
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
float4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
float4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = biase[gid.z];
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
#pragma mark - half
kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
half4 output = biase[gid.z];
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
half4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
half4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void test_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
if (gid.x > 0 || gid.y > 0 || gid.z > 0) { return; }
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include "Macro.metal"
#pragma mark - convAdd
kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample,float2(posInInput.x, posInInput.y), i);
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device P *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#pragma mark - conv bn relu
kernel void conv_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
#pragma mark - half
kernel void conv_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(float4(input), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(float4(input), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(float4(input), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(float4(input), float4(weight_w));
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// conv
#pragma mark -- conv
kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]);
output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]);
output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]);
output.w += float(input.w) * float(weights[weithTo + 3 * kernelHXW + j]);
}
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(float4(input), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(float4(input), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(float4(input), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(float4(input), float4(weight_w));
}
outTexture.write(half4(output), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct MetalConvTransposeParam{
ushort kernelW;
ushort kernelH;
ushort strideX;
ushort strideY;
ushort paddingX;
ushort paddingY;
ushort dilationX;
ushort dilationY;
};
kernel void conv_transpose2x2_stride2(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
int input_array_size = inTexture.get_array_size();
int kernel_index_x = gid.x % 2;
int kernel_index_y = gid.y % 2;
int kernel_index = kernel_index_y * 2 + kernel_index_x;
int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
int input_x = gid.x / 2;
int input_y = gid.y / 2;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output = float4(0.0);
for (int i = 0; i < input_array_size; ++i) {
float4 input = inTexture.sample(sample, float2(input_x, input_y), i);
float4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
float4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
float4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
float4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
output.x += dot(input, kernel_slice0);
output.y += dot(input, kernel_slice1);
output.z += dot(input, kernel_slice2);
output.w += dot(input, kernel_slice3);
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_transpose2x2_stride2_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
int input_array_size = inTexture.get_array_size();
int kernel_index_x = gid.x % 2;
int kernel_index_y = gid.y % 2;
int kernel_index = kernel_index_y * 2 + kernel_index_x;
int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
int input_x = gid.x / 2;
int input_y = gid.y / 2;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output = float4(0.0);
for (int i = 0; i < input_array_size; ++i) {
half4 input = inTexture.sample(sample, float2(input_x, input_y), i);
half4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
half4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
half4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
half4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
output.x += dot(float4(input), float4(kernel_slice0));
output.y += dot(float4(input), float4(kernel_slice1));
output.z += dot(float4(input), float4(kernel_slice2));
output.w += dot(float4(input), float4(kernel_slice3));
}
outTexture.write(half4(output), gid.xy, gid.z);
}
//kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
// texture2d_array<float, access::write> outTexture [[texture(1)]],
// constant MetalConvTransposeParam &param [[buffer(0)]],
// const device float4 *weights [[buffer(1)]],
// uint3 gid [[thread_position_in_grid]]){
// if (gid.x >= outTexture.get_width() ||
// gid.y >= outTexture.get_height() ||
// gid.z >= outTexture.get_array_size()) {
// return;
// }
//
// int input_array_size = inTexture.get_array_size();
//
// uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
//
// uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);
//
// constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
//
// float4 output;
//
// for (int w = 0; w < param.kernelW; ++w) {
// int top = gid.x - w * param.dilationX + param.paddingX;
// int input_x = top / param.strideX;
// if (top < 0 || input_x >= int(inTexture.get_width())) {
// continue;
// }
//
// for (int h = 0; h < param.kernelH; ++h) {
// int top_y = gid.y - h * param.dilationY + param.paddingY;
// int input_y = top_y / param.strideY;
// if (top_y < 0 || input_y >= int(inTexture.get_height())) {
// continue;
// }
//
// uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();
//
// for (int slice = 0; slice < input_array_size; ++slice) {
//
// float4 input;
// float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
// float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
//
// float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
//
// float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];
//
// input = inTexture.sample(sample, float2(input_x, input_y), slice);
// output.x += dot(input, kernel_slice);
// output.y += dot(input, kernel_slice1);
// output.z += dot(input, kernel_slice2);
// output.w += dot(input, kernel_slice3);
// }
// }
// }
//
// outTexture.write(output, gid.xy, gid.z);
//}
//
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
kernel void elementwise_add(texture2d_array<float, access::read> inputX [[texture(0)]],
texture2d_array<float, access::read> inputY [[texture(1)]],
texture2d_array<float, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
float4 rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
float4 r = rx + ry;
outTexture.write(r, gid.xy, gid.z);
}
kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[texture(0)]],
texture2d_array<half, access::read> inputY [[texture(1)]],
texture2d_array<half, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
half4 rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
half4 r = rx + ry;
outTexture.write(r, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
texture2d_array<P, access::read> inputY [[texture(1)]],
texture2d_array<P, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(1)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
VECTOR(P, 4) rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
VECTOR(P, 4) output = rx + ry;
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(output, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
......@@ -12,34 +12,40 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#ifdef P
kernel void fetch(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT2(a, b) a ## b
#define FUNC(m, n, q) CONCAT3_(m, n, q)
#define FUNC_T(m, n) CONCAT2_(m, n)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC_T(fetch, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z);
const VECTOR(P, 4) input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float * output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
kernel void FUNC(fetch, 1or2, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
......@@ -47,25 +53,9 @@ kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
const VECTOR(P, 4) input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = float4(input);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define P float
#include "FetchKernel.inc.metal"
#undef P
#define P half
#include "FetchKernel.inc.metal"
#undef P
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// 占位函数, 啥也没干
kernel void place_holder(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
}
struct OutputDim {
ushort width;
ushort height;
ushort strideX;
ushort strideY;
};
kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant OutputDim &params [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint2 pos = gid.xy * uint2(params.strideX, params.strideY);
const half4 input = inTexture.read(pos);
outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z);
}
kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const float4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const half4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC2_(a, b) CONCAT2_(a, b)
#define FUNC3_(a, b, c) CONCAT3_(a, b, c)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void nms_fetch_result(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_result_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input;
}
kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = float4(input);
}
......@@ -12,22 +12,10 @@
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#ifdef P
struct PoolParam {
int ksizeX;
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
kernel void FUNC2_(pool, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
......@@ -40,7 +28,7 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
float4 r = 0;
VECTOR(P, 4) r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
......@@ -54,40 +42,9 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
r /= (xmax - xmin) * (ymax - ymin);
}
outTexture.write(r, gid.xy, gid.z);
}
kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
half4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
struct PoolParam {
int ksizeX;
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
#define P half
#include "PoolKernel.inc.metal"
#undef P
#define P float
#include "PoolKernel.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
float4 alpha_value = alpha[gid.z];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
float4 alpha_value = alpha[alpha_to + gid.z];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
float alpha_value = alpha[0];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value * input.x);
output.y = input.y > 0 ? input.y : (alpha_value * input.y);
output.z = input.z > 0 ? input.z : (alpha_value * input.z);
output.w = input.w > 0 ? input.w : (alpha_value * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_channel_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
half4 alpha_value = alpha[gid.z];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_element_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
half4 alpha_value = alpha[alpha_to + gid.z];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_other_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
half alpha_value = alpha[0];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value * input.x);
output.y = input.y > 0 ? input.y : (alpha_value * input.y);
output.z = input.z > 0 ? input.z : (alpha_value * input.z);
output.w = input.w > 0 ? input.w : (alpha_value * input.w);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct PriorBoxMetalParam {
float offset;
float stepWidth;
float stepHeight;
float minSize;
float maxSize;
float imageWidth;
float imageHeight;
bool clip;
uint numPriors;
uint aspecRatiosSize;
uint minSizeSize;
uint maxSizeSize;
};
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
texture2d_array<float, access::write> varianceTexture [[texture(2)]],
const device float *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z < param.aspecRatiosSize) {
float ar = aspect_ratios[gid.z];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
} else if (gid.z >= param.aspecRatiosSize) {
if (param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(max_box, gid.xy, gid.z);
}
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(variances_output, gid.xy, gid.z);
}
}
kernel void prior_box_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]],
const device half *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z < param.aspecRatiosSize) {
half ar = aspect_ratios[gid.z];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
} else if (gid.z >= param.aspecRatiosSize) {
if (param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(half4(max_box), gid.xy, gid.z);
}
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(half4(variances_output), gid.xy, gid.z);
}
}
kernel void prior_box_MinMaxAspectRatiosOrder(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
texture2d_array<float, access::write> varianceTexture [[texture(2)]],
const device float *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z == 0) {
box_width = box_height = param.minSize / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
if (gid.z == 1 && param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
int aspect_to = 0;
if (param.maxSizeSize > 0) {
aspect_to = gid.z - 2;
} else {
aspect_to = gid.z - 1;
}
if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) {
int skip = 0;
for (int i = 0; i < aspect_to + 1; ++i) {
if (fabs(aspect_ratios[i] - 1.) < 1e-6) {
skip += 1;
}
}
aspect_to += skip;
float ar = aspect_ratios[aspect_to];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(variances_output, gid.xy, gid.z);
}
}
kernel void prior_box_MinMaxAspectRatiosOrder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]],
const device half *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z == 0) {
box_width = box_height = param.minSize / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
if (gid.z == 1 && param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
int aspect_to = 0;
if (param.maxSizeSize > 0) {
aspect_to = gid.z - 2;
} else {
aspect_to = gid.z - 1;
}
if (aspect_to > 0 && aspect_to < int(param.aspecRatiosSize) && fabs(aspect_ratios[aspect_to] - 1.) > 1e-6) {
float ar = aspect_ratios[aspect_to];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(half4(variances_output), gid.xy, gid.z);
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void relu_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const half4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(half4(relu), gid.xy, gid.z);
}
kernel void relu(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const float4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(float4(relu), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define FUNC(f, r1, r2, p) CONCAT4_(f, r1, r2, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant ReshapeParam &rp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4], iabcd[4];
ReshapeParam lrp = rp;
int oC = lrp.odim[lrp.otrans[3]];
int iC = lrp.idim[lrp.itrans[3]];
int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3];
VECTOR(P, 4) r;
for (int n = 0; n < 4; n++) {
oxyzn[3] = n;
#if ROUT == 4
xyzn2abcd_4(oC, oxyzn, oabcd);
#else
FUNC_R(xyzn2abcd, ROUT)(oxyzn, oabcd);
#endif
int tabcd[4];
invtrans(lrp.otrans, oabcd, tabcd);
int index = abcd2index(lrp.odim, tabcd);
if (index < count) {
index2abcd(lrp.idim, index, tabcd);
trans(lrp.itrans, tabcd, iabcd);
#if RIN == 4
abcd2xyzn_4(iC, iabcd, ixyzn);
#else
FUNC_R(abcd2xyzn, RIN)(iabcd, ixyzn);
#endif
r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
} else {
r[n] = 0;
}
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONRITIONS OF ANY KINR, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ReshapeParam {
int32_t idim[4];
int32_t itrans[4];
int32_t odim[4];
int32_t otrans[4];
};
#define P float
#define RIN 4
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 3
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 2
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 1
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#undef P
#define P half
#define RIN 4
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 3
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 2
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 1
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct resize_bilinear_param {
// int32_t out_h;
// int32_t out_w;
float ratio_h;
float ratio_w;
};
kernel void resize_bilinear(texture2d_array<float, access::read> input [[texture(0)]],
texture2d_array<float, access::write> output [[texture(2)]],
constant resize_bilinear_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
float4 r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
float w = gid.x * pm.ratio_w;
float h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
float w1lambda = w - w0, h1lambda = h - h0;
float w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
float4 r0 = input.read(uint2(w0, h0), gid.z);
float4 r1 = input.read(uint2(w1, h0), gid.z);
float4 r2 = input.read(uint2(w0, h1), gid.z);
float4 r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
kernel void resize_bilinear_half(texture2d_array<half, access::read> input [[texture(0)]],
texture2d_array<half, access::write> output [[texture(2)]],
constant resize_bilinear_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
half4 r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
half w = gid.x * pm.ratio_w;
half h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
half w1lambda = w - w0, h1lambda = h - h0;
half w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
half4 r0 = input.read(uint2(w0, h0), gid.z);
half4 r1 = input.read(uint2(w1, h0), gid.z);
half4 r2 = input.read(uint2(w0, h1), gid.z);
half4 r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
output.write(r, gid.xy, gid.z);
}
//
// Scale.metal
// paddle-mobile
//
// Created by liuRuiLong on 2019/1/4.
// Copyright © 2019 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void scale(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(input, gid);
}
kernel void scale_half(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<half, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(half4(input), gid);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void shape() {
}
kernel void shape_half() {
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(softmax, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant SoftmaxParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
// int zsize = inTexture.get_array_size();
P maxv = inTexture.read(uint2(0, gid.y), 0)[0];
int group = sp.K / 4;
int remain = sp.K % 4;
for (int x = 0; x < group; x++) {
VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
maxv = max(maxv, max(r[0], max(r[1], max(r[2], r[3]))));
}
if (remain > 0) {
VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
for (int i = 0; i < remain; i++) {
maxv = max(maxv, r[i]);
}
}
VECTOR(P, 4) rsum = {0, 0, 0, 0};
for (int x = 0; x < group; x++) {
VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
rsum += exp(r - maxv);
}
P sum = rsum[0] + rsum[1] + rsum[2] + rsum[3];
if (remain > 0) {
VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
for (int i = 0; i < remain; i++) {
sum += exp(r[i] - maxv);
}
}
VECTOR(P, 4) rr = inTexture.read(gid.xy, gid.z);
rr = exp(rr - maxv) / sum;
outTexture.write(rr, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct SoftmaxParam {
int N;
int K;
};
#define P float
#include "Softmax.inc.metal"
#undef P
#define P half
#include "Softmax.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
#if V == VX
#define VV x
#elif V == VY
#define VV y
#elif V == VZ
#define VV z
#else
#define VV normal
#endif
#if V == VY
kernel void FUNC(split, R, N, VV, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> out1 [[texture(1)]],
texture2d_array<P, access::write> out2 [[texture(2)]],
#if N >= 3
texture2d_array<P, access::write> out3 [[texture(3)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::write> out4 [[texture(4)]],
#endif // N >= 4
constant SplitParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r = input.read(gid.xy, gid.z);
int y = gid.y - sp.offset;
if (y < sp.vdim[0]) {
out1.write(r, gid.xy, gid.z);
return;
}
y -= sp.vdim[0];
if (y < sp.vdim[1]) {
out2.write(r, uint2(gid.x, y), gid.z);
return;
}
#if N >= 3
y -= sp.vdim[1];
if (y < sp.vdim[2]) {
out3.write(r, uint2(gid.x, y), gid.z);
return;
}
#endif // N >= 3
#if N >= 4
y -= sp.vdim[2];
if (y < sp.vdim[3]) {
out4.write(r, uint2(gid.x, y), gid.z);
return;
}
#endif // N >= 4
}
#endif // V == VY
#if V == VX
kernel void FUNC(split, R, N, VV, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> out1 [[texture(1)]],
texture2d_array<P, access::write> out2 [[texture(2)]],
#if N >= 3
texture2d_array<P, access::write> out3 [[texture(3)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::write> out4 [[texture(4)]],
#endif // N >= 4
constant SplitParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r = input.read(gid.xy, gid.z);
int x = gid.x;
if (x < sp.vdim[0]) {
out1.write(r, gid.xy, gid.z);
return;
}
x -= sp.vdim[0];
if (x < sp.vdim[1]) {
out2.write(r, uint2(x, gid.y), gid.z);
return;
}
#if N >= 3
x -= sp.vdim[1];
if (x < sp.vdim[2]) {
out3.write(r, uint2(x, gid.y), gid.z);
return;
}
#endif // N >= 3
#if N >= 4
x -= sp.vdim[2];
if (x < sp.vdim[3]) {
out4.write(r, uint2(x, gid.y), gid.z);
return;
}
#endif // N >= 4
}
#endif // V == VX
#undef VV
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct SplitParam {
int32_t idim[4];
int32_t axis;
int32_t offset;
int32_t trans[4];
int32_t vdim[4];
};
#define VNORMAL 1
#define VX 2
#define VY 3
#define VZ 4
// only support split_{2, 3, 4}_{2, 3, 4}_y_{float, half}
// only support split_{3, 4}_{2, 3, 4}_x_{float, half}
//// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "Split.inc.metal"
#undef P
#define P half
#include "Split.inc.metal"
#undef P
#undef N
#undef R
#undef V
//// ssd-ar: (R=2, N=2, V=y)
#define V VY
#define R 2
#define N 2
#define P float
#include "Split.inc.metal"
#undef P
#define P half
#include "Split.inc.metal"
#undef P
#undef N
#undef R
#undef V
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define FUNC(f, r, p) CONCAT3_(f, r, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(transpose, R, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r;
int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0};
int iabcd[4], oabcd[4], ixyzn[4];
for (int n = 0; n < 4; n++) {
oxyzn[3] = n;
#if R == 4
xyzn2abcd_4(pm.oC, oxyzn, iabcd);
#endif // R == 4
#if R == 3
xyzn2abcd_3(oxyzn, oabcd);
#endif // R == 3
#if R == 2
xyzn2abcd_2(oxyzn, oabcd);
#endif // R == 2
iabcd[pm.axis[0]] = oabcd[0];
iabcd[pm.axis[1]] = oabcd[1];
iabcd[pm.axis[2]] = oabcd[2];
iabcd[pm.axis[3]] = oabcd[3];
#if R == 4
abcd2xyzn_4(pm.iC, iabcd, ixyzn);
#endif // R == 4
#if R == 3
abcd2xyzn_3(iabcd, ixyzn);
#endif // R == 3
#if R == 2
abcd2xyzn_2(iabcd, ixyzn);
#endif // R == 2
r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct TransposeParam {
int iC;
int oC;
int axis[4];
};
kernel void transpose_copy_float(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
}
kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
}
#define R 4
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
#define R 3
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
#define R 2
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
......@@ -22,24 +22,27 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam {
bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope)
middleOutput = FetchHolder.init(inCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim.dims)
middleOutput = FetchHolder.init(inPaddedCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim)
bboxOutput = FetchHolder.init(inCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim.dims)
bboxOutput = FetchHolder.init(inPaddedCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim)
} catch let error {
throw error
}
}
var bboxOutput: FetchHolder
var middleOutput: FetchHolder
let scores: Texture<P>
let bboxes: Texture<P>
var output: Texture<P>
let scores: Texture
let bboxes: Texture
var output: Texture
}
class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{
func inputVariant() -> [String : [Variant]] {
return ["Scores" : [para.middleOutput], "BBoxes" : [para.bboxOutput]]
func inputVariant() -> [String : [MTLBuffer]] {
guard let scoreBuffer = para.middleOutput.resultBuffer, let bboxBuffer = para.middleOutput.resultBuffer else {
fatalError()
}
return ["Scores" : [scoreBuffer], "BBoxes" : [bboxBuffer]]
}
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) {
......
......@@ -32,8 +32,8 @@ class PoolParam<P: PrecisionType>: OpParam {
}
// let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self)
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
var ksize: [Int32]
var stride: [Int32]
var padding: [Int32]
......
......@@ -28,8 +28,8 @@ class PreluParam<P: PrecisionType>: OpParam {
}
let mode: String
let alpha: Tensor<P>
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
}
class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable{
......
......@@ -53,10 +53,10 @@ class PriorBoxParam<P: PrecisionType>: OpParam {
var stepH: Float32
let offset: Float32
let input: Texture<P>
let inputImage: Texture<P>
var output: Texture<P>
let outputVariances: Texture<P>
let input: Texture
let inputImage: Texture
var output: Texture
let outputVariances: Texture
}
class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{
......
......@@ -25,8 +25,8 @@ class ReluParam<P: PrecisionType>: OpParam {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
}
class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable{
......@@ -47,10 +47,11 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable,
func delogOutput() {
print(" \(type) output: ")
print(para.output.metalTexture)
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray())
// let device = para.output.metalTexture!.device
// let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
// print(outputArray.strideArray())
}
}
......
......@@ -48,9 +48,9 @@ class ReshapeParam<P: PrecisionType>: OpParam {
throw error
}
}
let input: Texture<P>
let input: Texture
let shape: [Int32]
var output: Texture<P>
var output: Texture
}
class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>, Runable, Creator, InferShaperable{
......
......@@ -29,8 +29,8 @@ class ResizeBilinearParam<P: PrecisionType>: OpParam {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
let out_h: Int32
let out_w: Int32
}
......
......@@ -24,8 +24,8 @@ class ShapeParam<P: PrecisionType>: OpParam {
throw error
}
}
var output: Texture<P>
let input: Texture<P>
var output: Texture
let input: Texture
}
class ShapeOp<P: PrecisionType>: Operator<ShapeKernel<P>, ShapeParam<P>>, Runable, Creator, InferShaperable{
......
......@@ -32,8 +32,8 @@ class SoftmaxParam<P: PrecisionType>: OpParam {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
}
class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable{
......
......@@ -19,7 +19,7 @@ class SplitParam<P: PrecisionType>: OpParam {
required init(opDesc: OpDesc, inScope: Scope) throws {
do {
input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope)
output = Texture<P>.init(device: input.metalTexture!.device, inDim: input.dim)
output = Texture.init(device: input.metalTexture!.device, inDim: input.dim)
axis = try SplitParam.getAttr(key: "axis", attrs: opDesc.attrs)
sections = try SplitParam.getAttr(key: "sections", attrs: opDesc.attrs)
if axis < 0 {
......@@ -29,7 +29,7 @@ class SplitParam<P: PrecisionType>: OpParam {
fatalError()
}
for out in outlist {
guard let variant = inScope[out], let v = variant as? Texture<P> else {
guard let variant = inScope[out], let v = variant as? Texture else {
fatalError()
}
outputList.append(v)
......@@ -41,9 +41,9 @@ class SplitParam<P: PrecisionType>: OpParam {
}
var axis: Int
let input: Texture<P>
var output: Texture<P>
var outputList: [Texture<P>] = []
let input: Texture
var output: Texture
var outputList: [Texture] = []
var sections: [Int32] = []
}
......
......@@ -26,8 +26,8 @@ class TransposeParam<P: PrecisionType>: OpParam {
throw error
}
}
let input: Texture<P>
var output: Texture<P>
let input: Texture
var output: Texture
let axis: [Int32]
}
......
......@@ -14,10 +14,10 @@
import Foundation
class BlockDesc {
public class BlockDesc {
let index: Int
let parentIndex: Int
let vars: [VarDesc]
public let vars: [VarDesc]
let ops: [OpDesc]
init(block: PaddleMobile_Framework_Proto_BlockDesc) {
index = Int(block.idx)
......@@ -45,7 +45,7 @@ class BlockDesc {
}
extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
public var description: String {
var str = ""
for i in 0..<ops.count {
......@@ -61,9 +61,7 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
return str
}
var debugDescription: String {
public var debugDescription: String {
return description
}
}
......@@ -14,10 +14,10 @@
import Foundation
public class Program {
let paramPath: String
let programDesc: ProgramDesc
let scope: Scope
@objc public class Program: NSObject {
public let paramPath: String
public let programDesc: ProgramDesc
public let scope: Scope
init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) {
programDesc = inProgramDesc
paramPath = inParamPath
......
......@@ -15,7 +15,7 @@
import Foundation
public class ProgramDesc {
var blocks: [BlockDesc] = []
public var blocks: [BlockDesc] = []
init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) {
for block in protoProgram.blocks {
self.blocks.append(BlockDesc.init(block: block))
......
......@@ -14,7 +14,7 @@
import Foundation
class Scope {
public class Scope {
let feedKey: String
let fetchKey: String
func setInput(input: Variant) {
......@@ -29,7 +29,7 @@ class Scope {
return vars[feedKey];
}
func output() -> Variant? {
public func output() -> Variant? {
return vars[fetchKey];
}
......@@ -38,7 +38,7 @@ class Scope {
fetchKey = inFetchKey
}
var vars: [String : Variant] = [:]
public var vars: [String : Variant] = [:]
subscript(key: String) -> Variant?{
get {
return vars[key]
......
......@@ -14,7 +14,7 @@
import Foundation
enum VarTypeType: Int {
public enum VarTypeType: Int {
case ErrorType = -1,
Bool = 0,
Int16 = 1,
......@@ -56,10 +56,10 @@ enum VarTypeType: Int {
}
}
class VarDesc {
let name: String
let persistable: Bool
let type: VarTypeType
public class VarDesc {
public let name: String
public let persistable: Bool
public let type: VarTypeType
let tensorDesc: TensorDesc?
init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) {
type = VarTypeType.init(rawValue: protoVarDesc.type.type.rawValue) ?? .ErrorType
......@@ -79,7 +79,7 @@ class VarDesc {
}
extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String {
public var description: String {
var str = ""
str += "var name \(name): \n"
if let inTensorDesc = tensorDesc {
......@@ -93,7 +93,7 @@ extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
return str
}
var debugDescription: String {
public var debugDescription: String {
return description
}
}
//
// YoloNet.swift
// paddle-mobile
//
// Created by Xiao,Haichun on 2018/12/5.
// Copyright © 2018 orange. All rights reserved.
//
import Foundation
import Metal
public class YoloNet: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null"
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 224, w: 224, c: 3)
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = ""
paramPath = ""
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 224, w: 224, c: 3)
}
// class GenetPreProccess: CusomKernel {
// init(device: MTLDevice) {
// let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
// super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false)
// }
// }
override public func resultStr(res: ResultHolder) -> String {
// fatalError()
return " \(res.result![0]) ... "
}
}
......@@ -14,9 +14,6 @@
#pragma once
#import "PaddleMobileCPU.h"
#import "CPUCompute.h"
#import "PaddleMobileGPU.h"
#import <UIKit/UIKit.h>
//! Project version number for paddle_mobile.
......
......@@ -110,6 +110,10 @@ enum PoolingType {
FIRST = 3,
};
struct PaddleMobileConfigInternal {
bool load_when_predict = false;
};
extern const char *G_OP_TYPE_CONV;
extern const char *G_OP_TYPE_BATCHNORM;
extern const char *G_OP_TYPE_BOX_CODER;
......
......@@ -38,12 +38,15 @@ namespace framework {
#pragma mark - executor
template <typename Device, typename T>
Executor<Device, T>::Executor(const Program<Device> &program, int batch_size,
const bool use_optimize, const bool lod_mode)
Executor<Device, T>::Executor(const Program<Device> &program,
paddle_mobile::PaddleMobileConfigInternal config,
int batch_size, const bool use_optimize,
const bool lod_mode)
: program_(program),
batch_size_(batch_size),
use_optimize_(use_optimize),
lod_mode_(lod_mode) {
lod_mode_(lod_mode),
config_(config) {
DLOG << "executor in lod mode: " << lod_mode_;
Variable *variable_ptr = program_.scope->Var("batch_size");
......@@ -212,10 +215,17 @@ void Executor<Device, T>::InitCombineMemory() {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
DLOG << " init combine memory persistable: " << var_desc->Name();
LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
} else {
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
DLOG << " init combine memory no persistable in lod: "
<< var_desc->Name();
varInputMemory(var_desc, var, tensor);
} else {
DLOG << " init combine memory no persistable: " << var_desc->Name();
}
}
}
......@@ -226,6 +236,35 @@ void Executor<Device, T>::InitCombineMemory() {
LOG(kLOG_INFO) << "init combine memory finish";
}
template <typename Device, typename T>
void Executor<Device, T>::InitNoPersistableMemory(
const Tensor &input_tensor) {
for (const auto &block : program_desc_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program_.scope->Var(var_desc->Name());
auto tensor = var->template GetMutable<LoDTensor>();
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
} else {
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
DDim tensor_dim = tensor->dims();
DDim new_dim =
make_ddim({tensor_dim[0], tensor_dim[1], input_tensor.dims()[2],
input_tensor.dims()[3]});
tensor->Resize(new_dim);
tensor->template mutable_data<T>();
}
}
}
}
std::shared_ptr<LoDTensor> output = GetOutput("fetch");
output->Resize(input_tensor.dims());
output->mutable_data<T>();
}
template <typename Device, typename T>
bool Executor<Device, T>::varInputMemory(
const std::shared_ptr<VarDesc> &var_desc, Variable *var,
......@@ -297,7 +336,16 @@ void Executor<Device, T>::SetInput(const Tensor &input,
auto *target_var = program_.scope->FindVar(var_name);
PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
var_name.c_str());
auto *target_tensor = target_var->template GetMutable<LoDTensor>();
if (config_.load_when_predict) {
if (input_dim_last_ != input.dims()) {
InitNoPersistableMemory(input);
input_dim_last_ = input.dims();
}
}
target_tensor->Resize(input.dims());
target_tensor->ShareDataWith(input);
}
......@@ -309,6 +357,14 @@ void Executor<Device, T>::SetInput(const LoDTensor &input,
PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
var_name.c_str());
auto *target_tensor = target_var->template GetMutable<LoDTensor>();
if (config_.load_when_predict) {
if (input_dim_last_ != input.dims()) {
InitNoPersistableMemory(*target_tensor);
input_dim_last_ = input.dims();
}
}
target_tensor->Resize(input.dims());
target_tensor->ShareDataWith(input);
target_tensor->set_lod(input.lod());
......@@ -453,6 +509,70 @@ void Executor<Device, T>::Predict_To(int end) {
#endif
#ifdef PADDLE_MOBILE_CL
template <>
void Executor<GPU_CL, float>::InitNoPersistableMemory(
const Tensor &input_tensor) {
DLOG << "CL InitNoPersistableMemory ";
for (const auto &block : program_desc_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program_.scope->Var(var_desc->Name());
auto cl_image = var->template GetMutable<CLImage>();
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
} else {
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
cl_context context = program_.scope->GetCLScpoe()->Context();
cl_command_queue command_queue =
program_.scope->GetCLScpoe()->CommandQueue();
DDim tensor_dim = cl_image->dims();
DDim new_dim =
make_ddim({tensor_dim[0], tensor_dim[1], input_tensor.dims()[2],
input_tensor.dims()[3]});
cl_image->Resize(new_dim);
cl_image->InitEmptyImage(context, command_queue, new_dim);
}
}
}
}
std::shared_ptr<LoDTensor> output = GetOutput("fetch");
output->Resize(input_tensor.dims());
output->mutable_data<float>();
}
template <>
void Executor<GPU_CL, float>::SetInput(const Tensor &input,
const std::string &var_name) {
auto *target_var = program_.scope->FindVar(var_name);
PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
var_name.c_str());
auto *target_tensor = target_var->template GetMutable<LoDTensor>();
DLOG << "config_.load_when_predict " << config_.load_when_predict;
DLOG << "target_tensor->IsInitialized() " << target_tensor->IsInitialized();
DLOG << "target_tensor->dims() " << target_tensor->dims();
DLOG << "input.dims() " << input.dims();
DLOG << "input_dim_last_ " << input_dim_last_;
if (config_.load_when_predict) {
if (input_dim_last_ != input.dims()) {
DLOG << "SetInput ---- > resize1";
target_tensor->Resize(input.dims());
target_tensor->mutable_data<float>();
InitNoPersistableMemory(*target_tensor);
}
} else {
DLOG << "SetInput ---- > resize2";
target_tensor->Resize(input.dims());
DLOG << "SetInput ---- > ShareDataWith";
}
target_tensor->ShareDataWith(input);
auto &dim = input.dims();
input_dim_last_ = static_cast<DDim>(dim);
}
template <typename Device, typename T>
void Executor<Device, T>::LoadMemory(const VarDesc var_desc, float *tensorInput,
char **data) {}
......@@ -588,6 +708,8 @@ void Executor<GPU_CL, float>::InitMemory() {
template <>
void Executor<GPU_CL, float>::InitCombineMemory() {
DLOG << "CL InitCombineMemory---- "
<< "config_.load_when_predict: " << config_.load_when_predict;
char *origin_data = nullptr;
bool self_alloc = false;
if (program_.combined_params_buf && program_.combined_params_len) {
......
......@@ -32,7 +32,8 @@ namespace framework {
template <typename Device, typename T = float>
class Executor {
public:
Executor(const Program<Device> &program, int batch_size = 1,
Executor(const Program<Device> &program,
paddle_mobile::PaddleMobileConfigInternal config, int batch_size = 1,
const bool use_optimize = true, const bool lod_mode = false);
PMStatus Predict(const std::vector<std::pair<std::string, Tensor>> &inputs);
......@@ -64,6 +65,7 @@ class Executor {
LoDTensor *tensor) const;
void InitMemory();
void InitCombineMemory();
void InitNoPersistableMemory(const Tensor &input_tensor);
void LoadMemory(void **data, const std::shared_ptr<VarDesc> var_desc,
LoDTensor *tensor);
#ifdef PADDLE_MOBILE_CL
......@@ -73,14 +75,17 @@ class Executor {
int batch_size_;
bool use_optimize_;
bool lod_mode_;
PaddleMobileConfigInternal config_ = PaddleMobileConfigInternal();
Program<Device> program_;
std::shared_ptr<ProgramDesc> program_desc_;
typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
// operators list
std::vector<OperatorBasePtr> ops_list_;
// for super resoltion
DDim input_dim_last_;
#ifdef PADDLE_MOBILE_PROFILE
struct ProfInfo {
int tid = 0;
......
......@@ -105,8 +105,7 @@ bool PaddleMobile<Device, T>::LoadCombinedMemory(
executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
combined_params_buf, optimize,
quantification),
batch_size, optimize, lod_mode);
quantification), config_, batch_size, optimize, lod_mode);
} else {
LOG(kLOG_INFO) << "executor inited";
}
......
......@@ -33,6 +33,13 @@ namespace paddle_mobile {
template <typename Device, typename T = float>
class PaddleMobile {
public:
PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
#ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#endif
}
PaddleMobile() {
#ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
......@@ -99,6 +106,7 @@ class PaddleMobile {
private:
std::shared_ptr<framework::Loader<Device, T>> loader_;
std::shared_ptr<framework::Executor<Device, T>> executor_;
PaddleMobileConfigInternal config_;
};
} // namespace paddle_mobile
......@@ -270,7 +270,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
if (if_bias) {
bias_data = bias->data<float>();
}
float32x4_t zero = vdupq_n_f32(0.0);
for (int b = 0; b < batch_size; ++b) {
......
......@@ -1188,6 +1188,10 @@ void Gemm::WriteWithBnAddRelu(int mc, int nc, float *c, float *C, int ldc,
}
}
void Gemm::VectorKernel(int m, int n, int k, float alpha, const float *A,
int lda, const float *B, int ldb, float beta, float *C,
int ldc, bool relu) {}
#else
void Gemm::AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) {
......
......@@ -23,10 +23,11 @@ int main() {
// ../../../test/models/googlenet
// ../../../test/models/mobilenet
// auto program = loader.Load(g_mobilenet_ssd, true);
std::string g_super = "../models/superresoltion";
// auto program = loader.Load(g_super, true);
// auto program = loader.Load(std::string(g_ocr) + "/model",
// std::string(g_ocr) + "/params", false);
auto program = loader.Load(std::string(g_super) + "/model",
std::string(g_super) + "/params", false);
// program.originProgram->Description("program desc: ");
return 0;
......
......@@ -18,7 +18,10 @@ limitations under the License. */
#include "../test_include.h"
int main() {
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile;
paddle_mobile::PaddleMobileConfigInternal config;
config.load_when_predict = true;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
// paddle_mobile.SetThreadNum(4);
auto time1 = paddle_mobile::time();
#ifdef PADDLE_MOBILE_CL
......@@ -27,38 +30,111 @@ int main() {
auto isok = paddle_mobile.Load(std::string(g_super) + "/model",
std::string(g_super) + "/params", true, false,
1, true);
1, false);
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_mul), true);
if (isok) {
auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl;
// 300*300
// std::vector<float> input;
// std::vector<int64_t> dims{1, 1, 300, 300};
// GetInput<float>(g_test_image_1x3x224x224, &input, dims);
//
// std::vector<float> vec_result;
std::vector<float> input;
std::vector<int64_t> dims{1, 1, 300, 300};
GetInput<float>(g_yolo_img, &input, dims);
auto time3 = paddle_mobile::time();
int max = 1;
std::vector<float> vec_result;
// for (int i = 0; i < max; ++i) {
// auto time5 = paddle_mobile::time();
// vec_result = paddle_mobile.Predict(input, dims);
// auto time6 = paddle_mobile::time();
// std::cout << "300 predict cost :第" << i << ": "
// << paddle_mobile::time_diff(time5, time6) << "ms" << std::endl;
// }
// auto time4 = paddle_mobile::time();
//
// std::cout << "300 predict cost :"
// << paddle_mobile::time_diff(time3, time4) / max << "ms"
// << std::endl;
// auto biggest =
// std::max_element(std::begin(vec_result), std::end(vec_result));
// std::cout << "300 Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result), biggest) << std::endl;
//
// // 500*500
// std::vector<float> vec_result2;
//
// std::vector<float> input2;
// std::vector<int64_t> dims2{1, 1, 500, 500};
// GetInput<float>(g_test_image_1x3x224x224, &input2, dims2);
//
// time3 = paddle_mobile::time();
// for (int i = 0; i < max; ++i) {
// auto time5 = paddle_mobile::time();
// vec_result2 = paddle_mobile.Predict(input2, dims2);
// auto time6 = paddle_mobile::time();
// std::cout << "500 predict cost :第" << i << ": "
// << paddle_mobile::time_diff(time5, time6) << "ms" << std::endl;
// }
//
// time4 = paddle_mobile::time();
// std::cout << "500 predict cost :"
// << paddle_mobile::time_diff(time3, time4) / max << "ms"
// << std::endl;
// biggest = std::max_element(std::begin(vec_result2), std::end(vec_result2));
// std::cout << "500 Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result2), biggest) << std::endl;
//
// // 1000*1000
//
// std::vector<float> vec_result3;
// std::vector<float> input3;
// std::vector<int64_t> dims3{1, 1, 1000, 1000};
// GetInput<float>(g_test_image_1x3x224x224, &input3, dims3);
//
// time3 = paddle_mobile::time();
//
// for (int i = 0; i < max; ++i) {
// auto time5 = paddle_mobile::time();
// vec_result3 = paddle_mobile.Predict(input3, dims3);
// auto time6 = paddle_mobile::time();
// std::cout << "1000*1000 predict cost :第" << i << ": "
// << paddle_mobile::time_diff(time5, time6) << "ms" << std::endl;
// }
// time4 = paddle_mobile::time();
// std::cout << "1000*1000 predict cost :"
// << paddle_mobile::time_diff(time3, time4) / max << "ms"
// << std::endl;
// biggest = std::max_element(std::begin(vec_result3), std::end(vec_result3));
// std::cout << "1000*1000 Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result3), biggest) << std::endl;
auto time3 = paddle_mobile::time();
int max = 10;
// 224*224
std::vector<float> vec_result4;
std::vector<float> input4;
std::vector<int64_t> dims4{1, 1, 300, 300};
GetInput<float>(g_test_image_1x3x224x224, &input4, dims4);
time3 = paddle_mobile::time();
for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile.Predict(input, dims);
auto time5 = paddle_mobile::time();
vec_result4 = paddle_mobile.Predict(input4, dims4);
auto time6 = paddle_mobile::time();
std::cout << "224*224 predict cost :第" << i << ": "
<< paddle_mobile::time_diff(time5, time6) << "ms" << std::endl;
}
auto time4 = paddle_mobile::time();
std::cout << "predict cost :"
auto time4 = paddle_mobile::time();
std::cout << "224*224 predict cost :"
<< paddle_mobile::time_diff(time3, time4) / max << "ms"
<< std::endl;
std::vector<float>::iterator biggest =
std::max_element(std::begin(vec_result), std::end(vec_result));
std::cout << " Max element is " << *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl;
// biggest = std::max_element(std::begin(vec_result4), std::end(vec_result4));
// std::cout << "224*224 Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result4), biggest) << std::endl;
}
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
"是否存在?"
<< std::endl;
return 0;
}
......@@ -63,6 +63,7 @@ static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
static const char *g_imgfssd_ar1 = "../images/003_0001.txt";
static const char *g_img = "../images/img.bin";
static const char *g_yolo_img = "../images/in_put_1_3_416_416_2";
static const char *g_super_img = "../images/mingren_input_data";
static const char *g_mobilenet_img = "../images/image";
using paddle_mobile::framework::DDim;
......
# coding=utf-8
import os
path = "yolo_v2_tofile_source/" # 文件夹目录
to_file_path = "yolo_v2_tofile_combined/params"
path = "mobilenet/" # 文件夹目录
to_file_path = "mobilenet_combine/params"
files = os.listdir(path) # 得到文件夹下的所有文件名称
files.sort(cmp=None, key=str.lower)
to_file = open(to_file_path, "wb")
for file in files: # 遍历文件夹
if not os.path.isdir(file): # 判断是否是文件夹,不是文件夹才打开
if not os.path.isdir(file) and file != ".DS_Store": # 判断是否是文件夹,不是文件夹才打开
f = open(path + "/" + file) # 打开文件
name = f.name
print 'name: ' + name
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册