提交 c5ad3169 编写于 作者: qnqinan's avatar qnqinan

Merge remote-tracking branch 'upstream/develop' into develop

...@@ -96,3 +96,7 @@ metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a ...@@ -96,3 +96,7 @@ metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a
metal/paddle-mobile-demo/paddle-mobile-demo/images metal/paddle-mobile-demo/paddle-mobile-demo/images
metal/paddle-mobile-demo/paddle-mobile-demo/models metal/paddle-mobile-demo/paddle-mobile-demo/models
metal/paddle-mobile-demo/paddle-mobile-demo/Resources
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/images
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/models
metal/MobileNetDemo/MobileNetDemo/Resources
cmake_minimum_required(VERSION 3.0.0) cmake_minimum_required(VERSION 3.0.0)
option(USE_OPENMP "openmp support" ON) option(USE_OPENMP "build with openmp support" ON)
option(DEBUGING "enable debug mode" ON) option(USE_EXCEPTION "build with exception" ON)
option(USE_EXCEPTION "use std exception" ON) option(WITH_LOGGING "print logging for debug" ON)
option(SYMBOL_HIDDEN "symbol hidden" OFF) # on when use jni or ios io option(WITH_SYMBOL "build with all symbols" ON) # turn off if use jni or ios io
option(LOG_PROFILE "log profile" OFF) option(WITH_PROFILE "print op profile for debug" OFF)
option(WITH_TEST "build with unit tests" ON)
# select the platform to build # select the platform to build
option(CPU "armv7 with neon" ON) option(CPU "build with arm CPU support" ON)
option(GPU_MALI "mali gpu" OFF) option(GPU_MALI "build with arm mali GPU support" OFF)
option(GPU_CL "opencl gpu" OFF) option(GPU_CL "build with OpenCL support" OFF)
option(FPGA "build with FPGA support" OFF)
option(FPGA "fpga" OFF)
if(FPGA) if(FPGA)
option(FPGAV1 "fpga v1" ON) option(FPGAV1 "build with fpga v1 support" ON)
option(FPGAV2 "fpga v2" OFF) option(FPGAV2 "build with fpga v2 support" OFF)
endif() endif()
project(paddle-mobile) project(paddle-mobile)
...@@ -23,7 +23,6 @@ file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm) ...@@ -23,7 +23,6 @@ file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h) file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
include_directories(src/) include_directories(src/)
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS}")
if(IS_IOS) if(IS_IOS)
set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \ set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
...@@ -33,13 +32,18 @@ else() ...@@ -33,13 +32,18 @@ else()
set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
endif() endif()
if(DEBUGING) if(USE_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
endif()
if(WITH_LOGGING)
message(STATUS "debugging mode") message(STATUS "debugging mode")
add_definitions(-DPADDLE_MOBILE_DEBUG) add_definitions(-DPADDLE_MOBILE_DEBUG)
else() else()
endif() endif()
if(SYMBOL_HIDDEN) if(NOT WITH_SYMBOL)
add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden) add_definitions(-fvisibility=hidden -fvisibility-inlines-hidden)
endif() endif()
...@@ -50,15 +54,10 @@ else() ...@@ -50,15 +54,10 @@ else()
add_definitions(-fno-exceptions) add_definitions(-fno-exceptions)
endif() endif()
if(LOG_PROFILE) if(WITH_PROFILE)
add_definitions(-DPADDLE_MOBILE_PROFILE) add_definitions(-DPADDLE_MOBILE_PROFILE)
endif() endif()
if(USE_OPENMP)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
endif()
# platform control # platform control
if(ARM_LINUX) if(ARM_LINUX)
include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake") include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake")
...@@ -217,7 +216,6 @@ endif() ...@@ -217,7 +216,6 @@ endif()
set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGA_NET_V1" "FPGA_NET_V2" "NLP") set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGA_NET_V1" "FPGA_NET_V2" "NLP")
include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake") include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
# build library # build library
if(ANDROID_NDK_TOOLCHAIN_INCLUDED) if(ANDROID_NDK_TOOLCHAIN_INCLUDED)
list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS) list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS)
...@@ -239,7 +237,7 @@ else() ...@@ -239,7 +237,7 @@ else()
endif() endif()
# unit test # unit test
if(DEBUGING) if(WITH_TEST AND WITH_SYMBOL)
if(IS_IOS) if(IS_IOS)
else() else()
add_subdirectory(test) add_subdirectory(test)
......
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 50;
objects = {
/* Begin PBXBuildFile section */
FA37E99B9AD29A07FEE8E743 /* Pods_MobileNetDemo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = DD3A2E3175627EF63DACA36C /* Pods_MobileNetDemo.framework */; };
FC74BB3621DFAFEC0055232B /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC74BB3521DFAFEC0055232B /* MobileNet.swift */; };
FCB40DA221E0B7C60075EC91 /* MobilenetPreProcess.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCB40DA121E0B7C60075EC91 /* MobilenetPreProcess.metal */; };
FCB40DA421E0B85B0075EC91 /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40DA321E0B85B0075EC91 /* MetalHelper.swift */; };
FCB40DE921E0B9410075EC91 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FCB40DD221E0B9410075EC91 /* banana.jpeg */; };
FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */; };
FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5021E0CEBB0075EC91 /* mobilenet_params */; };
FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FCB40E5321E0CEF80075EC91 /* synset.txt */; };
FCD3873821E1C31F0052F3D0 /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; };
FCD3873921E1C31F0052F3D0 /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF2870921DFAEC7009A87DA /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2870821DFAEC7009A87DA /* AppDelegate.swift */; };
FCF2870B21DFAEC7009A87DA /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2870A21DFAEC7009A87DA /* ViewController.swift */; };
FCF2870E21DFAEC7009A87DA /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCF2870C21DFAEC7009A87DA /* Main.storyboard */; };
FCF2871021DFAEC8009A87DA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FCF2870F21DFAEC8009A87DA /* Assets.xcassets */; };
FCF2871321DFAEC8009A87DA /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FCF2871121DFAEC8009A87DA /* LaunchScreen.storyboard */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
FCB40DFC21E0BC360075EC91 /* Embed Frameworks */ = {
isa = PBXCopyFilesBuildPhase;
buildActionMask = 2147483647;
dstPath = "";
dstSubfolderSpec = 10;
files = (
FCD3873921E1C31F0052F3D0 /* paddle_mobile.framework in Embed Frameworks */,
);
name = "Embed Frameworks";
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */
4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-MobileNetDemo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo.debug.xcconfig"; sourceTree = "<group>"; };
DD3A2E3175627EF63DACA36C /* Pods_MobileNetDemo.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_MobileNetDemo.framework; sourceTree = BUILT_PRODUCTS_DIR; };
E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-MobileNetDemo.release.xcconfig"; path = "../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo.release.xcconfig"; sourceTree = "<group>"; };
FC74BB3521DFAFEC0055232B /* MobileNet.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FCB40DA121E0B7C60075EC91 /* MobilenetPreProcess.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = MobilenetPreProcess.metal; sourceTree = "<group>"; };
FCB40DA321E0B85B0075EC91 /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; name = MetalHelper.swift; path = "../../paddle-mobile-demo/paddle-mobile-demo/MetalHelper.swift"; sourceTree = "<group>"; };
FCB40DD221E0B9410075EC91 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; };
FCB40E5021E0CEBB0075EC91 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; };
FCB40E5321E0CEF80075EC91 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = MobileNetDemo.app; sourceTree = BUILT_PRODUCTS_DIR; };
FCF2870821DFAEC7009A87DA /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
FCF2870A21DFAEC7009A87DA /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
FCF2870D21DFAEC7009A87DA /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
FCF2870F21DFAEC8009A87DA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = "<group>"; };
FCF2871221DFAEC8009A87DA /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
FCF2871421DFAEC8009A87DA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
FCF2870221DFAEC7009A87DA /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
FCD3873821E1C31F0052F3D0 /* paddle_mobile.framework in Frameworks */,
FA37E99B9AD29A07FEE8E743 /* Pods_MobileNetDemo.framework in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
0DDBA47E92A64BC7B0385B0F /* Frameworks */ = {
isa = PBXGroup;
children = (
DD3A2E3175627EF63DACA36C /* Pods_MobileNetDemo.framework */,
);
name = Frameworks;
sourceTree = "<group>";
};
1EACBAAF38D9EDE0AC2B3F90 /* Pods */ = {
isa = PBXGroup;
children = (
4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */,
E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */,
);
name = Pods;
sourceTree = "<group>";
};
FCB40DCF21E0B9410075EC91 /* Resources */ = {
isa = PBXGroup;
children = (
FCB40DD021E0B9410075EC91 /* images */,
FCB40DD921E0B9410075EC91 /* models */,
);
path = Resources;
sourceTree = "<group>";
};
FCB40DD021E0B9410075EC91 /* images */ = {
isa = PBXGroup;
children = (
FCB40DD221E0B9410075EC91 /* banana.jpeg */,
);
path = images;
sourceTree = "<group>";
};
FCB40DD921E0B9410075EC91 /* models */ = {
isa = PBXGroup;
children = (
FCB40E4E21E0CEBB0075EC91 /* mobilenet_combine */,
);
path = models;
sourceTree = "<group>";
};
FCB40E4E21E0CEBB0075EC91 /* mobilenet_combine */ = {
isa = PBXGroup;
children = (
FCB40E5321E0CEF80075EC91 /* synset.txt */,
FCB40E4F21E0CEBB0075EC91 /* mobilenet_model */,
FCB40E5021E0CEBB0075EC91 /* mobilenet_params */,
);
path = mobilenet_combine;
sourceTree = "<group>";
};
FCF286FC21DFAEC7009A87DA = {
isa = PBXGroup;
children = (
FCD3873721E1C31F0052F3D0 /* paddle_mobile.framework */,
FCF2870721DFAEC7009A87DA /* MobileNetDemo */,
FCF2870621DFAEC7009A87DA /* Products */,
1EACBAAF38D9EDE0AC2B3F90 /* Pods */,
0DDBA47E92A64BC7B0385B0F /* Frameworks */,
);
sourceTree = "<group>";
};
FCF2870621DFAEC7009A87DA /* Products */ = {
isa = PBXGroup;
children = (
FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */,
);
name = Products;
sourceTree = "<group>";
};
FCF2870721DFAEC7009A87DA /* MobileNetDemo */ = {
isa = PBXGroup;
children = (
FCB40DCF21E0B9410075EC91 /* Resources */,
FCB40DA321E0B85B0075EC91 /* MetalHelper.swift */,
FC74BB3521DFAFEC0055232B /* MobileNet.swift */,
FCF2870821DFAEC7009A87DA /* AppDelegate.swift */,
FCF2870A21DFAEC7009A87DA /* ViewController.swift */,
FCF2870C21DFAEC7009A87DA /* Main.storyboard */,
FCF2870F21DFAEC8009A87DA /* Assets.xcassets */,
FCF2871121DFAEC8009A87DA /* LaunchScreen.storyboard */,
FCF2871421DFAEC8009A87DA /* Info.plist */,
FCB40DA121E0B7C60075EC91 /* MobilenetPreProcess.metal */,
);
path = MobileNetDemo;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXNativeTarget section */
FCF2870421DFAEC7009A87DA /* MobileNetDemo */ = {
isa = PBXNativeTarget;
buildConfigurationList = FCF2871721DFAEC8009A87DA /* Build configuration list for PBXNativeTarget "MobileNetDemo" */;
buildPhases = (
B4EB56AEEFF6F3965DA3D2DA /* [CP] Check Pods Manifest.lock */,
FCF2870121DFAEC7009A87DA /* Sources */,
FCF2870221DFAEC7009A87DA /* Frameworks */,
FCF2870321DFAEC7009A87DA /* Resources */,
1D801B9681ACFCA70D444D2C /* [CP] Embed Pods Frameworks */,
FCB40DFC21E0BC360075EC91 /* Embed Frameworks */,
);
buildRules = (
);
dependencies = (
);
name = MobileNetDemo;
productName = MobileNetDemo;
productReference = FCF2870521DFAEC7009A87DA /* MobileNetDemo.app */;
productType = "com.apple.product-type.application";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
FCF286FD21DFAEC7009A87DA /* Project object */ = {
isa = PBXProject;
attributes = {
LastSwiftUpdateCheck = 1010;
LastUpgradeCheck = 1010;
ORGANIZATIONNAME = Ray;
TargetAttributes = {
FCF2870421DFAEC7009A87DA = {
CreatedOnToolsVersion = 10.1;
};
};
};
buildConfigurationList = FCF2870021DFAEC7009A87DA /* Build configuration list for PBXProject "MobileNetDemo" */;
compatibilityVersion = "Xcode 9.3";
developmentRegion = en;
hasScannedForEncodings = 0;
knownRegions = (
en,
Base,
);
mainGroup = FCF286FC21DFAEC7009A87DA;
productRefGroup = FCF2870621DFAEC7009A87DA /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
FCF2870421DFAEC7009A87DA /* MobileNetDemo */,
);
};
/* End PBXProject section */
/* Begin PBXResourcesBuildPhase section */
FCF2870321DFAEC7009A87DA /* Resources */ = {
isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FCF2871321DFAEC8009A87DA /* LaunchScreen.storyboard in Resources */,
FCB40E5121E0CEBB0075EC91 /* mobilenet_model in Resources */,
FCB40DE921E0B9410075EC91 /* banana.jpeg in Resources */,
FCF2871021DFAEC8009A87DA /* Assets.xcassets in Resources */,
FCB40E5421E0CEF80075EC91 /* synset.txt in Resources */,
FCB40E5221E0CEBB0075EC91 /* mobilenet_params in Resources */,
FCF2870E21DFAEC7009A87DA /* Main.storyboard in Resources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXResourcesBuildPhase section */
/* Begin PBXShellScriptBuildPhase section */
1D801B9681ACFCA70D444D2C /* [CP] Embed Pods Frameworks */ = {
isa = PBXShellScriptBuildPhase;
buildActionMask = 2147483647;
files = (
);
inputFileListPaths = (
);
inputPaths = (
"${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh",
"${BUILT_PRODUCTS_DIR}/SwiftProtobuf/SwiftProtobuf.framework",
);
name = "[CP] Embed Pods Frameworks";
outputFileListPaths = (
);
outputPaths = (
"${TARGET_BUILD_DIR}/${FRAMEWORKS_FOLDER_PATH}/SwiftProtobuf.framework",
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
shellScript = "\"${SRCROOT}/../Pods/Target Support Files/Pods-MobileNetDemo/Pods-MobileNetDemo-frameworks.sh\"\n";
showEnvVarsInLog = 0;
};
B4EB56AEEFF6F3965DA3D2DA /* [CP] Check Pods Manifest.lock */ = {
isa = PBXShellScriptBuildPhase;
buildActionMask = 2147483647;
files = (
);
inputFileListPaths = (
);
inputPaths = (
"${PODS_PODFILE_DIR_PATH}/Podfile.lock",
"${PODS_ROOT}/Manifest.lock",
);
name = "[CP] Check Pods Manifest.lock";
outputFileListPaths = (
);
outputPaths = (
"$(DERIVED_FILE_DIR)/Pods-MobileNetDemo-checkManifestLockResult.txt",
);
runOnlyForDeploymentPostprocessing = 0;
shellPath = /bin/sh;
shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n";
showEnvVarsInLog = 0;
};
/* End PBXShellScriptBuildPhase section */
/* Begin PBXSourcesBuildPhase section */
FCF2870121DFAEC7009A87DA /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
FC74BB3621DFAFEC0055232B /* MobileNet.swift in Sources */,
FCB40DA421E0B85B0075EC91 /* MetalHelper.swift in Sources */,
FCB40DA221E0B7C60075EC91 /* MobilenetPreProcess.metal in Sources */,
FCF2870B21DFAEC7009A87DA /* ViewController.swift in Sources */,
FCF2870921DFAEC7009A87DA /* AppDelegate.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin PBXVariantGroup section */
FCF2870C21DFAEC7009A87DA /* Main.storyboard */ = {
isa = PBXVariantGroup;
children = (
FCF2870D21DFAEC7009A87DA /* Base */,
);
name = Main.storyboard;
sourceTree = "<group>";
};
FCF2871121DFAEC8009A87DA /* LaunchScreen.storyboard */ = {
isa = PBXVariantGroup;
children = (
FCF2871221DFAEC8009A87DA /* Base */,
);
name = LaunchScreen.storyboard;
sourceTree = "<group>";
};
/* End PBXVariantGroup section */
/* Begin XCBuildConfiguration section */
FCF2871521DFAEC8009A87DA /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "iPhone Developer";
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = dwarf;
ENABLE_STRICT_OBJC_MSGSEND = YES;
ENABLE_TESTABILITY = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_DYNAMIC_NO_PIC = NO;
GCC_NO_COMMON_BLOCKS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 12.1;
MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE;
MTL_FAST_MATH = YES;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = iphoneos;
SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG;
SWIFT_OPTIMIZATION_LEVEL = "-Onone";
};
name = Debug;
};
FCF2871621DFAEC8009A87DA /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
CLANG_ANALYZER_NONNULL = YES;
CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE;
CLANG_CXX_LANGUAGE_STANDARD = "gnu++14";
CLANG_CXX_LIBRARY = "libc++";
CLANG_ENABLE_MODULES = YES;
CLANG_ENABLE_OBJC_ARC = YES;
CLANG_ENABLE_OBJC_WEAK = YES;
CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES;
CLANG_WARN_BOOL_CONVERSION = YES;
CLANG_WARN_COMMA = YES;
CLANG_WARN_CONSTANT_CONVERSION = YES;
CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES;
CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR;
CLANG_WARN_DOCUMENTATION_COMMENTS = YES;
CLANG_WARN_EMPTY_BODY = YES;
CLANG_WARN_ENUM_CONVERSION = YES;
CLANG_WARN_INFINITE_RECURSION = YES;
CLANG_WARN_INT_CONVERSION = YES;
CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES;
CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES;
CLANG_WARN_OBJC_LITERAL_CONVERSION = YES;
CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR;
CLANG_WARN_RANGE_LOOP_ANALYSIS = YES;
CLANG_WARN_STRICT_PROTOTYPES = YES;
CLANG_WARN_SUSPICIOUS_MOVE = YES;
CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE;
CLANG_WARN_UNREACHABLE_CODE = YES;
CLANG_WARN__DUPLICATE_METHOD_MATCH = YES;
CODE_SIGN_IDENTITY = "iPhone Developer";
COPY_PHASE_STRIP = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
ENABLE_NS_ASSERTIONS = NO;
ENABLE_STRICT_OBJC_MSGSEND = YES;
GCC_C_LANGUAGE_STANDARD = gnu11;
GCC_NO_COMMON_BLOCKS = YES;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR;
GCC_WARN_UNDECLARED_SELECTOR = YES;
GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE;
GCC_WARN_UNUSED_FUNCTION = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
IPHONEOS_DEPLOYMENT_TARGET = 12.1;
MTL_ENABLE_DEBUG_INFO = NO;
MTL_FAST_MATH = YES;
SDKROOT = iphoneos;
SWIFT_COMPILATION_MODE = wholemodule;
SWIFT_OPTIMIZATION_LEVEL = "-O";
VALIDATE_PRODUCT = YES;
};
name = Release;
};
FCF2871821DFAEC8009A87DA /* Debug */ = {
isa = XCBuildConfiguration;
baseConfigurationReference = 4FE67FF667A24FCB0134F627 /* Pods-MobileNetDemo.debug.xcconfig */;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Debug;
};
FCF2871921DFAEC8009A87DA /* Release */ = {
isa = XCBuildConfiguration;
baseConfigurationReference = E57059FE3629E3A8DE6C7ECF /* Pods-MobileNetDemo.release.xcconfig */;
buildSettings = {
ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon;
CODE_SIGN_STYLE = Automatic;
DEVELOPMENT_TEAM = A798K58VVL;
INFOPLIST_FILE = MobileNetDemo/Info.plist;
IPHONEOS_DEPLOYMENT_TARGET = 9.0;
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/Frameworks",
);
PRODUCT_BUNDLE_IDENTIFIER = Ray.MobileNetDemo;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_VERSION = 4.0;
TARGETED_DEVICE_FAMILY = "1,2";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
FCF2870021DFAEC7009A87DA /* Build configuration list for PBXProject "MobileNetDemo" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCF2871521DFAEC8009A87DA /* Debug */,
FCF2871621DFAEC8009A87DA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
FCF2871721DFAEC8009A87DA /* Build configuration list for PBXNativeTarget "MobileNetDemo" */ = {
isa = XCConfigurationList;
buildConfigurations = (
FCF2871821DFAEC8009A87DA /* Debug */,
FCF2871921DFAEC8009A87DA /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = FCF286FD21DFAEC7009A87DA /* Project object */;
}
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:MobileNetDemo.xcodeproj">
</FileRef>
</Workspace>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>IDEDidComputeMac32BitWarning</key>
<true/>
</dict>
</plist>
//
// AppDelegate.swift
// MobileNetDemo
//
// Created by liuRuiLong on 2019/1/4.
// Copyright © 2019 Ray. All rights reserved.
//
import UIKit
@UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {
var window: UIWindow?
func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) -> Bool {
// Override point for customization after application launch.
return true
}
func applicationWillResignActive(_ application: UIApplication) {
// Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state.
// Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game.
}
func applicationDidEnterBackground(_ application: UIApplication) {
// Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later.
// If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits.
}
func applicationWillEnterForeground(_ application: UIApplication) {
// Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background.
}
func applicationDidBecomeActive(_ application: UIApplication) {
// Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface.
}
func applicationWillTerminate(_ application: UIApplication) {
// Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:.
}
}
{
"images" : [
{
"idiom" : "iphone",
"size" : "20x20",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "20x20",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "29x29",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "29x29",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "40x40",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "40x40",
"scale" : "3x"
},
{
"idiom" : "iphone",
"size" : "60x60",
"scale" : "2x"
},
{
"idiom" : "iphone",
"size" : "60x60",
"scale" : "3x"
},
{
"idiom" : "ipad",
"size" : "20x20",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "20x20",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "29x29",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "29x29",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "40x40",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "40x40",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "76x76",
"scale" : "1x"
},
{
"idiom" : "ipad",
"size" : "76x76",
"scale" : "2x"
},
{
"idiom" : "ipad",
"size" : "83.5x83.5",
"scale" : "2x"
},
{
"idiom" : "ios-marketing",
"size" : "1024x1024",
"scale" : "1x"
}
],
"info" : {
"version" : 1,
"author" : "xcode"
}
}
\ No newline at end of file
{
"info" : {
"version" : 1,
"author" : "xcode"
}
}
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="13122.16" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" launchScreen="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="01J-lp-oVM">
<dependencies>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="13104.12"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="EHf-IW-A2E">
<objects>
<viewController id="01J-lp-oVM" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="Ze5-6b-2t3">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<viewLayoutGuide key="safeArea" id="6Tk-OE-BBY"/>
</view>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="iYj-Kq-Ea1" userLabel="First Responder" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="53" y="375"/>
</scene>
</scenes>
</document>
<?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="hKf-0C-qAk">
<device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/>
</device>
<dependencies>
<deployment identifier="iOS"/>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14460.20"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies>
<scenes>
<!--View Controller-->
<scene sceneID="Me8-c9-Oox">
<objects>
<viewController id="hKf-0C-qAk" customClass="ViewController" customModule="MobileNetDemo" customModuleProvider="target" sceneMemberID="viewController">
<view key="view" contentMode="scaleToFill" id="Yst-rK-Wk7">
<rect key="frame" x="0.0" y="0.0" width="375" height="667"/>
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews>
<imageView userInteractionEnabled="NO" contentMode="scaleAspectFit" horizontalHuggingPriority="251" verticalHuggingPriority="251" translatesAutoresizingMaskIntoConstraints="NO" id="bDP-xQ-JgS">
<rect key="frame" x="0.0" y="20" width="375" height="271"/>
</imageView>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="HLo-2k-dr7">
<rect key="frame" x="16" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" secondItem="HLo-2k-dr7" secondAttribute="height" multiplier="21:10" id="xlA-qq-ubI"/>
</constraints>
<state key="normal" title="Image">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="selectImageAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="rJB-ZK-jTR"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="Aa7-KR-JhB">
<rect key="frame" x="109.5" y="597" width="63" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Load">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="loadAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="Lkj-aW-8vj"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="2dy-Ya-PJY">
<rect key="frame" x="202.5" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Predict">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="predictAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="iw4-E7-3br"/>
</connections>
</button>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="Bac-eY-xPP">
<rect key="frame" x="296" y="597" width="63" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<state key="normal" title="Clear">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state>
<connections>
<action selector="clearAct:" destination="hKf-0C-qAk" eventType="touchUpInside" id="QgH-jd-cR1"/>
</connections>
</button>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="vhI-WH-WKF">
<rect key="frame" x="79.5" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" constant="30" id="ffB-31-3Iy"/>
<constraint firstAttribute="height" constant="30" id="nbx-3B-EW0"/>
</constraints>
</view>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="ZoT-1q-tgf">
<rect key="frame" x="266" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="height" constant="30" id="Iu3-ig-lYv"/>
<constraint firstAttribute="width" constant="30" id="Jic-6I-7ch"/>
</constraints>
</view>
<view contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="Zvo-dq-f6D">
<rect key="frame" x="172.5" y="597" width="30" height="30"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" constant="30" id="Zgu-c6-rPT"/>
<constraint firstAttribute="height" constant="30" id="c8V-Gd-hiK"/>
</constraints>
</view>
<label opaque="NO" userInteractionEnabled="NO" contentMode="left" horizontalHuggingPriority="251" verticalHuggingPriority="251" text="耗时:" lineBreakMode="tailTruncation" numberOfLines="0" baselineAdjustment="alignBaselines" adjustsFontSizeToFit="NO" translatesAutoresizingMaskIntoConstraints="NO" id="Jox-rT-ieC">
<rect key="frame" x="15" y="301" width="350" height="38"/>
<constraints>
<constraint firstAttribute="height" constant="38" id="8TB-w5-hbk"/>
</constraints>
<fontDescription key="fontDescription" type="system" pointSize="15"/>
<nil key="textColor"/>
<nil key="highlightedColor"/>
</label>
<imageView userInteractionEnabled="NO" contentMode="scaleToFill" horizontalHuggingPriority="251" verticalHuggingPriority="251" image="paddle-mobile.png" translatesAutoresizingMaskIntoConstraints="NO" id="PZO-kk-MVS">
<rect key="frame" x="90" y="637" width="195" height="30"/>
<constraints>
<constraint firstAttribute="width" secondItem="PZO-kk-MVS" secondAttribute="height" multiplier="6.5:1" id="9DJ-Rj-4ex"/>
</constraints>
</imageView>
<textView clipsSubviews="YES" multipleTouchEnabled="YES" contentMode="scaleToFill" editable="NO" text="结果:" textAlignment="natural" translatesAutoresizingMaskIntoConstraints="NO" id="efW-gP-E3g">
<rect key="frame" x="10" y="347" width="355" height="150"/>
<color key="backgroundColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="height" constant="150" id="whC-NW-nhZ"/>
</constraints>
<fontDescription key="fontDescription" type="system" pointSize="15"/>
<textInputTraits key="textInputTraits" autocapitalizationType="sentences"/>
</textView>
</subviews>
<color key="backgroundColor" red="1" green="1" blue="1" alpha="1" colorSpace="custom" customColorSpace="sRGB"/>
<constraints>
<constraint firstItem="PZO-kk-MVS" firstAttribute="centerX" secondItem="Yst-rK-Wk7" secondAttribute="centerX" id="2ET-tq-zfh"/>
<constraint firstItem="Zvo-dq-f6D" firstAttribute="leading" secondItem="Aa7-KR-JhB" secondAttribute="trailing" id="368-Sl-KgC"/>
<constraint firstItem="bDP-xQ-JgS" firstAttribute="top" secondItem="hlK-mk-uEU" secondAttribute="top" id="3HC-Tb-qff"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="trailing" secondItem="efW-gP-E3g" secondAttribute="trailing" constant="10" id="8QW-BB-dry"/>
<constraint firstItem="ZoT-1q-tgf" firstAttribute="leading" secondItem="2dy-Ya-PJY" secondAttribute="trailing" id="AhB-vB-1aW"/>
<constraint firstItem="Bac-eY-xPP" firstAttribute="leading" secondItem="ZoT-1q-tgf" secondAttribute="trailing" id="BhE-d9-7Sf"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" constant="16" id="BuX-zw-HOG"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="width" secondItem="Aa7-KR-JhB" secondAttribute="width" id="Dbs-xF-8in"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="width" secondItem="Bac-eY-xPP" secondAttribute="width" id="Dov-mA-K38"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="trailing" secondItem="Jox-rT-ieC" secondAttribute="trailing" constant="10" id="LfU-MA-UTb"/>
<constraint firstItem="Aa7-KR-JhB" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="OMl-f7-5CL"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="top" secondItem="efW-gP-E3g" secondAttribute="bottom" constant="100" id="P2f-lC-F02"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="bottom" secondItem="HLo-2k-dr7" secondAttribute="bottom" constant="40" id="Po9-43-AFd"/>
<constraint firstItem="bDP-xQ-JgS" firstAttribute="trailing" secondItem="hlK-mk-uEU" secondAttribute="trailing" id="Pqb-0o-qjh"/>
<constraint firstItem="hlK-mk-uEU" firstAttribute="trailing" secondItem="Bac-eY-xPP" secondAttribute="trailing" constant="16" id="VOE-fl-N71"/>
<constraint firstItem="vhI-WH-WKF" firstAttribute="leading" secondItem="HLo-2k-dr7" secondAttribute="trailing" id="Vlg-FW-uEQ"/>
<constraint firstItem="ZoT-1q-tgf" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="Wpv-Ck-8l3"/>
<constraint firstItem="efW-gP-E3g" firstAttribute="top" secondItem="Jox-rT-ieC" secondAttribute="bottom" constant="8" id="Z8f-Rs-QDZ"/>
<constraint firstItem="vhI-WH-WKF" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="af8-Qd-iQN"/>
<constraint firstItem="bDP-xQ-JgS" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" id="bZr-fF-a2S"/>
<constraint firstItem="HLo-2k-dr7" firstAttribute="width" secondItem="2dy-Ya-PJY" secondAttribute="width" id="c0U-4X-uIO"/>
<constraint firstItem="2dy-Ya-PJY" firstAttribute="leading" secondItem="Zvo-dq-f6D" secondAttribute="trailing" id="cRa-pW-xi8"/>
<constraint firstItem="2dy-Ya-PJY" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="f2B-zG-wXC"/>
<constraint firstItem="PZO-kk-MVS" firstAttribute="top" secondItem="HLo-2k-dr7" secondAttribute="bottom" constant="10" id="hAy-La-Eeh"/>
<constraint firstItem="Zvo-dq-f6D" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="hUc-wn-Ua1"/>
<constraint firstItem="Bac-eY-xPP" firstAttribute="centerY" secondItem="HLo-2k-dr7" secondAttribute="centerY" id="jDC-ag-kL6"/>
<constraint firstItem="Aa7-KR-JhB" firstAttribute="leading" secondItem="vhI-WH-WKF" secondAttribute="trailing" id="jgU-OM-v1G"/>
<constraint firstItem="PZO-kk-MVS" firstAttribute="bottom" secondItem="hlK-mk-uEU" secondAttribute="bottom" id="lkS-rk-Ap8"/>
<constraint firstItem="efW-gP-E3g" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" constant="10" id="q2g-4E-mgJ"/>
<constraint firstItem="Jox-rT-ieC" firstAttribute="top" secondItem="bDP-xQ-JgS" secondAttribute="bottom" constant="10" id="rqK-Pv-SXt"/>
<constraint firstItem="Jox-rT-ieC" firstAttribute="leading" secondItem="hlK-mk-uEU" secondAttribute="leading" constant="15" id="sP3-ym-vhH"/>
</constraints>
<viewLayoutGuide key="safeArea" id="hlK-mk-uEU"/>
</view>
<connections>
<outlet property="elapsedTimeLabel" destination="Jox-rT-ieC" id="QdK-sY-xmq"/>
<outlet property="resultTextView" destination="efW-gP-E3g" id="Vnl-XG-D8E"/>
<outlet property="selectImageView" destination="bDP-xQ-JgS" id="dMV-Wh-YsW"/>
</connections>
</viewController>
<placeholder placeholderIdentifier="IBFirstResponder" id="ShQ-yg-7s0" sceneMemberID="firstResponder"/>
</objects>
<point key="canvasLocation" x="-1558" y="-14"/>
</scene>
</scenes>
<resources>
<image name="paddle-mobile.png" width="16" height="16"/>
</resources>
</document>
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>CFBundleDevelopmentRegion</key>
<string>$(DEVELOPMENT_LANGUAGE)</string>
<key>CFBundleExecutable</key>
<string>$(EXECUTABLE_NAME)</string>
<key>CFBundleIdentifier</key>
<string>$(PRODUCT_BUNDLE_IDENTIFIER)</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>$(PRODUCT_NAME)</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.0</string>
<key>CFBundleVersion</key>
<string>1</string>
<key>LSRequiresIPhoneOS</key>
<true/>
<key>NSCameraUsageDescription</key>
<string>use camera</string>
<key>UILaunchStoryboardName</key>
<string>LaunchScreen</string>
<key>UIMainStoryboardFile</key>
<string>Main</string>
<key>UIRequiredDeviceCapabilities</key>
<array>
<string>armv7</string>
</array>
<key>UISupportedInterfaceOrientations</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
<string>UIInterfaceOrientationLandscapeLeft</string>
<string>UIInterfaceOrientationLandscapeRight</string>
</array>
<key>UISupportedInterfaceOrientations~ipad</key>
<array>
<string>UIInterfaceOrientationPortrait</string>
<string>UIInterfaceOrientationPortraitUpsideDown</string>
<string>UIInterfaceOrientationLandscapeLeft</string>
<string>UIInterfaceOrientationLandscapeRight</string>
</array>
</dict>
</plist>
...@@ -13,13 +13,13 @@ ...@@ -13,13 +13,13 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import paddle_mobile
class MobileNet: Net{ public class MobileNet: Net{
class MobilenetPreProccess: CusomKernel { class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 224, inHeight: 224, inChannel: 3) let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
} }
} }
...@@ -43,9 +43,7 @@ class MobileNet: Net{ ...@@ -43,9 +43,7 @@ class MobileNet: Net{
let labels = PreWords.init(fileName: "synset") let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: ResultHolder) -> String {
guard let resPointer = res.result else { let resPointer = res.result
fatalError()
}
var s: [String] = [] var s: [String] = []
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{ (0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100)) s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
...@@ -53,18 +51,13 @@ class MobileNet: Net{ ...@@ -53,18 +51,13 @@ class MobileNet: Net{
return s.joined(separator: "\n") return s.joined(separator: "\n")
} }
override public init(device: MTLDevice) {
override init(device: MTLDevice) {
super.init(device: device) super.init(device: device)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 0 except = 0
modelPath = Bundle.main.path(forResource: "model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetPreProccess.init(device: device) preprocessKernel = MobilenetPreProccess.init(device: device)
dim = (n: 1, h: 224, w: 224, c: 3) inputDim = Dim.init(inDim: [1, 224, 224, 3])
} }
} }
//
// MobilenetProcess.metal
// MobileNetDemo
//
// Created by liuRuiLong on 2019/1/5.
// Copyright © 2019 Ray. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void mobilenet_preprocess(
texture2d<float, access::read> inTexture [[texture(0)]],
texture2d<float, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = float4(123.68f, 116.78f, 103.94f, 0.0f);
const float4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(float4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
kernel void mobilenet_preprocess_half(
texture2d<half, access::read> inTexture [[texture(0)]],
texture2d<half, access::write> outTexture [[texture(1)]],
uint2 gid [[thread_position_in_grid]])
{
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
const auto means = half4(123.68f, 116.78f, 103.94f, 0.0f);
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
}
//
// ViewController.swift
// MobileNetDemo
//
// Created by liuRuiLong on 2019/1/4.
// Copyright © 2019 Ray. All rights reserved.
//
import UIKit
import paddle_mobile
class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView!
@IBOutlet weak var elapsedTimeLabel: UILabel!
var net: MobileNet!
var runner: Runner!
var toPredictTexture: MTLTexture?
override func viewDidLoad() {
super.viewDidLoad()
GlobalConfig.shared.computePrecision = .Float16
net = MobileNet.init(device: MetalHelper.shared.device)
runner = Runner.init(inNet: net, commandQueue: MetalHelper.shared.queue)
if let selectImage = UIImage.init(named: "banana.jpeg") {
selectImageView.image = selectImage
runner.getTexture(image: selectImage.cgImage!) {[weak self] (texture) in
self?.toPredictTexture = texture
}
}
}
@IBAction func loadAct(_ sender: Any) {
if runner.load() {
let resutText = " load success ! "
print(resutText)
self.resultTextView.text = resutText
} else {
fatalError(" load error ")
}
}
@IBAction func selectImageAct(_ sender: Any) {
let imagePicker = UIImagePickerController()
imagePicker.sourceType = .camera
imagePicker.delegate = self
self.present(imagePicker, animated: true, completion: nil)
}
@IBAction func clearAct(_ sender: Any) {
runner.clear()
}
@IBAction func predictAct(_ sender: Any) {
if let texture = toPredictTexture {
let beginDate = Date.init()
runner.predict(texture: texture) { [weak self] (success, resultHolder) in
if success, let inResultHolder = resultHolder {
let timeUse = Date.init().timeIntervalSince(beginDate)
DispatchQueue.main.async {
self?.elapsedTimeLabel.text = "\(timeUse * 1000)ms"
self?.resultTextView.text = self?.net.resultStr(res: inResultHolder)
}
} else {
print(" predict fail ")
}
}
} else {
print(" toPredictTexture is nil ")
}
}
}
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [String : Any]) {
picker.dismiss(animated: true){[weak self] in
guard let sSelf = self, let image = info["UIImagePickerControllerOriginalImage"] as? UIImage else {
fatalError("no image")
}
sSelf.selectImageView.image = image
sSelf.runner.getTexture(image: image.cgImage!, getTexture: { (texture) in
sSelf.toPredictTexture = texture
})
}
}
}
...@@ -17,3 +17,9 @@ target 'paddle-mobile-unit-test' do ...@@ -17,3 +17,9 @@ target 'paddle-mobile-unit-test' do
project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj' project 'paddle-mobile-unit-test/paddle-mobile-unit-test.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0' pod 'SwiftProtobuf', '~> 1.0'
end end
target 'MobileNetDemo' do
project 'MobileNetDemo/MobileNetDemo.xcodeproj'
pod 'SwiftProtobuf', '~> 1.0'
end
...@@ -10,20 +10,42 @@ ...@@ -10,20 +10,42 @@
30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; }; 30D0ED21F392CFA3885B1002 /* Pods_paddle_mobile_demo.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 18896810981724F8A0FED62A /* Pods_paddle_mobile_demo.framework */; };
C2CBB49021B778EA0020DC6C /* libc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97B2140EE250073E130 /* libc++.tbd */; }; C2CBB49021B778EA0020DC6C /* libc++.tbd in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD97B2140EE250073E130 /* libc++.tbd */; };
C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = C2E67E5D21524E460013F575 /* LoadPointerViewController.m */; }; C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = C2E67E5D21524E460013F575 /* LoadPointerViewController.m */; };
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC013927210204A3008100E3 /* PreProcessKernel.metal */; };
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; }; FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8120E11C550081E9F8 /* AppDelegate.swift */; };
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; }; FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC039B8320E11C550081E9F8 /* ViewController.swift */; };
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; }; FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8520E11C550081E9F8 /* Main.storyboard */; };
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; }; FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8820E11C560081E9F8 /* Assets.xcassets */; };
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; }; FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */; };
FC203FB221CBFDBA00B37166 /* test.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC203FA921CBFDBA00B37166 /* test.jpg */; }; FC203FB221CBFDBA00B37166 /* test.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC203FA921CBFDBA00B37166 /* test.jpg */; };
FC203FB321CBFDBA00B37166 /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC203FAD21CBFDBA00B37166 /* combined_mobilenet_params */; }; FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */; };
FC203FB421CBFDBA00B37166 /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC203FAE21CBFDBA00B37166 /* combined_mobilenet_model */; }; FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC2BFCBD21DF15D900C262B2 /* 123.jpg */; };
FC203FB521CBFDBA00B37166 /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC203FB021CBFDBA00B37166 /* yolo_params */; }; FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */ = {isa = PBXBuildFile; fileRef = FC2BFCBF21DF279900C262B2 /* classify-img-output.png */; };
FC203FB621CBFDBA00B37166 /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC203FB121CBFDBA00B37166 /* yolo_model */; }; FC2BFD3021DF3FEA00C262B2 /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2A21DF3FE900C262B2 /* MobilenetSSD_AR.swift */; };
FC2BFD3121DF3FEA00C262B2 /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2B21DF3FE900C262B2 /* Genet.swift */; };
FC2BFD3221DF3FEA00C262B2 /* MobileNetSSD.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2C21DF3FE900C262B2 /* MobileNetSSD.swift */; };
FC2BFD3321DF3FEA00C262B2 /* YoloNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2D21DF3FE900C262B2 /* YoloNet.swift */; };
FC2BFD3421DF3FEA00C262B2 /* MobileNetCombined.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2E21DF3FEA00C262B2 /* MobileNetCombined.swift */; };
FC2BFD3521DF3FEA00C262B2 /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD2F21DF3FEA00C262B2 /* MobileNet.swift */; };
FC2BFD3821DF46DE00C262B2 /* OCDemoViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD3721DF46DE00C262B2 /* OCDemoViewController.m */; };
FC2BFD3C21DF480400C262B2 /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD3B21DF480400C262B2 /* CPUCompute.mm */; };
FC2BFD3E21DF5CE800C262B2 /* PreProcessKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD3D21DF5CE800C262B2 /* PreProcessKernel.metal */; };
FC2BFD4321DF5E1E00C262B2 /* PaddleMobileGPU.m in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4021DF5E1E00C262B2 /* PaddleMobileGPU.m */; };
FC2BFD4421DF5E1E00C262B2 /* SuperResolutionNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4221DF5E1E00C262B2 /* SuperResolutionNet.swift */; };
FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */ = {isa = PBXBuildFile; fileRef = FC5E03B121DCE8D90016C137 /* mingren_input_data */; };
FC704C1921D2375300F98BAB /* super_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1721D2375300F98BAB /* super_params */; };
FC704C1A21D2375300F98BAB /* super_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1821D2375300F98BAB /* super_model */; };
FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */; };
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */; };
FC704C2421D237FC00F98BAB /* yolo_params in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2021D237FC00F98BAB /* yolo_params */; };
FC704C2521D237FC00F98BAB /* yolo_model in Resources */ = {isa = PBXBuildFile; fileRef = FC704C2121D237FC00F98BAB /* yolo_model */; };
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; }; FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCB214D27920094B8E5 /* FPSCounter.swift */; };
FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; }; FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BCC214D27920094B8E5 /* VideoCapture.swift */; };
FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */ = {isa = PBXBuildFile; fileRef = FC9797BD21D6045B00F2FD90 /* banana.jpeg */; };
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C021D608DF00F2FD90 /* mobilenet_model */; };
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C121D608DF00F2FD90 /* mobilenet_params */; };
FC9797C721D609FB00F2FD90 /* synset.txt in Resources */ = {isa = PBXBuildFile; fileRef = FC9797C621D609FB00F2FD90 /* synset.txt */; };
FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */ = {isa = PBXBuildFile; fileRef = FC9797CE21D6506F00F2FD90 /* mingren.jpg */; };
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; }; FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC542122EF5400D94F7E /* MetalHelper.swift */; };
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */ = {isa = PBXBuildFile; fileRef = FCCED60421D7646E00BE8D5F /* test_image_super */; };
FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; }; FCEBEC2C20E1391F00C0B14D /* paddle_mobile.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; };
FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; FCEBEC2D20E1391F00C0B14D /* paddle_mobile.framework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; }; FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */; };
...@@ -49,7 +71,6 @@ ...@@ -49,7 +71,6 @@
878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = "<group>"; }; 878829884E1A14D7044721D5 /* Pods-paddle-mobile-demo.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-paddle-mobile-demo.debug.xcconfig"; path = "../Pods/Target Support Files/Pods-paddle-mobile-demo/Pods-paddle-mobile-demo.debug.xcconfig"; sourceTree = "<group>"; };
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LoadPointerViewController.h; sourceTree = "<group>"; }; C2E67E5C21524E460013F575 /* LoadPointerViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = LoadPointerViewController.h; sourceTree = "<group>"; };
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = LoadPointerViewController.m; sourceTree = "<group>"; }; C2E67E5D21524E460013F575 /* LoadPointerViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = LoadPointerViewController.m; sourceTree = "<group>"; };
FC013927210204A3008100E3 /* PreProcessKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; };
FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; }; FC039B7E20E11C550081E9F8 /* paddle-mobile-demo.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "paddle-mobile-demo.app"; sourceTree = BUILT_PRODUCTS_DIR; };
FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; }; FC039B8120E11C550081E9F8 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
FC039B8320E11C550081E9F8 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; }; FC039B8320E11C550081E9F8 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = "<group>"; };
...@@ -58,15 +79,41 @@ ...@@ -58,15 +79,41 @@
FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; }; FC039B8B20E11C560081E9F8 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = "<group>"; };
FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; }; FC039B8D20E11C560081E9F8 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = "<group>"; };
FC203FA921CBFDBA00B37166 /* test.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = test.jpg; sourceTree = "<group>"; }; FC203FA921CBFDBA00B37166 /* test.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = test.jpg; sourceTree = "<group>"; };
FC203FAD21CBFDBA00B37166 /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FC203FAE21CBFDBA00B37166 /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FC203FB021CBFDBA00B37166 /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; };
FC203FB121CBFDBA00B37166 /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; };
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = "<group>"; }; FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = "paddle-mobile-demo-Bridging-Header.h"; sourceTree = "<group>"; };
FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = 00001.jpg; sourceTree = "<group>"; };
FC2BFCBD21DF15D900C262B2 /* 123.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = 123.jpg; sourceTree = "<group>"; };
FC2BFCBF21DF279900C262B2 /* classify-img-output.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; path = "classify-img-output.png"; sourceTree = "<group>"; };
FC2BFD2A21DF3FE900C262B2 /* MobilenetSSD_AR.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = "<group>"; };
FC2BFD2B21DF3FE900C262B2 /* Genet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = "<group>"; };
FC2BFD2C21DF3FE900C262B2 /* MobileNetSSD.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = "<group>"; };
FC2BFD2D21DF3FE900C262B2 /* YoloNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = YoloNet.swift; sourceTree = "<group>"; };
FC2BFD2E21DF3FEA00C262B2 /* MobileNetCombined.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNetCombined.swift; sourceTree = "<group>"; };
FC2BFD2F21DF3FEA00C262B2 /* MobileNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FC2BFD3621DF46DE00C262B2 /* OCDemoViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = OCDemoViewController.h; sourceTree = "<group>"; };
FC2BFD3721DF46DE00C262B2 /* OCDemoViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = OCDemoViewController.m; sourceTree = "<group>"; };
FC2BFD3A21DF480300C262B2 /* CPUCompute.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = "<group>"; };
FC2BFD3B21DF480400C262B2 /* CPUCompute.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = CPUCompute.mm; sourceTree = "<group>"; };
FC2BFD3D21DF5CE800C262B2 /* PreProcessKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = PreProcessKernel.metal; sourceTree = "<group>"; };
FC2BFD4021DF5E1E00C262B2 /* PaddleMobileGPU.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = PaddleMobileGPU.m; sourceTree = "<group>"; };
FC2BFD4121DF5E1E00C262B2 /* PaddleMobileGPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileGPU.h; sourceTree = "<group>"; };
FC2BFD4221DF5E1E00C262B2 /* SuperResolutionNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SuperResolutionNet.swift; sourceTree = "<group>"; };
FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; }; FC4FD97B2140EE250073E130 /* libc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libc++.tbd"; path = "usr/lib/libc++.tbd"; sourceTree = SDKROOT; };
FC5E03B121DCE8D90016C137 /* mingren_input_data */ = {isa = PBXFileReference; lastKnownFileType = file; path = mingren_input_data; sourceTree = "<group>"; };
FC704C1721D2375300F98BAB /* super_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_params; sourceTree = "<group>"; };
FC704C1821D2375300F98BAB /* super_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = super_model; sourceTree = "<group>"; };
FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_params; sourceTree = "<group>"; };
FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = combined_mobilenet_model; sourceTree = "<group>"; };
FC704C2021D237FC00F98BAB /* yolo_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_params; sourceTree = "<group>"; };
FC704C2121D237FC00F98BAB /* yolo_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = yolo_model; sourceTree = "<group>"; };
FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; }; FC803BCB214D27920094B8E5 /* FPSCounter.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = FPSCounter.swift; sourceTree = "<group>"; };
FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = "<group>"; }; FC803BCC214D27920094B8E5 /* VideoCapture.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoCapture.swift; sourceTree = "<group>"; };
FC9797BD21D6045B00F2FD90 /* banana.jpeg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = banana.jpeg; sourceTree = "<group>"; };
FC9797C021D608DF00F2FD90 /* mobilenet_model */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_model; sourceTree = "<group>"; };
FC9797C121D608DF00F2FD90 /* mobilenet_params */ = {isa = PBXFileReference; lastKnownFileType = file; path = mobilenet_params; sourceTree = "<group>"; };
FC9797C621D609FB00F2FD90 /* synset.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = synset.txt; sourceTree = "<group>"; };
FC9797CE21D6506F00F2FD90 /* mingren.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = mingren.jpg; sourceTree = "<group>"; };
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; }; FCBCCC542122EF5400D94F7E /* MetalHelper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MetalHelper.swift; sourceTree = "<group>"; };
FCCED60421D7646E00BE8D5F /* test_image_super */ = {isa = PBXFileReference; lastKnownFileType = file; path = test_image_super; sourceTree = "<group>"; };
FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; }; FCEBEC2B20E1391F00C0B14D /* paddle_mobile.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; path = paddle_mobile.framework; sourceTree = BUILT_PRODUCTS_DIR; };
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; }; FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MultiPredictViewController.swift; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
...@@ -125,8 +172,11 @@ ...@@ -125,8 +172,11 @@
FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = { FC039B8020E11C550081E9F8 /* paddle-mobile-demo */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC203FA821CBFDBA00B37166 /* images */, FC2BFD4F21DF892500C262B2 /* Resources */,
FC203FAA21CBFDBA00B37166 /* models */, FCBCCC542122EF5400D94F7E /* MetalHelper.swift */,
FC2BFD3F21DF5DDF00C262B2 /* OCInterface */,
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */,
FC2BFD3921DF46F000C262B2 /* OCDemo */,
FC803BCA214D27920094B8E5 /* VideoCapture */, FC803BCA214D27920094B8E5 /* VideoCapture */,
FC8CFED2213519540094D569 /* Net */, FC8CFED2213519540094D569 /* Net */,
FC039B8120E11C550081E9F8 /* AppDelegate.swift */, FC039B8120E11C550081E9F8 /* AppDelegate.swift */,
...@@ -135,10 +185,7 @@ ...@@ -135,10 +185,7 @@
FC039B8820E11C560081E9F8 /* Assets.xcassets */, FC039B8820E11C560081E9F8 /* Assets.xcassets */,
FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */, FC039B8A20E11C560081E9F8 /* LaunchScreen.storyboard */,
FC039B8D20E11C560081E9F8 /* Info.plist */, FC039B8D20E11C560081E9F8 /* Info.plist */,
FC27991121343A39000B6BAD /* paddle-mobile-demo-Bridging-Header.h */,
FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */, FCF437E7214B6DDB00943429 /* MultiPredictViewController.swift */,
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */,
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */,
); );
path = "paddle-mobile-demo"; path = "paddle-mobile-demo";
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -146,44 +193,90 @@ ...@@ -146,44 +193,90 @@
FC203FA821CBFDBA00B37166 /* images */ = { FC203FA821CBFDBA00B37166 /* images */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC2BFCBF21DF279900C262B2 /* classify-img-output.png */,
FC2BFCBD21DF15D900C262B2 /* 123.jpg */,
FC2BFCBB21DF0A8600C262B2 /* 00001.jpg */,
FC5E03B121DCE8D90016C137 /* mingren_input_data */,
FCCED60421D7646E00BE8D5F /* test_image_super */,
FC9797CE21D6506F00F2FD90 /* mingren.jpg */,
FC9797BD21D6045B00F2FD90 /* banana.jpeg */,
FC203FA921CBFDBA00B37166 /* test.jpg */, FC203FA921CBFDBA00B37166 /* test.jpg */,
); );
name = images; path = images;
path = ../../images;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC203FAA21CBFDBA00B37166 /* models */ = { FC203FAA21CBFDBA00B37166 /* models */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC203FAB21CBFDBA00B37166 /* vision_model */, FC9797BF21D608DF00F2FD90 /* mobilenet */,
FC704C1B21D237FC00F98BAB /* vision_model */,
FC704C1621D2375300F98BAB /* superresoltion */,
);
path = models;
sourceTree = "<group>";
};
FC2BFD3921DF46F000C262B2 /* OCDemo */ = {
isa = PBXGroup;
children = (
C2E67E5C21524E460013F575 /* LoadPointerViewController.h */,
C2E67E5D21524E460013F575 /* LoadPointerViewController.m */,
FC2BFD3621DF46DE00C262B2 /* OCDemoViewController.h */,
FC2BFD3721DF46DE00C262B2 /* OCDemoViewController.m */,
);
path = OCDemo;
sourceTree = "<group>";
};
FC2BFD3F21DF5DDF00C262B2 /* OCInterface */ = {
isa = PBXGroup;
children = (
FC2BFD4121DF5E1E00C262B2 /* PaddleMobileGPU.h */,
FC2BFD4021DF5E1E00C262B2 /* PaddleMobileGPU.m */,
FC2BFD4221DF5E1E00C262B2 /* SuperResolutionNet.swift */,
); );
name = models; path = OCInterface;
path = ../../models;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC203FAB21CBFDBA00B37166 /* vision_model */ = { FC2BFD4F21DF892500C262B2 /* Resources */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC203FAC21CBFDBA00B37166 /* mobilenet */, FC203FA821CBFDBA00B37166 /* images */,
FC203FAF21CBFDBA00B37166 /* yolo */, FC203FAA21CBFDBA00B37166 /* models */,
);
path = Resources;
sourceTree = "<group>";
};
FC704C1621D2375300F98BAB /* superresoltion */ = {
isa = PBXGroup;
children = (
FC704C1721D2375300F98BAB /* super_params */,
FC704C1821D2375300F98BAB /* super_model */,
);
path = superresoltion;
sourceTree = "<group>";
};
FC704C1B21D237FC00F98BAB /* vision_model */ = {
isa = PBXGroup;
children = (
FC704C1C21D237FC00F98BAB /* mobilenet */,
FC704C1F21D237FC00F98BAB /* yolo */,
); );
path = vision_model; path = vision_model;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC203FAC21CBFDBA00B37166 /* mobilenet */ = { FC704C1C21D237FC00F98BAB /* mobilenet */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC203FAD21CBFDBA00B37166 /* combined_mobilenet_params */, FC704C1D21D237FC00F98BAB /* combined_mobilenet_params */,
FC203FAE21CBFDBA00B37166 /* combined_mobilenet_model */, FC704C1E21D237FC00F98BAB /* combined_mobilenet_model */,
); );
path = mobilenet; path = mobilenet;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC203FAF21CBFDBA00B37166 /* yolo */ = { FC704C1F21D237FC00F98BAB /* yolo */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC203FB021CBFDBA00B37166 /* yolo_params */, FC704C2021D237FC00F98BAB /* yolo_params */,
FC203FB121CBFDBA00B37166 /* yolo_model */, FC704C2121D237FC00F98BAB /* yolo_model */,
); );
path = yolo; path = yolo;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -200,12 +293,29 @@ ...@@ -200,12 +293,29 @@
FC8CFED2213519540094D569 /* Net */ = { FC8CFED2213519540094D569 /* Net */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC013927210204A3008100E3 /* PreProcessKernel.metal */, FC2BFD3A21DF480300C262B2 /* CPUCompute.h */,
FCBCCC542122EF5400D94F7E /* MetalHelper.swift */, FC2BFD3B21DF480400C262B2 /* CPUCompute.mm */,
FC2BFD3D21DF5CE800C262B2 /* PreProcessKernel.metal */,
FC2BFD2B21DF3FE900C262B2 /* Genet.swift */,
FC2BFD2F21DF3FEA00C262B2 /* MobileNet.swift */,
FC2BFD2E21DF3FEA00C262B2 /* MobileNetCombined.swift */,
FC2BFD2A21DF3FE900C262B2 /* MobilenetSSD_AR.swift */,
FC2BFD2C21DF3FE900C262B2 /* MobileNetSSD.swift */,
FC2BFD2D21DF3FE900C262B2 /* YoloNet.swift */,
); );
path = Net; path = Net;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC9797BF21D608DF00F2FD90 /* mobilenet */ = {
isa = PBXGroup;
children = (
FC9797C621D609FB00F2FD90 /* synset.txt */,
FC9797C021D608DF00F2FD90 /* mobilenet_model */,
FC9797C121D608DF00F2FD90 /* mobilenet_params */,
);
path = mobilenet;
sourceTree = "<group>";
};
/* End PBXGroup section */ /* End PBXGroup section */
/* Begin PBXNativeTarget section */ /* Begin PBXNativeTarget section */
...@@ -268,14 +378,26 @@ ...@@ -268,14 +378,26 @@
isa = PBXResourcesBuildPhase; isa = PBXResourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FCCED60521D7646E00BE8D5F /* test_image_super in Resources */,
FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */, FC039B8C20E11C560081E9F8 /* LaunchScreen.storyboard in Resources */,
FC203FB421CBFDBA00B37166 /* combined_mobilenet_model in Resources */, FC9797CF21D6506F00F2FD90 /* mingren.jpg in Resources */,
FC203FB321CBFDBA00B37166 /* combined_mobilenet_params in Resources */, FC704C2221D237FC00F98BAB /* combined_mobilenet_params in Resources */,
FC704C1921D2375300F98BAB /* super_params in Resources */,
FC2BFCBE21DF15D900C262B2 /* 123.jpg in Resources */,
FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */, FC039B8920E11C560081E9F8 /* Assets.xcassets in Resources */,
FC203FB521CBFDBA00B37166 /* yolo_params in Resources */, FC9797C721D609FB00F2FD90 /* synset.txt in Resources */,
FC203FB621CBFDBA00B37166 /* yolo_model in Resources */, FC5E03B221DCE8D90016C137 /* mingren_input_data in Resources */,
FC704C1A21D2375300F98BAB /* super_model in Resources */,
FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */, FC039B8720E11C550081E9F8 /* Main.storyboard in Resources */,
FC9797C221D608E000F2FD90 /* mobilenet_model in Resources */,
FC2BFCC021DF279900C262B2 /* classify-img-output.png in Resources */,
FC203FB221CBFDBA00B37166 /* test.jpg in Resources */, FC203FB221CBFDBA00B37166 /* test.jpg in Resources */,
FC704C2321D237FC00F98BAB /* combined_mobilenet_model in Resources */,
FC9797C321D608E000F2FD90 /* mobilenet_params in Resources */,
FC704C2421D237FC00F98BAB /* yolo_params in Resources */,
FC2BFCBC21DF0A8600C262B2 /* 00001.jpg in Resources */,
FC9797BE21D6045B00F2FD90 /* banana.jpeg in Resources */,
FC704C2521D237FC00F98BAB /* yolo_model in Resources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
...@@ -325,14 +447,24 @@ ...@@ -325,14 +447,24 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC2BFD3221DF3FEA00C262B2 /* MobileNetSSD.swift in Sources */,
FC2BFD3C21DF480400C262B2 /* CPUCompute.mm in Sources */,
FC2BFD4321DF5E1E00C262B2 /* PaddleMobileGPU.m in Sources */,
FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */, FC039B8420E11C550081E9F8 /* ViewController.swift in Sources */,
FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */, FC803BCE214D27930094B8E5 /* VideoCapture.swift in Sources */,
FC013928210204A3008100E3 /* PreProcessKernel.metal in Sources */,
FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */, FCF437E8214B6DDB00943429 /* MultiPredictViewController.swift in Sources */,
FC2BFD3021DF3FEA00C262B2 /* MobilenetSSD_AR.swift in Sources */,
FC2BFD3321DF3FEA00C262B2 /* YoloNet.swift in Sources */,
FC2BFD3421DF3FEA00C262B2 /* MobileNetCombined.swift in Sources */,
FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */, FCBCCC552122EF5500D94F7E /* MetalHelper.swift in Sources */,
FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */, FC803BCD214D27930094B8E5 /* FPSCounter.swift in Sources */,
FC2BFD3521DF3FEA00C262B2 /* MobileNet.swift in Sources */,
C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */, C2E67E5E21524E460013F575 /* LoadPointerViewController.m in Sources */,
FC2BFD3121DF3FEA00C262B2 /* Genet.swift in Sources */,
FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */, FC039B8220E11C550081E9F8 /* AppDelegate.swift in Sources */,
FC2BFD4421DF5E1E00C262B2 /* SuperResolutionNet.swift in Sources */,
FC2BFD3E21DF5CE800C262B2 /* PreProcessKernel.metal in Sources */,
FC2BFD3821DF46DE00C262B2 /* OCDemoViewController.m in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
......
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1010"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<BuildableProductRunnable
runnableDebuggingMode = "0">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B7D20E11C550081E9F8"
BuildableName = "paddle-mobile-demo.app"
BlueprintName = "paddle-mobile-demo"
ReferencedContainer = "container:paddle-mobile-demo.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14113" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r"> <document type="com.apple.InterfaceBuilder3.CocoaTouch.Storyboard.XIB" version="3.0" toolsVersion="14460.31" targetRuntime="iOS.CocoaTouch" propertyAccessControl="none" useAutolayout="YES" useTraitCollections="YES" useSafeAreas="YES" colorMatched="YES" initialViewController="BYZ-38-t0r">
<device id="retina4_7" orientation="portrait"> <device id="retina4_7" orientation="portrait">
<adaptation id="fullscreen"/> <adaptation id="fullscreen"/>
</device> </device>
<dependencies> <dependencies>
<deployment identifier="iOS"/> <deployment identifier="iOS"/>
<plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14088"/> <plugIn identifier="com.apple.InterfaceBuilder.IBCocoaTouchPlugin" version="14460.20"/>
<capability name="Aspect ratio constraints" minToolsVersion="5.1"/>
<capability name="Safe area layout guides" minToolsVersion="9.0"/> <capability name="Safe area layout guides" minToolsVersion="9.0"/>
<capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/> <capability name="documents saved in the Xcode 8 format" minToolsVersion="8.0"/>
</dependencies> </dependencies>
...@@ -20,7 +19,7 @@ ...@@ -20,7 +19,7 @@
<autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/> <autoresizingMask key="autoresizingMask" widthSizable="YES" heightSizable="YES"/>
<subviews> <subviews>
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="TQt-X9-PdF"> <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="TQt-X9-PdF">
<rect key="frame" x="164" y="318" width="46" height="30"/> <rect key="frame" x="164.5" y="318.5" width="46" height="30"/>
<state key="normal" title="Button"/> <state key="normal" title="Button"/>
<connections> <connections>
<action selector="predictAct:" destination="Vwd-lt-764" eventType="touchUpInside" id="d4z-Cv-6jY"/> <action selector="predictAct:" destination="Vwd-lt-764" eventType="touchUpInside" id="d4z-Cv-6jY"/>
...@@ -60,7 +59,7 @@ ...@@ -60,7 +59,7 @@
<nil key="highlightedColor"/> <nil key="highlightedColor"/>
</label> </label>
<pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr"> <pickerView contentMode="scaleToFill" translatesAutoresizingMaskIntoConstraints="NO" id="DlO-dk-RMr">
<rect key="frame" x="55" y="510.5" width="320" height="80"/> <rect key="frame" x="55" y="510" width="320" height="80"/>
<constraints> <constraints>
<constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/> <constraint firstAttribute="height" constant="80" id="Sbi-05-Mwd"/>
</constraints> </constraints>
...@@ -83,6 +82,9 @@ ...@@ -83,6 +82,9 @@
<button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="wUL-9N-u1V"> <button opaque="NO" contentMode="scaleToFill" contentHorizontalAlignment="center" contentVerticalAlignment="center" buttonType="roundedRect" showsTouchWhenHighlighted="YES" lineBreakMode="middleTruncation" translatesAutoresizingMaskIntoConstraints="NO" id="wUL-9N-u1V">
<rect key="frame" x="16" y="597" width="63.5" height="30"/> <rect key="frame" x="16" y="597" width="63.5" height="30"/>
<color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/> <color key="backgroundColor" white="0.0" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
<constraints>
<constraint firstAttribute="width" secondItem="wUL-9N-u1V" secondAttribute="height" multiplier="21:10" id="cp7-bd-CvU"/>
</constraints>
<state key="normal" title="Image"> <state key="normal" title="Image">
<color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/> <color key="titleColor" white="1" alpha="1" colorSpace="custom" customColorSpace="genericGamma22GrayColorSpace"/>
</state> </state>
......
//
// LoadPointerViewController.h
// paddle-mobile-demo
//
// Created by Xiao,Haichun on 2018/9/19.
// Copyright © 2018年 orange. All rights reserved.
//
#import <UIKit/UIKit.h>
@interface LoadPointerViewController : UIViewController
@end
...@@ -27,7 +27,4 @@ public class MetalHelper { ...@@ -27,7 +27,4 @@ public class MetalHelper {
queue = device.makeCommandQueue()! queue = device.makeCommandQueue()!
textureLoader = MTKTextureLoader.init(device: device) textureLoader = MTKTextureLoader.init(device: device)
} }
} }
...@@ -22,10 +22,10 @@ class MultiPredictViewController: UIViewController { ...@@ -22,10 +22,10 @@ class MultiPredictViewController: UIViewController {
super.viewDidLoad() super.viewDidLoad()
let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device) let mobileNet = MobileNet_ssd_hand.init(device: MetalHelper.shared.device)
let genet = Genet.init(device: MetalHelper.shared.device) let genet = Genet.init(device: MetalHelper.shared.device)
runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) runner1 = Runner.init(inNet: mobileNet, commandQueue: MetalHelper.shared.queue)
let queue2 = MetalHelper.shared.device.makeCommandQueue() let queue2 = MetalHelper.shared.device.makeCommandQueue()
runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue, inPlatform: .GPU) runner2 = Runner.init(inNet: genet, commandQueue: MetalHelper.shared.queue)
} }
@IBAction func predictAct(_ sender: Any) { @IBAction func predictAct(_ sender: Any) {
......
//
// RGBToYCrCb_Y.metal
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/12/28.
// Copyright © 2018 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void buffer_to_texture_kernel( const device float *input [[buffer(0)]],
texture2d<float, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(float4(y, 0.0f, 0.0f, 0.0f), gid);
}
kernel void buffer_to_texture_kernel_half( const device float *input [[buffer(0)]],
texture2d<half, access::write> outTexture [[texture(0)]],
uint2 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) {
return;
}
float y = input[outTexture.get_width() * gid.y + gid.x];
outTexture.write(half4(y, 0.0f, 0.0f, 0.0f), gid);
}
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
@interface CPUResult: NSObject @interface CPUResult: NSObject
@property (assign, nonatomic) float *output; @property (assign, nonatomic) float *output;
@property (assign, nonatomic) int outputSize; @property (assign, nonatomic) int outputSize;
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#import "CPUCompute.h" #import "CPUCompute.h"
#import <map> #import <map>
...@@ -20,9 +19,6 @@ ...@@ -20,9 +19,6 @@
#import <utility> #import <utility>
#import <algorithm> #import <algorithm>
struct NMSParam { struct NMSParam {
float *score_data; float *score_data;
......
...@@ -13,42 +13,36 @@ ...@@ -13,42 +13,36 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import paddle_mobile
public class Genet: Net { public class Genet: Net {
@objc public override init(device: MTLDevice) { @objc public override init(device: MTLDevice) {
super.init(device: device) super.init(device: device)
means = [128.0, 128.0, 128.0]
scale = 0.017
except = 0
modelPath = Bundle.main.path(forResource: "genet_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "genet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "genet_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = GenetPreProccess.init(device: device) preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 128, w: 128, c: 3) inputDim = Dim.init(inDim: [1, 128, 128, 3])
} }
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { @objc override public init(device: MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) super.init(device: device,
means = [128.0, 128.0, 128.0] paramPointer: paramPointer,
scale = 0.017 paramSize: paramSize,
except = 0 modePointer: modePointer,
modelPath = "" modelSize: modelSize)
paramPath = ""
modelDir = ""
preprocessKernel = GenetPreProccess.init(device: device) preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 128, w: 128, c: 3) inputDim = Dim.init(inDim: [1, 128, 128, 3])
} }
class GenetPreProccess: CusomKernel { class GenetPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3) let s = Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
} }
} }
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: ResultHolder) -> String {
// fatalError() return " \(res.result[0]) ... "
return " \(res.result![0]) ... "
} }
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import paddle_mobile
public class MobileNet: Net{
class MobilenetPreProccess: CusomKernel {
init(device: MTLDevice) {
let s = Shape.init(inWidth: 224, inHeight: 224, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
}
}
class PreWords {
var contents: [String] = []
init(fileName: String, type: String = "txt", inBundle: Bundle = Bundle.main) {
if let filePath = inBundle.path(forResource: fileName, ofType: type) {
let string = try! String.init(contentsOfFile: filePath)
contents = string.components(separatedBy: CharacterSet.newlines).filter{$0.count > 10}.map{
String($0[$0.index($0.startIndex, offsetBy: 10)...])
}
}else{
fatalError("no file call \(fileName)")
}
}
subscript(index: Int) -> String {
return contents[index]
}
}
let labels = PreWords.init(fileName: "synset")
override public func resultStr(res: ResultHolder) -> String {
let resPointer = res.result
var s: [String] = []
(0..<res.capacity).map { resPointer[$0] }.top(r: 5).enumerated().forEach{
s.append(String(format: "%d: %@ (%3.2f%%)", $0 + 1, labels[$1.0], $1.1 * 100))
}
return s.joined(separator: "\n")
}
override public init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "mobilenet_params", ofType: nil) ?! "para null"
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetPreProccess.init(device: device)
inputDim = Dim.init(inDim: [1, 224, 224, 3])
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import paddle_mobile
public class MobileNetCombined: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
inputDim = Dim.init(inDim: [1, 224, 224, 3])
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
}
override public func resultStr(res: ResultHolder) -> String {
return " \(res.result[0]) ... "
}
}
...@@ -13,36 +13,35 @@ ...@@ -13,36 +13,35 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import paddle_mobile
public class MobileNet_ssd_hand: Net{ public class MobileNet_ssd_hand: Net {
@objc public override init(device: MTLDevice) { @objc public override init(device: MTLDevice) {
super.init(device: device) super.init(device: device)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 2 except = 2
modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "ssd_hand_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "ssd_hand_params", ofType: nil) ?! "para null"
modelDir = "" // metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 300, w: 300, c: 3) inputDim = Dim.init(inDim: [1, 300, 300, 3])
} }
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { @objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [123.68, 116.78, 103.94]
scale = 0.017
except = 2 except = 2
modelPath = "" modelPath = ""
paramPath = "" paramPath = ""
modelDir = "" // metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 300, w: 300, c: 3) inputDim = Dim.init(inDim: [1, 300, 300, 3])
} }
class MobilenetssdPreProccess: CusomKernel { class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 300, inHeight: 300, inChannel: 3) let s = Shape.init(inWidth: 300, inHeight: 300, inChannel: 3)
super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "mobilenet_ssd_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
} }
} }
...@@ -50,7 +49,7 @@ public class MobileNet_ssd_hand: Net{ ...@@ -50,7 +49,7 @@ public class MobileNet_ssd_hand: Net{
return " \(res)" return " \(res)"
} }
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { override public func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
// guard let interRes = paddleMobileRes.intermediateResults else { // guard let interRes = paddleMobileRes.intermediateResults else {
// fatalError(" need have inter result ") // fatalError(" need have inter result ")
......
...@@ -13,55 +13,49 @@ ...@@ -13,55 +13,49 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import paddle_mobile
public class MobileNet_ssd_AR: Net{ public class MobileNet_ssd_AR: Net {
@objc public override init(device: MTLDevice) { @objc public override init(device: MTLDevice) {
super.init(device: device) super.init(device: device)
means = [103.94, 116.78, 123.68]
scale = 1
except = 2 except = 2
modelPath = Bundle.main.path(forResource: "ar_model", ofType: nil) ?! "model null" modelPath = Bundle.main.path(forResource: "ar_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! "para null" paramPath = Bundle.main.path(forResource: "ar_params", ofType: nil) ?! "para null"
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 160, w: 160, c: 3) inputDim = Dim.init(inDim: [1, 160, 160, 3])
} }
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) { @objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize) super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [103.94, 116.78, 123.68]
scale = 1
except = 2 except = 2
modelPath = ""
paramPath = ""
modelDir = ""
preprocessKernel = MobilenetssdPreProccess.init(device: device) preprocessKernel = MobilenetssdPreProccess.init(device: device)
dim = (n: 1, h: 160, w: 160, c: 3) inputDim = Dim.init(inDim: [1, 160, 160, 3])
} }
class MobilenetssdPreProccess: CusomKernel { class MobilenetssdPreProccess: CusomKernel {
init(device: MTLDevice) { init(device: MTLDevice) {
let s = CusomKernel.Shape.init(inWidth: 160, inHeight: 160, inChannel: 3) let s = Shape.init(inWidth: 160, inHeight: 160, inChannel: 3)
super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, usePaddleMobileLib: false) super.init(device: device, inFunctionName: "mobilent_ar_preprocess", outputDim: s, metalLoadModel: .LoadMetalInDefaultLib, metalLibPath: nil)
} }
} }
override public func resultStr(res: ResultHolder) -> String { override public func resultStr(res: ResultHolder) -> String {
return " \(res.result![0])" return " \(res.result[0])"
} }
override func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder { override public func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
guard let interRes = paddleMobileRes.intermediateResults else { fatalError()
fatalError(" need have inter result ") // guard let interRes = paddleMobileRes.intermediateResults else {
} // fatalError(" need have inter result ")
// }
guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else { //
fatalError(" need score ") // guard let scores = interRes["Scores"], scores.count > 0, let score = scores[0] as? FetchHolder else {
} // fatalError(" need score ")
// }
guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else { //
fatalError() // guard let bboxs = interRes["BBoxes"], bboxs.count > 0, let bbox = bboxs[0] as? FetchHolder else {
} // fatalError()
// }
// let startDate = Date.init() // let startDate = Date.init()
...@@ -72,19 +66,19 @@ public class MobileNet_ssd_AR: Net{ ...@@ -72,19 +66,19 @@ public class MobileNet_ssd_AR: Net{
// //
// print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray()) // print((0..<bbox.capacity).map{ bbox.result[$0] }.strideArray())
let nmsCompute = NMSCompute.init() // let nmsCompute = NMSCompute.init()
nmsCompute.scoreThredshold = 0.25 // nmsCompute.scoreThredshold = 0.25
nmsCompute.nmsTopK = 100 // nmsCompute.nmsTopK = 100
nmsCompute.keepTopK = 100 // nmsCompute.keepTopK = 100
nmsCompute.nmsEta = 1.0 // nmsCompute.nmsEta = 1.0
nmsCompute.nmsThreshold = 0.449999988 // nmsCompute.nmsThreshold = 0.449999988
nmsCompute.background_label = 0; // nmsCompute.background_label = 0;
nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])] // nmsCompute.scoreDim = [NSNumber.init(value: score.dim[0]), NSNumber.init(value: score.dim[1]), NSNumber.init(value: score.dim[2])]
nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])] // nmsCompute.bboxDim = [NSNumber.init(value: bbox.dim[0]), NSNumber.init(value: bbox.dim[1]), NSNumber.init(value: bbox.dim[2])]
guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else { // guard let result = nmsCompute.compute(withScore: score.result, andBBoxs: bbox.result) else {
fatalError( " result error " ) // fatalError( " result error " )
} // }
let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize)) // let resultHolder = ResultHolder.init(inResult: result.output, inCapacity: Int(result.outputSize))
// for i in 0..<Int(result.outputSize) { // for i in 0..<Int(result.outputSize) {
// //
// print("i \(i) : \(result.output[i])") // print("i \(i) : \(result.output[i])")
...@@ -92,62 +86,63 @@ public class MobileNet_ssd_AR: Net{ ...@@ -92,62 +86,63 @@ public class MobileNet_ssd_AR: Net{
// print(Date.init().timeIntervalSince(startDate)) // print(Date.init().timeIntervalSince(startDate))
// print(resultHolder.result![0]) // print(resultHolder.result![0])
return resultHolder // return resultHolder
} }
override func updateProgram(program: Program) { // override func updateProgram(program: Program) {
for i in [56, 66, 76, 86, 93, 99] {
let opDesc = program.programDesc.blocks[0].ops[i] // for i in [56, 66, 76, 86, 93, 99] {
let output = opDesc.outputs["Out"]!.first! // let opDesc = program.programDesc.blocks[0].ops[i]
let v = program.scope[output]! // let output = opDesc.outputs["Out"]!.first!
let originTexture = v as! Texture<Float32> // let v = program.scope[output]!
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7]) // let originTexture = v as! Texture
// originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1] / 7, originTexture.tensorDim[0] * 7])
originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7]) //
// originTexture.dim = Dim.init(inDim: [1, 1, originTexture.dim[3] / 7, originTexture.dim[2] * 7])
originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7]) //
// originTexture.padToFourDim = Dim.init(inDim: [1, 1, originTexture.padToFourDim[3] / 7, originTexture.padToFourDim[2] * 7])
program.scope[output] = originTexture //
// program.scope[output] = originTexture
if i == 99 { //
opDesc.attrs["axis"] = 0 // if i == 99 {
} else { // opDesc.attrs["axis"] = 0
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } // } else {
} // opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
} // }
// }
for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] { //
let opDesc = program.programDesc.blocks[0].ops[i] // for i in [58, 59, 88, 89, 95, 96, 68, 69, 78, 79] {
let output = opDesc.outputs["Out"]!.first! // let opDesc = program.programDesc.blocks[0].ops[i]
let v = program.scope[output]! // let output = opDesc.outputs["Out"]!.first!
// let v = program.scope[output]!
//
//
let originTexture = v as! Texture<Float32> //
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // let originTexture = v as! Texture
opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) } // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
} // opDesc.attrs["shape"] = originTexture.tensorDim.dims.map { Int32($0) }
// }
for i in [60, 101, 90, 97, 70, 80] { //
let opDesc = program.programDesc.blocks[0].ops[i] // for i in [60, 101, 90, 97, 70, 80] {
let output = opDesc.outputs["Out"]!.first! // let opDesc = program.programDesc.blocks[0].ops[i]
let v = program.scope[output]! // let output = opDesc.outputs["Out"]!.first!
let originTexture = v as! Texture<Float32> // let v = program.scope[output]!
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // let originTexture = v as! Texture
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
} // opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
// }
for i in [102] { //
let opDesc = program.programDesc.blocks[0].ops[i] // for i in [102] {
for output in opDesc.outputs["Out"]! { // let opDesc = program.programDesc.blocks[0].ops[i]
let v = program.scope[output]! // for output in opDesc.outputs["Out"]! {
let originTexture = v as! Texture<Float32> // let v = program.scope[output]!
originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]]) // let originTexture = v as! Texture
} // originTexture.tensorDim = Dim.init(inDim: [originTexture.tensorDim[1], originTexture.tensorDim[2]])
opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1 // }
print(" split axis \(opDesc.attrs["axis"])") // opDesc.attrs["axis"] = (opDesc.attrs["axis"]! as! Int) - 1
} // print(" split axis \(opDesc.attrs["axis"])")
// }
// 99 // 99
} // }
} }
//
// PaddleMobile.swift
// paddle-mobile-demo
//
// Created by liuRuiLong on 2018/9/5.
// Copyright © 2018年 orange. All rights reserved.
//
import Foundation
...@@ -115,23 +115,3 @@ kernel void mobilent_ar_preprocess_half(texture2d<half, access::read> inTexture ...@@ -115,23 +115,3 @@ kernel void mobilent_ar_preprocess_half(texture2d<half, access::read> inTexture
const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017; const half4 inColor = (inTexture.read(gid) * 255.0 - means) * 0.017;
outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid); outTexture.write(half4(inColor.z, inColor.y, inColor.x, 0.0f), gid);
} }
kernel void scale(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(input, gid);
}
kernel void scale_half(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<half, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(half4(input), gid);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import Foundation
import paddle_mobile
public class YoloNet: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null"
inputDim = Dim.init(inDim: [1, 416, 416, 3])
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
}
override public func resultStr(res: ResultHolder) -> String {
return " \(res.result[0]) ... "
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import <UIKit/UIKit.h>
@interface LoadPointerViewController : UIViewController
@end
// /* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
// LoadPointerViewController.m
// paddle-mobile-demo Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// Created by Xiao,Haichun on 2018/9/19. You may obtain a copy of the License at
// Copyright © 2018年 orange. All rights reserved.
// http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import "PaddleMobileGPU.h"
#import "LoadPointerViewController.h" #import "LoadPointerViewController.h"
#import <Metal/Metal.h>
#import "paddle-mobile-demo-Bridging-Header.h" #import "paddle-mobile-demo-Bridging-Header.h"
#import <Metal/Metal.h>
@interface LoadPointerViewController () @interface LoadPointerViewController ()
@property (strong, nonatomic) id<MTLDevice> device; @property (strong, nonatomic) id<MTLDevice> device;
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import <Foundation/Foundation.h>
NS_ASSUME_NONNULL_BEGIN
@interface OCDemoViewController : NSObject
@end
NS_ASSUME_NONNULL_END
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#import "OCDemoViewController.h"
@implementation OCDemoViewController
@end
...@@ -16,9 +16,8 @@ ...@@ -16,9 +16,8 @@
#import <Foundation/Foundation.h> #import <Foundation/Foundation.h>
typedef enum : NSUInteger { typedef enum : NSUInteger {
MobileNetType, SuperResolutionNetType,
MobileNetSSDType, MobileNetSSDType
GenetType,
} NetType; } NetType;
@interface PaddleMobileGPUResult: NSObject @interface PaddleMobileGPUResult: NSObject
......
...@@ -12,10 +12,10 @@ ...@@ -12,10 +12,10 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#import <Foundation/Foundation.h>
#import "PaddleMobileGPU.h" #import "PaddleMobileGPU.h"
#import "paddle_mobile.h"
#import <paddle_mobile/paddle_mobile-Swift.h> #import <Foundation/Foundation.h>
#import <paddle_mobile_demo-Swift.h>
@implementation ModelConfig @implementation ModelConfig
@end @end
...@@ -52,14 +52,12 @@ ...@@ -52,14 +52,12 @@
self = [super init]; self = [super init];
if (self) { if (self) {
Net *net = nil; Net *net = nil;
if (netType == GenetType) { if (netType == SuperResolutionNetType) {
net = [[Genet alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize]; net = [[SuperResolutionNet alloc] initWithDevice:queue.device];
} else if (netType == MobileNetSSDType) { } else if (netType == MobileNetSSDType) {
net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize]; net = [[MobileNet_ssd_AR alloc] initWithDevice:queue.device paramPointer:config.paramPointer paramSize:config.paramSize modePointer:config.modelPointer modelSize:config.modelSize];
} else if (netType == MobileNetType) {
} }
runner = [[Runner alloc] initInNet:net commandQueue:queue inPlatform:PlatformGPU]; runner = [[Runner alloc] initInNet:net commandQueue:queue];
} }
return self; return self;
} }
...@@ -69,6 +67,7 @@ ...@@ -69,6 +67,7 @@
} }
-(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSNumber *> *))completion { -(void)predict:(id<MTLTexture>)texture withCompletion:(void (^)(BOOL, NSArray<NSNumber *> *))completion {
[runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) { [runner predictWithTexture:texture completion:^(BOOL success, ResultHolder * _Nullable result) {
NSMutableArray<NSNumber *> *resultArray = [NSMutableArray arrayWithCapacity:result.capacity]; NSMutableArray<NSNumber *> *resultArray = [NSMutableArray arrayWithCapacity:result.capacity];
for (int i = 0; i < result.capacity; ++i) { for (int i = 0; i < result.capacity; ++i) {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import paddle_mobile
@objc public class SuperResolutionNet: Net{
override public func resultStr(res: ResultHolder) -> String {
return "未实现"
}
@objc override public init(device: MTLDevice) {
super.init(device: device)
except = 0
modelPath = Bundle.main.path(forResource: "super_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "super_params", ofType: nil) ?! "para null"
preprocessKernel = nil
inputDim = Dim.init(inDim: [1, 224, 224, 1])
// metalLoadMode = .LoadMetalInCustomMetalLib
// metalLibPath = Bundle.main.path(forResource: "PaddleMobileMetal", ofType: "metallib") ?! " can't be nil "
}
override public func updateProgram(program: Program) {
// n h w c
for block in program.programDesc.blocks {
for varDesc in block.vars {
if !varDesc.persistable {
if varDesc.type == .LodTensor {
let varEle = program.scope.vars[varDesc.name]
if let texture = varEle as? Texture {
let newDim = Dim.init(inDim: [texture.dim[0], inputDim[1], inputDim[2], texture.tensorDim[1]])
print(" var desc name " + varDesc.name + " new dim" + "\(newDim)")
texture.updateDims(inTensorDim: Dim.init(inDim: [texture.tensorDim[0], texture.tensorDim[1], inputDim[1], inputDim[2]]), inDim: newDim)
texture.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: GlobalConfig.shared.computePrecision)
let output: FetchHolder = program.scope.output() as! FetchHolder
output.dim = newDim
output.capacity = newDim.numel()
output.paddedCapacity = newDim.numel() * 4
output.initBuffer(device: device)
}
}
}
}
}
}
}
...@@ -18,31 +18,56 @@ import CoreMedia ...@@ -18,31 +18,56 @@ import CoreMedia
import paddle_mobile import paddle_mobile
import MetalPerformanceShaders import MetalPerformanceShaders
var platform: Platform = .GPU class FileReader {
let threadSupport: [(Platform, String)] = [(.GPU, "GPU"), (.CPU, "CPU")] let file: UnsafeMutablePointer<FILE>
let fileSize: Int
init(paramPath: String) throws {
guard let tmpFile = fopen(paramPath, "rb") else {
throw PaddleMobileError.loaderError(message: "open param file error" + paramPath)
}
file = tmpFile
fseek(file, 0, SEEK_END)
fileSize = ftell(file)
guard fileSize > 0 else {
throw PaddleMobileError.loaderError(message: "param file size is too small")
}
rewind(file)
}
func read<T>() -> UnsafeMutablePointer<T> {
let ptr = UnsafeMutablePointer<T>.allocate(capacity: MemoryLayout<T>.size * fileSize)
fread(ptr, fileSize, 1, file)
return ptr
}
deinit {
fclose(file)
}
}
//.mobilenet_ssd : Runner.init(inNet: MobileNet_ssd_hand.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), enum Platform {
let modelHelperMap: [SupportModel : Runner] = [ case GPU
.yolo : Runner.init(inNet: YoloNet.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform), }
.mobilenet_combined : Runner.init(inNet: MobileNetCombined.init(device: MetalHelper.shared.device), commandQueue: MetalHelper.shared.queue, inPlatform: platform)]
//, .genet : Genet.init()
//let modelHelperMap: [SupportModel : Net] = [.mobilenet : MobileNet.init(), .mobilenet_ssd : MobileNet_ssd_hand.init()]
let netSupport: [SupportModel : Net] = [.yolo : YoloNet.init(device: MetalHelper.shared.device), .mobilenet_combined : MobileNetCombined.init(device: MetalHelper.shared.device)] let platformSupport: [(Platform, String)] = [(.GPU, "GPU")]
enum SupportModel: String{ enum SupportModel: String{
// case mobilenet = "mobilenet" case yolo = "yolo"
// case mobilenet_ssd = "mobilenetssd"
case yolo = "yolo"
case mobilenet_combined = "mobilenet_combined" case mobilenet_combined = "mobilenet_combined"
case super_resolution = "superresoltion"
case mobilenet = "mobilenet"
static func supportedModels() -> [SupportModel] { static func supportedModels() -> [SupportModel] {
// .mobilenet, return [.super_resolution, .yolo, .mobilenet_combined, .mobilenet]
// .mobilenet_ssd,
return [.yolo, .mobilenet_combined]
} }
} }
let netSupport: [SupportModel : Net] = [
.super_resolution : SuperResolutionNet.init(device: MetalHelper.shared.device),
.yolo : YoloNet.init(device: MetalHelper.shared.device),
.mobilenet_combined : MobileNetCombined.init(device: MetalHelper.shared.device),
.mobilenet : MobileNet.init(device: MetalHelper.shared.device)]
class ViewController: UIViewController { class ViewController: UIViewController {
@IBOutlet weak var resultTextView: UITextView! @IBOutlet weak var resultTextView: UITextView!
@IBOutlet weak var selectImageView: UIImageView! @IBOutlet weak var selectImageView: UIImageView!
...@@ -50,28 +75,37 @@ class ViewController: UIViewController { ...@@ -50,28 +75,37 @@ class ViewController: UIViewController {
@IBOutlet weak var modelPickerView: UIPickerView! @IBOutlet weak var modelPickerView: UIPickerView!
@IBOutlet weak var threadPickerView: UIPickerView! @IBOutlet weak var threadPickerView: UIPickerView!
@IBOutlet weak var videoView: UIView! @IBOutlet weak var videoView: UIView!
// var videoCapture: VideoCapture! // var videoCapture: VideoCapture!
var selectImage: UIImage? var selectImage: UIImage?
var inputPointer: UnsafeMutablePointer<Float32>? var inputPointer: UnsafeMutablePointer<Float32>?
var modelType: SupportModel = SupportModel.supportedModels()[0] var modelType: SupportModel = SupportModel.supportedModels()[0]
var toPredictTexture: MTLTexture? var toPredictTexture: MTLTexture?
var runner: Runner! var runner: Runner!
var platform: Platform = .GPU
var threadNum = 1 var threadNum = 1
@IBAction func loadAct(_ sender: Any) { @IBAction func loadAct(_ sender: Any) {
runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue, inPlatform: platform) runner = Runner.init(inNet: netSupport[modelType]!, commandQueue: MetalHelper.shared.queue)
if platform == .GPU {
if platform == .CPU { // let filePath = Bundle.main.path(forResource: "mingren_input_data", ofType: nil)
if inputPointer == nil { // let fileReader = try! FileReader.init(paramPath: filePath!)
inputPointer = runner.preproccess(image: selectImage!.cgImage!) // let pointer: UnsafeMutablePointer<Float32> = fileReader.read()
//
} //
} else if platform == .GPU { // let buffer = MetalHelper.shared.device.makeBuffer(length: fileReader.fileSize, options: .storageModeShared)
//
// buffer?.contents().copyMemory(from: pointer, byteCount: fileReader.fileSize)
if self.toPredictTexture == nil { if self.toPredictTexture == nil {
runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// runner.getTexture(inBuffer: buffer!) { [weak self] (texture) in
// self?.toPredictTexture = texture
// }
runner.getTexture(image: selectImage!.cgImage!) { [weak self] (texture) in
self?.toPredictTexture = texture self?.toPredictTexture = texture
} }
} }
...@@ -106,27 +140,21 @@ class ViewController: UIViewController { ...@@ -106,27 +140,21 @@ class ViewController: UIViewController {
return return
} }
// for _ in 0..<1{
// runner.predict(texture: inTexture) { (success, resultHolder) in
// resultHolder?.releasePointer()
// }
// }
let startDate = Date.init() let startDate = Date.init()
for i in 0..<max { for i in 0..<max {
runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in self.runner.predict(texture: inTexture) { [weak self] (success, resultHolder) in
guard let sSelf = self else { guard let sSelf = self else {
fatalError() fatalError()
} }
if success {
if success, let inResultHolder = resultHolder {
if i == max - 1 { if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate) let time = Date.init().timeIntervalSince(startDate)
print(inResultHolder.result.floatArr(count: inResultHolder.capacity).strideArray())
DispatchQueue.main.async { DispatchQueue.main.async {
// print(resultHolder!.result![0])
sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: resultHolder!) sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: resultHolder!)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms" sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
} }
} }
} }
...@@ -134,39 +162,6 @@ class ViewController: UIViewController { ...@@ -134,39 +162,6 @@ class ViewController: UIViewController {
DispatchQueue.main.async { DispatchQueue.main.async {
resultHolder?.releasePointer() resultHolder?.releasePointer()
} }
// print("释放")
}
// print("sleep before ")
// usleep(33000)
// print("sleep after ")
}
case .CPU:
guard let inInputPointer = inputPointer else {
fatalError( " need input pointer " )
}
for _ in 0..<10 {
runner.predict(inputPointer: inInputPointer) { (success, res) in
res?.releaseOutput()
}
}
let startDate = Date.init()
for i in 0..<max {
runner.predict(inputPointer: inInputPointer) { [weak self](success, res) in
guard let sSelf = self else {
fatalError()
}
if success {
if i == max - 1 {
let time = Date.init().timeIntervalSince(startDate)
DispatchQueue.main.async {
// sSelf.resultTextView.text = sSelf.runner.net.resultStr(res: res)
sSelf.elapsedTimeLabel.text = "平均耗时: \(time/Double(max) * 1000.0) ms"
}
}
}
res?.releaseOutput()
} }
} }
} }
...@@ -179,37 +174,38 @@ class ViewController: UIViewController { ...@@ -179,37 +174,38 @@ class ViewController: UIViewController {
modelPickerView.dataSource = self modelPickerView.dataSource = self
threadPickerView.delegate = self threadPickerView.delegate = self
threadPickerView.dataSource = self threadPickerView.dataSource = self
if let image = UIImage.init(named: "test.jpg") { if let image = UIImage.init(named: "classify-img-output.png") {
selectImage = image selectImage = image
selectImageView.image = image selectImageView.image = image
} else { } else {
print("请添加测试图片") print("请添加测试图片")
} }
GlobalConfig.shared.computePrecision = .Float32
// if platform == .CPU { // if platform == .CPU {
// inputPointer = runner.preproccess(image: selectImage!.cgImage!) // inputPointer = runner.preproccess(image: selectImage!.cgImage!)
// } else if platform == .GPU { // } else if platform == .GPU {
// runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in // runner.getTexture(image: selectImage!.cgImage!) {[weak self] (texture) in
// self?.toPredictTexture = texture // self?.toPredictTexture = texture
// } // }
// } else { // } else {
// fatalError( " unsupport " ) // fatalError( " unsupport " )
// } // }
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
// videoCapture = VideoCapture.init(device: MetalHelper.shared.device, orientation: .portrait, position: .back)
// videoCapture.fps = 30
// videoCapture.delegate = self
// videoCapture.setUp { (success) in
// DispatchQueue.main.async {
// if let preViewLayer = self.videoCapture.previewLayer {
// self.videoView.layer.addSublayer(preViewLayer)
// self.videoCapture.previewLayer?.frame = self.videoView.bounds
// }
// self.videoCapture.start()
// }
// }
} }
} }
...@@ -228,7 +224,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{ ...@@ -228,7 +224,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
if pickerView == modelPickerView { if pickerView == modelPickerView {
return SupportModel.supportedModels().count return SupportModel.supportedModels().count
} else if pickerView == threadPickerView { } else if pickerView == threadPickerView {
return threadSupport.count return platformSupport.count
} else { } else {
fatalError() fatalError()
} }
...@@ -238,7 +234,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{ ...@@ -238,7 +234,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
if pickerView == modelPickerView { if pickerView == modelPickerView {
return SupportModel.supportedModels()[row].rawValue return SupportModel.supportedModels()[row].rawValue
} else if pickerView == threadPickerView { } else if pickerView == threadPickerView {
return threadSupport[row].1 return platformSupport[row].1
} else { } else {
fatalError() fatalError()
} }
...@@ -248,8 +244,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{ ...@@ -248,8 +244,7 @@ extension ViewController: UIPickerViewDataSource, UIPickerViewDelegate{
if pickerView == modelPickerView { if pickerView == modelPickerView {
self.modelType = SupportModel.supportedModels()[row] self.modelType = SupportModel.supportedModels()[row]
} else if pickerView == threadPickerView { } else if pickerView == threadPickerView {
platform = platformSupport[row].0
platform = threadSupport[row].0
} else { } else {
fatalError() fatalError()
} }
...@@ -276,25 +271,11 @@ extension ViewController: VideoCaptureDelegate{ ...@@ -276,25 +271,11 @@ extension ViewController: VideoCaptureDelegate{
func predictTexture(texture: MTLTexture){ func predictTexture(texture: MTLTexture){
runner.scaleTexture(input: texture) { (scaledTexture) in runner.scaleTexture(input: texture) { (scaledTexture) in
self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in self.runner.predict(texture: scaledTexture, completion: { (success, resultHolder) in
// print(resultHolder!.result![0]) // print(resultHolder!.result![0])
resultHolder?.releasePointer() resultHolder?.releasePointer()
}) })
} }
} }
// @available(iOS 10.0, *)
// func videoCapture(_ capture: VideoCapture, didCaptureVideoTexture texture: MTLTexture?, timestamp: CMTime) {
//// if !bool1 {
//// DispatchQueue.main.asyncAfter(deadline: DispatchTime.init(uptimeNanoseconds: 500000000)) {
// self.predictTexture(texture: texture!)
//// }
//
//
//// bool1 = true
//// }
//
// }
} }
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT2(a, b) a ## b
#define FUNC(m, n, q) CONCAT3_(m, n, q)
#define FUNC_T(m, n) CONCAT2_(m, n)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC_T(fetch, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
int input_height = inTexture.get_height();
const VECTOR(P, 4) input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define P float
#include "FetchKernel.inc.metal"
#undef P
#define P half
#include "FetchKernel.inc.metal"
#undef P
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
...@@ -24,6 +24,6 @@ using namespace metal; ...@@ -24,6 +24,6 @@ using namespace metal;
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p) #define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n) #define VECTOR(p, n) CONCAT2(p, n)
#define FUNC2_(a, b) CONCAT2_(a, b)
#define FUNC3_(a, b, c) CONCAT3_(a, b, c) #define FUNC3_(a, b, c) CONCAT3_(a, b, c)
//
// PoolKernel.inc.metal
// paddle-mobile
//
// Created by liuRuiLong on 2018/12/29.
// Copyright © 2018 orange. All rights reserved.
//
#ifdef P
kernel void FUNC2_(pool, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
VECTOR(P, 4) r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= (xmax - xmin) * (ymax - ymin);
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
struct PoolParam {
int ksizeX;
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
#define P float
#import "PoolKernel.inc.metal"
#undef P
#define P half
#import "PoolKernel.inc.metal"
#undef P
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA91214665D700D0F791 /* ShapeOp.swift */; }; 4AA1EA92214665D700D0F791 /* ShapeOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA91214665D700D0F791 /* ShapeOp.swift */; };
4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA932146661500D0F791 /* ShapeKernel.swift */; }; 4AA1EA942146661500D0F791 /* ShapeKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA932146661500D0F791 /* ShapeKernel.swift */; };
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA972146666500D0F791 /* FlattenOp.swift */; }; 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA972146666500D0F791 /* FlattenOp.swift */; };
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */ = {isa = PBXBuildFile; fileRef = 4AA1EA9D2148D6F900D0F791 /* ConcatKernel.inc.metal */; };
4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */; }; 4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EA9F2148DEEE00D0F791 /* ReshapeKernel.inc.metal */; };
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */; }; 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA1214912CC00D0F791 /* FlattenKernel.swift */; };
4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */; }; 4AA1EAA4214A295C00D0F791 /* Split.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AA1EAA3214A295C00D0F791 /* Split.inc.metal */; };
...@@ -29,8 +28,6 @@ ...@@ -29,8 +28,6 @@
4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; }; 4AF9287921341661005B6C3A /* Softmax.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9287821341661005B6C3A /* Softmax.metal */; };
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928812135673D005B6C3A /* ConcatKernel.metal */; }; 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF928812135673D005B6C3A /* ConcatKernel.metal */; };
4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9288321357BE3005B6C3A /* Elementwise.metal */; }; 4AF9288421357BE3005B6C3A /* Elementwise.metal in Sources */ = {isa = PBXBuildFile; fileRef = 4AF9288321357BE3005B6C3A /* Elementwise.metal */; };
C28FDF8421B7858F0054EFAC /* MobileNetCombined.swift in Sources */ = {isa = PBXBuildFile; fileRef = C28FDF8221B7858F0054EFAC /* MobileNetCombined.swift */; };
C28FDF8521B7858F0054EFAC /* YoloNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = C28FDF8321B7858F0054EFAC /* YoloNet.swift */; };
C28FE02F21BA68C00054EFAC /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02C21BA68C00054EFAC /* Metal.framework */; }; C28FE02F21BA68C00054EFAC /* Metal.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02C21BA68C00054EFAC /* Metal.framework */; };
C28FE03021BA68C00054EFAC /* MetalPerformanceShaders.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02D21BA68C00054EFAC /* MetalPerformanceShaders.framework */; }; C28FE03021BA68C00054EFAC /* MetalPerformanceShaders.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02D21BA68C00054EFAC /* MetalPerformanceShaders.framework */; };
C28FE03121BA68C00054EFAC /* MetalKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02E21BA68C00054EFAC /* MetalKit.framework */; }; C28FE03121BA68C00054EFAC /* MetalKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = C28FE02E21BA68C00054EFAC /* MetalKit.framework */; };
...@@ -64,19 +61,15 @@ ...@@ -64,19 +61,15 @@
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; }; FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */; };
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; }; FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */; };
FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; }; FC1B16B320EC9A4F00678B91 /* Kernels.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC1B16B220EC9A4F00678B91 /* Kernels.metal */; };
FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC1CF3F721D4B4C400F7392E /* Runner.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC1CF3F621D4B4C400F7392E /* Runner.swift */; };
FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */ = {isa = PBXBuildFile; fileRef = FC292C5521421B4600CF622F /* PaddleMobileGPU.m */; }; FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFCC121DF2F9100C262B2 /* GlobalConfig.swift */; };
FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7C214255BC00CF622F /* CPUCompute.mm */; }; FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4521DF685F00C262B2 /* Scale.swift */; };
FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C7E214255BC00CF622F /* MobileNetSSD.swift */; }; FC2BFD4A21DF81DE00C262B2 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4921DF81DE00C262B2 /* Kernel.swift */; };
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */ = {isa = PBXBuildFile; fileRef = FC292C7D214255BC00CF622F /* CPUCompute.h */; settings = {ATTRIBUTES = (Public, ); }; }; FC2BFD4E21DF820B00C262B2 /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD4D21DF820A00C262B2 /* ConvAddBatchNormReluOp.swift */; };
FC292C872142624800CF622F /* Genet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC292C862142624800CF622F /* Genet.swift */; }; FC2BFD5121DF8E0400C262B2 /* Scale.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC2BFD5021DF8E0400C262B2 /* Scale.metal */; };
FC33B0F02147659000714A93 /* MobileNet.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC33B0EF2147659000714A93 /* MobileNet.swift */; };
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; }; FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */; };
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; }; FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74820F0B954007C0C6D /* ConvKernel.metal */; };
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; }; FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */; };
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */; };
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */ = {isa = PBXBuildFile; fileRef = FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */; settings = {ATTRIBUTES = (Public, ); }; };
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */ = {isa = PBXBuildFile; fileRef = FC4FD9782140E4980073E130 /* libpaddle-mobile.a */; };
FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; }; FC5163F620EF556E00636C28 /* Texture2DTo2DArrayKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */; };
FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; }; FC60DB8920E9AAA500FF203F /* MetalExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC60DB8820E9AAA500FF203F /* MetalExtension.swift */; };
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */; }; FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC803BBE214CB65A0094B8E5 /* ConvAddPreluOp.swift */; };
...@@ -86,7 +79,9 @@ ...@@ -86,7 +79,9 @@
FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC6214CBA820094B8E5 /* Macro.metal */; }; FC803BC7214CBA820094B8E5 /* Macro.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC6214CBA820094B8E5 /* Macro.metal */; };
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */; }; FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */; };
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; }; FC82735920E3C04200BE430A /* OpCreator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC82735820E3C04200BE430A /* OpCreator.swift */; };
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */; }; FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9797C821D6101D00F2FD90 /* ResizeBilinearOp.swift */; };
FC9797CB21D6102D00F2FD90 /* ResizeBilinearKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9797CA21D6102D00F2FD90 /* ResizeBilinearKernel.swift */; };
FC9C2A0D21D3D185005856C6 /* FetchKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FC9C2A0C21D3D185005856C6 /* FetchKernel.inc.metal */; };
FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; }; FC9D037920E229E4000F735A /* OpParam.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037820E229E4000F735A /* OpParam.swift */; };
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; }; FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D037F20E22FBB000F735A /* FeedOp.swift */; };
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; }; FC9D038220E2312E000F735A /* FetchOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FC9D038120E2312E000F735A /* FetchOp.swift */; };
...@@ -97,6 +92,7 @@ ...@@ -97,6 +92,7 @@
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD42138272900BD58AA /* ConvAddMetal.metal */; }; FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD42138272900BD58AA /* ConvAddMetal.metal */; };
FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; }; FCA67CD7213827AC00BD58AA /* ConvAddBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */; };
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; }; FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */; };
FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */; };
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; }; FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */; };
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; }; FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */; };
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; }; FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */; };
...@@ -111,6 +107,7 @@ ...@@ -111,6 +107,7 @@
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; }; FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */; };
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; }; FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */; };
FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; }; FCBCCC71212309A700D94F7E /* MulticlassNMSKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */; };
FCCED5E121D71FC000BE8D5F /* PoolKernel.inc.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */; };
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; }; FCD04E6620F314C50007374F /* PoolOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6520F314C50007374F /* PoolOp.swift */; };
FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; }; FCD04E6820F315020007374F /* PoolKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6720F315020007374F /* PoolKernel.swift */; };
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; }; FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCD04E6920F319EC0007374F /* SoftmaxOp.swift */; };
...@@ -136,9 +133,7 @@ ...@@ -136,9 +133,7 @@
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; }; FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */; };
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; }; FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */ = {isa = PBXBuildFile; fileRef = FCEB6849212F00DB00D2448E /* PreluKernel.metal */; };
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; }; FCEB684C212F093800D2448E /* PreluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEB684B212F093800D2448E /* PreluOp.swift */; };
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */; };
FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; }; FCEBC0F620F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */; };
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FCF2D73720E64E70007AC5F5 /* Kernel.swift */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
...@@ -164,8 +159,6 @@ ...@@ -164,8 +159,6 @@
4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = "<group>"; }; 4AF9287821341661005B6C3A /* Softmax.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Softmax.metal; sourceTree = "<group>"; };
4AF928812135673D005B6C3A /* ConcatKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = ConcatKernel.metal; sourceTree = "<group>"; }; 4AF928812135673D005B6C3A /* ConcatKernel.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = ConcatKernel.metal; sourceTree = "<group>"; };
4AF9288321357BE3005B6C3A /* Elementwise.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Elementwise.metal; sourceTree = "<group>"; }; 4AF9288321357BE3005B6C3A /* Elementwise.metal */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.metal; path = Elementwise.metal; sourceTree = "<group>"; };
C28FDF8221B7858F0054EFAC /* MobileNetCombined.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNetCombined.swift; sourceTree = "<group>"; };
C28FDF8321B7858F0054EFAC /* YoloNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = YoloNet.swift; sourceTree = "<group>"; };
C28FE02C21BA68C00054EFAC /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; }; C28FE02C21BA68C00054EFAC /* Metal.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Metal.framework; path = System/Library/Frameworks/Metal.framework; sourceTree = SDKROOT; };
C28FE02D21BA68C00054EFAC /* MetalPerformanceShaders.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShaders.framework; path = System/Library/Frameworks/MetalPerformanceShaders.framework; sourceTree = SDKROOT; }; C28FE02D21BA68C00054EFAC /* MetalPerformanceShaders.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalPerformanceShaders.framework; path = System/Library/Frameworks/MetalPerformanceShaders.framework; sourceTree = SDKROOT; };
C28FE02E21BA68C00054EFAC /* MetalKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalKit.framework; path = System/Library/Frameworks/MetalKit.framework; sourceTree = SDKROOT; }; C28FE02E21BA68C00054EFAC /* MetalKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalKit.framework; path = System/Library/Frameworks/MetalKit.framework; sourceTree = SDKROOT; };
...@@ -203,19 +196,15 @@ ...@@ -203,19 +196,15 @@
FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; }; FC0E2DBD20EE460D009C1FAC /* BatchNormKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BatchNormKernel.swift; sourceTree = "<group>"; };
FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; }; FC0E2DBF20EE461F009C1FAC /* ElementwiseAddKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ElementwiseAddKernel.swift; sourceTree = "<group>"; };
FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; }; FC1B16B220EC9A4F00678B91 /* Kernels.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Kernels.metal; sourceTree = "<group>"; };
FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileGPU.h; sourceTree = "<group>"; }; FC1CF3F621D4B4C400F7392E /* Runner.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Runner.swift; sourceTree = "<group>"; };
FC292C5521421B4600CF622F /* PaddleMobileGPU.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = PaddleMobileGPU.m; sourceTree = "<group>"; }; FC2BFCC121DF2F9100C262B2 /* GlobalConfig.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = GlobalConfig.swift; sourceTree = "<group>"; };
FC292C7C214255BC00CF622F /* CPUCompute.mm */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.objcpp; path = CPUCompute.mm; sourceTree = "<group>"; }; FC2BFD4521DF685F00C262B2 /* Scale.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Scale.swift; sourceTree = "<group>"; };
FC292C7D214255BC00CF622F /* CPUCompute.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = CPUCompute.h; sourceTree = "<group>"; }; FC2BFD4921DF81DE00C262B2 /* Kernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Kernel.swift; sourceTree = "<group>"; };
FC292C7E214255BC00CF622F /* MobileNetSSD.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobileNetSSD.swift; sourceTree = "<group>"; }; FC2BFD4D21DF820A00C262B2 /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluOp.swift; sourceTree = "<group>"; };
FC292C862142624800CF622F /* Genet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = Genet.swift; sourceTree = "<group>"; }; FC2BFD5021DF8E0400C262B2 /* Scale.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Scale.metal; sourceTree = "<group>"; };
FC33B0EF2147659000714A93 /* MobileNet.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MobileNet.swift; sourceTree = "<group>"; };
FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; }; FC3602CB2108819F00FACB58 /* PaddleMobileUnitTest.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PaddleMobileUnitTest.swift; sourceTree = "<group>"; };
FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; }; FC4CB74820F0B954007C0C6D /* ConvKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvKernel.metal; sourceTree = "<group>"; };
FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; }; FC4CB74A20F12C30007C0C6D /* ProgramOptimize.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ProgramOptimize.swift; sourceTree = "<group>"; };
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = PaddleMobile.swift; sourceTree = "<group>"; };
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PaddleMobileCPU.h; sourceTree = "<group>"; };
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; path = "libpaddle-mobile.a"; sourceTree = "<group>"; };
FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; }; FC4FD97D2140F2C30073E130 /* libstdc++.tbd */ = {isa = PBXFileReference; lastKnownFileType = "sourcecode.text-based-dylib-definition"; name = "libstdc++.tbd"; path = "usr/lib/libstdc++.tbd"; sourceTree = SDKROOT; };
FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; }; FC5163F520EF556E00636C28 /* Texture2DTo2DArrayKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Texture2DTo2DArrayKernel.swift; sourceTree = "<group>"; };
FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; }; FC60DB8820E9AAA500FF203F /* MetalExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MetalExtension.swift; sourceTree = "<group>"; };
...@@ -226,7 +215,9 @@ ...@@ -226,7 +215,9 @@
FC803BC6214CBA820094B8E5 /* Macro.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Macro.metal; sourceTree = "<group>"; }; FC803BC6214CBA820094B8E5 /* Macro.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = Macro.metal; sourceTree = "<group>"; };
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.metal; sourceTree = "<group>"; }; FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.metal; sourceTree = "<group>"; };
FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; }; FC82735820E3C04200BE430A /* OpCreator.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpCreator.swift; sourceTree = "<group>"; };
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MobilenetSSD_AR.swift; sourceTree = "<group>"; }; FC9797C821D6101D00F2FD90 /* ResizeBilinearOp.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ResizeBilinearOp.swift; sourceTree = "<group>"; };
FC9797CA21D6102D00F2FD90 /* ResizeBilinearKernel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ResizeBilinearKernel.swift; sourceTree = "<group>"; };
FC9C2A0C21D3D185005856C6 /* FetchKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = FetchKernel.inc.metal; sourceTree = "<group>"; };
FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; }; FC9D037820E229E4000F735A /* OpParam.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OpParam.swift; sourceTree = "<group>"; };
FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; }; FC9D037F20E22FBB000F735A /* FeedOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedOp.swift; sourceTree = "<group>"; };
FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; }; FC9D038120E2312E000F735A /* FetchOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchOp.swift; sourceTree = "<group>"; };
...@@ -237,6 +228,7 @@ ...@@ -237,6 +228,7 @@
FCA67CD42138272900BD58AA /* ConvAddMetal.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddMetal.metal; sourceTree = "<group>"; }; FCA67CD42138272900BD58AA /* ConvAddMetal.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddMetal.metal; sourceTree = "<group>"; };
FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; }; FCA67CD6213827AC00BD58AA /* ConvAddBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvAddBNReluKernel.metal; sourceTree = "<group>"; };
FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; }; FCA67CD82138287B00BD58AA /* ConvBNReluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = ConvBNReluKernel.metal; sourceTree = "<group>"; };
FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FetchKernel.swift; sourceTree = "<group>"; };
FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; }; FCBCCC562122F41300D94F7E /* DwConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DwConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; }; FCBCCC582122F42700D94F7E /* ConvBNReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluOp.swift; sourceTree = "<group>"; };
FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; }; FCBCCC5A2122F66F00D94F7E /* ConvBNReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvBNReluKernel.swift; sourceTree = "<group>"; };
...@@ -251,6 +243,7 @@ ...@@ -251,6 +243,7 @@
FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = "<group>"; }; FCBCCC6C2123073A00D94F7E /* BoxcoderKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BoxcoderKernel.swift; sourceTree = "<group>"; };
FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = "<group>"; }; FCBCCC6E2123097100D94F7E /* MulticlassNMSOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSOp.swift; sourceTree = "<group>"; };
FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = "<group>"; }; FCBCCC70212309A700D94F7E /* MulticlassNMSKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = MulticlassNMSKernel.swift; sourceTree = "<group>"; };
FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PoolKernel.inc.metal; sourceTree = "<group>"; };
FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; }; FCD04E6520F314C50007374F /* PoolOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolOp.swift; sourceTree = "<group>"; };
FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; }; FCD04E6720F315020007374F /* PoolKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PoolKernel.swift; sourceTree = "<group>"; };
FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; }; FCD04E6920F319EC0007374F /* SoftmaxOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SoftmaxOp.swift; sourceTree = "<group>"; };
...@@ -276,9 +269,7 @@ ...@@ -276,9 +269,7 @@
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; }; FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = NMSFetchResultKernel.metal; sourceTree = "<group>"; };
FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; }; FCEB6849212F00DB00D2448E /* PreluKernel.metal */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.metal; path = PreluKernel.metal; sourceTree = "<group>"; };
FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = "<group>"; }; FCEB684B212F093800D2448E /* PreluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreluOp.swift; sourceTree = "<group>"; };
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = ConvAddBatchNormReluOp.swift; path = "paddle-mobile/Operators/ConvAddBatchNormReluOp.swift"; sourceTree = SOURCE_ROOT; };
FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; }; FCEBC0F520F1FE120099DBAF /* ConvAddBatchNormReluKernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ConvAddBatchNormReluKernel.swift; sourceTree = "<group>"; };
FCF2D73720E64E70007AC5F5 /* Kernel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; name = Kernel.swift; path = "paddle-mobile/Operators/Kernels/Base/Kernel.swift"; sourceTree = SOURCE_ROOT; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
...@@ -290,7 +281,6 @@ ...@@ -290,7 +281,6 @@
C28FE03021BA68C00054EFAC /* MetalPerformanceShaders.framework in Frameworks */, C28FE03021BA68C00054EFAC /* MetalPerformanceShaders.framework in Frameworks */,
C28FE03121BA68C00054EFAC /* MetalKit.framework in Frameworks */, C28FE03121BA68C00054EFAC /* MetalKit.framework in Frameworks */,
D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */, D3831F70E7E0B565B9AC22DA /* Pods_paddle_mobile.framework in Frameworks */,
FC4FD97A2140E4980073E130 /* libpaddle-mobile.a in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
...@@ -339,23 +329,8 @@ ...@@ -339,23 +329,8 @@
FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = { FC039B6C20E11C3C0081E9F8 /* paddle-mobile */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
C28FDF8221B7858F0054EFAC /* MobileNetCombined.swift */, FC2BFD4721DF818000C262B2 /* API */,
C28FDF8321B7858F0054EFAC /* YoloNet.swift */, FC2BFD4821DF818000C262B2 /* Src */,
FCE9D7B6214F869000B520C3 /* Net.swift */,
FC9A19E22148C31300CD9CBF /* MobilenetSSD_AR.swift */,
FC33B0EF2147659000714A93 /* MobileNet.swift */,
FC292C862142624800CF622F /* Genet.swift */,
FC292C7E214255BC00CF622F /* MobileNetSSD.swift */,
FC292C7C214255BC00CF622F /* CPUCompute.mm */,
FC292C7D214255BC00CF622F /* CPUCompute.h */,
FC292C5521421B4600CF622F /* PaddleMobileGPU.m */,
FC292C5321421B2E00CF622F /* PaddleMobileGPU.h */,
FC4FD9762140E4920073E130 /* CPU */,
FC4FD9742140E1DE0073E130 /* PaddleMobile.swift */,
FC039BAE20E11CC20081E9F8 /* Program */,
FC039BA320E11CBC0081E9F8 /* Operators */,
FC039B9C20E11CB20081E9F8 /* framework */,
FC039B9320E11C9A0081E9F8 /* Common */,
FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */, FC039B6D20E11C3C0081E9F8 /* paddle_mobile.h */,
FC039B6E20E11C3C0081E9F8 /* Info.plist */, FC039B6E20E11C3C0081E9F8 /* Info.plist */,
); );
...@@ -375,7 +350,7 @@ ...@@ -375,7 +350,7 @@
path = Common; path = Common;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC039B9C20E11CB20081E9F8 /* framework */ = { FC039B9C20E11CB20081E9F8 /* Framework */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC039BA120E11CB70081E9F8 /* Loader.swift */, FC039BA120E11CB70081E9F8 /* Loader.swift */,
...@@ -384,7 +359,7 @@ ...@@ -384,7 +359,7 @@
FC039B9E20E11CB20081E9F8 /* Dim.swift */, FC039B9E20E11CB20081E9F8 /* Dim.swift */,
FC9D038320E23B01000F735A /* Texture.swift */, FC9D038320E23B01000F735A /* Texture.swift */,
); );
path = framework; path = Framework;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC039BA320E11CBC0081E9F8 /* Operators */ = { FC039BA320E11CBC0081E9F8 /* Operators */ = {
...@@ -392,7 +367,8 @@ ...@@ -392,7 +367,8 @@
children = ( children = (
FC086BA520E67E8500D85EF7 /* Kernels */, FC086BA520E67E8500D85EF7 /* Kernels */,
FCD592FA20E248EC00252966 /* Base */, FCD592FA20E248EC00252966 /* Base */,
FCEBC0F320F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift */, FC9797C821D6101D00F2FD90 /* ResizeBilinearOp.swift */,
FC2BFD4D21DF820A00C262B2 /* ConvAddBatchNormReluOp.swift */,
FC039BA420E11CBC0081E9F8 /* ConvOp.swift */, FC039BA420E11CBC0081E9F8 /* ConvOp.swift */,
FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */, FC039BA520E11CBC0081E9F8 /* ElementwiseAddOp.swift */,
FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */, FC039BA720E11CBC0081E9F8 /* BatchNormOp.swift */,
...@@ -446,6 +422,7 @@ ...@@ -446,6 +422,7 @@
children = ( children = (
FCDDC6CD212FE02100E5EF74 /* Base */, FCDDC6CD212FE02100E5EF74 /* Base */,
FCEB6837212F00B100D2448E /* metal */, FCEB6837212F00B100D2448E /* metal */,
FC9797CA21D6102D00F2FD90 /* ResizeBilinearKernel.swift */,
FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */, FCDDC6C7212FA3CA00E5EF74 /* ConvTransposeKernel.swift */,
FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */, FC0E2DBB20EE45FE009C1FAC /* ConvKernel.swift */,
FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */, FC0E2DB920EE3B8D009C1FAC /* ReluKernel.swift */,
...@@ -471,17 +448,31 @@ ...@@ -471,17 +448,31 @@
FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */, FC803BC0214CB77A0094B8E5 /* ConvAddPreluKernel.swift */,
FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */, FCE3A1AA2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift */,
FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */, FCE3A1AE2153E8EE00C37CDE /* ElementwiseAddPreluKernel.swift */,
FC2BFD4521DF685F00C262B2 /* Scale.swift */,
FCB40E5821E0DCAB0075EC91 /* FetchKernel.swift */,
); );
path = Kernels; path = Kernels;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FC4FD9762140E4920073E130 /* CPU */ = { FC2BFD4721DF818000C262B2 /* API */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FC4FD9782140E4980073E130 /* libpaddle-mobile.a */, FCE9D7B6214F869000B520C3 /* Net.swift */,
FC4FD9772140E4980073E130 /* PaddleMobileCPU.h */, FC1CF3F621D4B4C400F7392E /* Runner.swift */,
FC2BFCC121DF2F9100C262B2 /* GlobalConfig.swift */,
);
path = API;
sourceTree = "<group>";
};
FC2BFD4821DF818000C262B2 /* Src */ = {
isa = PBXGroup;
children = (
FC039BAE20E11CC20081E9F8 /* Program */,
FC039BA320E11CBC0081E9F8 /* Operators */,
FC039B9C20E11CB20081E9F8 /* Framework */,
FC039B9320E11C9A0081E9F8 /* Common */,
); );
path = CPU; path = Src;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
FCD592FA20E248EC00252966 /* Base */ = { FCD592FA20E248EC00252966 /* Base */ = {
...@@ -497,7 +488,7 @@ ...@@ -497,7 +488,7 @@
FCDDC6CD212FE02100E5EF74 /* Base */ = { FCDDC6CD212FE02100E5EF74 /* Base */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
FCF2D73720E64E70007AC5F5 /* Kernel.swift */, FC2BFD4921DF81DE00C262B2 /* Kernel.swift */,
); );
path = Base; path = Base;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -533,13 +524,16 @@ ...@@ -533,13 +524,16 @@
FC0226552138F33800F395E2 /* TransposeKernel.metal */, FC0226552138F33800F395E2 /* TransposeKernel.metal */,
4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */, 4AA1EAAD214F5FD900D0F791 /* TransposeKernel.inc.metal */,
FC0226572138F38D00F395E2 /* PoolKernel.metal */, FC0226572138F38D00F395E2 /* PoolKernel.metal */,
FCCED5E021D71FC000BE8D5F /* PoolKernel.inc.metal */,
FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */, FC803BC2214CB79C0094B8E5 /* ConvAddPreluKernel.metal */,
FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */, FC803BC4214CB8F00094B8E5 /* ConvAddPrelu.inc.metal */,
FC803BC6214CBA820094B8E5 /* Macro.metal */, FC803BC6214CBA820094B8E5 /* Macro.metal */,
FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */, FC803BC8214CFC8D0094B8E5 /* FetchKernel.metal */,
FC9C2A0C21D3D185005856C6 /* FetchKernel.inc.metal */,
FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */, FCE9D7B8214FAA4800B520C3 /* NMSFetchResultKernel.metal */,
FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */, FCE3A1B02153E90F00C37CDE /* ElementwiseAddPreluKernel.inc.metal */,
FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */, FCE3A1B22153E91900C37CDE /* ElementwiseAddPreluKernel.metal */,
FC2BFD5021DF8E0400C262B2 /* Scale.metal */,
); );
path = metal; path = metal;
sourceTree = "<group>"; sourceTree = "<group>";
...@@ -551,10 +545,6 @@ ...@@ -551,10 +545,6 @@
isa = PBXHeadersBuildPhase; isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC4FD9792140E4980073E130 /* PaddleMobileCPU.h in Headers */,
FC292C85214257CB00CF622F /* CPUCompute.h in Headers */,
FC292C5421421B2F00CF622F /* PaddleMobileGPU.h in Headers */,
4AA1EA9E2148D6F900D0F791 /* ConcatKernel.inc.metal in Headers */,
FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */, FC039B6F20E11C3C0081E9F8 /* paddle_mobile.h in Headers */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
...@@ -650,6 +640,7 @@ ...@@ -650,6 +640,7 @@
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */, FC9D038020E22FBB000F735A /* FeedOp.swift in Sources */,
FC9C2A0D21D3D185005856C6 /* FetchKernel.inc.metal in Sources */,
4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */, 4AA1EAAA214F53D800D0F791 /* BoxCoder.inc.metal in Sources */,
FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */, FC039B9F20E11CB20081E9F8 /* Tensor.swift in Sources */,
FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */, FC803BC9214CFC8D0094B8E5 /* FetchKernel.metal in Sources */,
...@@ -672,20 +663,17 @@ ...@@ -672,20 +663,17 @@
FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */, FCE3A1AB2153DE8C00C37CDE /* ConvAddAddPreluKernel.swift in Sources */,
FC9D037920E229E4000F735A /* OpParam.swift in Sources */, FC9D037920E229E4000F735A /* OpParam.swift in Sources */,
FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */, FC3602CC2108819F00FACB58 /* PaddleMobileUnitTest.swift in Sources */,
FCF2D73820E64E70007AC5F5 /* Kernel.swift in Sources */,
FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */, FCDDC6CC212FDFDB00E5EF74 /* ReluKernel.metal in Sources */,
FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */, FC0226562138F33800F395E2 /* TransposeKernel.metal in Sources */,
FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */, FCDDC6C6212F9FB800E5EF74 /* PreluKernel.swift in Sources */,
FC9797CB21D6102D00F2FD90 /* ResizeBilinearKernel.swift in Sources */,
FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */, FCA67CD52138272900BD58AA /* ConvAddMetal.metal in Sources */,
FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */, FCBCCC5B2122F66F00D94F7E /* ConvBNReluKernel.swift in Sources */,
4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */, 4AA1EA8C2146640900D0F791 /* SplitOp.swift in Sources */,
FC292C81214255BD00CF622F /* CPUCompute.mm in Sources */,
FCEBC0F420F1FDD90099DBAF /* ConvAddBatchNormReluOp.swift in Sources */,
4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */, 4AA1EAAC214F55C800D0F791 /* Softmax.inc.metal in Sources */,
FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */, FC0E2DC020EE461F009C1FAC /* ElementwiseAddKernel.swift in Sources */,
4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */, 4AF928772133F1DB005B6C3A /* BoxCoder.metal in Sources */,
FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */, FC803BBF214CB65A0094B8E5 /* ConvAddPreluOp.swift in Sources */,
FC33B0F02147659000714A93 /* MobileNet.swift in Sources */,
FCEB684C212F093800D2448E /* PreluOp.swift in Sources */, FCEB684C212F093800D2448E /* PreluOp.swift in Sources */,
4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */, 4AA1EAA8214B7AFB00D0F791 /* BilinearInterp.inc.metal in Sources */,
FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */, FCA67CD92138287B00BD58AA /* ConvBNReluKernel.metal in Sources */,
...@@ -697,17 +685,19 @@ ...@@ -697,17 +685,19 @@
FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */, FC039BBA20E11CC20081E9F8 /* TensorDesc.swift in Sources */,
FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */, FC039BA020E11CB20081E9F8 /* Dim.swift in Sources */,
FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */, FC039BB820E11CC20081E9F8 /* framework.pb.swift in Sources */,
C28FDF8521B7858F0054EFAC /* YoloNet.swift in Sources */,
FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */, FC039B9920E11C9A0081E9F8 /* Types.swift in Sources */,
FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */, FC4CB74920F0B954007C0C6D /* ConvKernel.metal in Sources */,
FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */, FCA3A1632132A4AC00084FE5 /* ReshapeKernel.metal in Sources */,
FC4FD9752140E1DE0073E130 /* PaddleMobile.swift in Sources */,
FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */, FCBCCC592122F42700D94F7E /* ConvBNReluOp.swift in Sources */,
FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */, FC039BA920E11CBC0081E9F8 /* ConvOp.swift in Sources */,
FCCED5E121D71FC000BE8D5F /* PoolKernel.inc.metal in Sources */,
FC2BFD4A21DF81DE00C262B2 /* Kernel.swift in Sources */,
FC9D038420E23B01000F735A /* Texture.swift in Sources */, FC9D038420E23B01000F735A /* Texture.swift in Sources */,
FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */, FCE3A1B32153E91900C37CDE /* ElementwiseAddPreluKernel.metal in Sources */,
FC2BFD4E21DF820B00C262B2 /* ConvAddBatchNormReluOp.swift in Sources */,
4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */, 4AA1EAA2214912CD00D0F791 /* FlattenKernel.swift in Sources */,
4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */, 4AA1EA982146666500D0F791 /* FlattenOp.swift in Sources */,
FC2BFCC221DF2F9100C262B2 /* GlobalConfig.swift in Sources */,
FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */, FCBCCC652122FCD700D94F7E /* TransposeOp.swift in Sources */,
4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */, 4AA1EAA6214B5F6800D0F791 /* Shape.metal in Sources */,
FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */, FCD04E6E20F31B4B0007374F /* ReshapeOp.swift in Sources */,
...@@ -715,14 +705,13 @@ ...@@ -715,14 +705,13 @@
FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */, FC039BBF20E11CC20081E9F8 /* Attribute.swift in Sources */,
4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */, 4AA1EA8E2146647F00D0F791 /* SplitKernel.swift in Sources */,
FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */, FCD04E7420F3437E0007374F /* ConvAddKernel.swift in Sources */,
FC1CF3F721D4B4C400F7392E /* Runner.swift in Sources */,
FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */, FC039BB920E11CC20081E9F8 /* Scope.swift in Sources */,
FC292C5621421B4600CF622F /* PaddleMobileGPU.m in Sources */,
FCD04E6620F314C50007374F /* PoolOp.swift in Sources */, FCD04E6620F314C50007374F /* PoolOp.swift in Sources */,
FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */, FCE9D7B9214FAA4800B520C3 /* NMSFetchResultKernel.metal in Sources */,
FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */, FC039BAC20E11CBC0081E9F8 /* BatchNormOp.swift in Sources */,
FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */, FCBCCC6F2123097100D94F7E /* MulticlassNMSOp.swift in Sources */,
FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */, FC039BBC20E11CC20081E9F8 /* VarDesc.swift in Sources */,
FC292C872142624800CF622F /* Genet.swift in Sources */,
FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */, FC803BC5214CB8F00094B8E5 /* ConvAddPrelu.inc.metal in Sources */,
4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */, 4AF928822135673D005B6C3A /* ConcatKernel.metal in Sources */,
FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */, FCBCCC632122FCC000D94F7E /* TransposeKernel.swift in Sources */,
...@@ -731,6 +720,7 @@ ...@@ -731,6 +720,7 @@
FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */, FC0E2DBA20EE3B8D009C1FAC /* ReluKernel.swift in Sources */,
4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */, 4AA1EA862146625E00D0F791 /* BilinearInterpOp.swift in Sources */,
FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */, FCBCCC6D2123073A00D94F7E /* BoxcoderKernel.swift in Sources */,
FCB40E5921E0DCAB0075EC91 /* FetchKernel.swift in Sources */,
FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */, FCBCCC69212306D300D94F7E /* ConcatKernel.swift in Sources */,
FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */, FCDDC6C8212FA3CA00E5EF74 /* ConvTransposeKernel.swift in Sources */,
FC82735920E3C04200BE430A /* OpCreator.swift in Sources */, FC82735920E3C04200BE430A /* OpCreator.swift in Sources */,
...@@ -742,20 +732,18 @@ ...@@ -742,20 +732,18 @@
FCE9D7B7214F869000B520C3 /* Net.swift in Sources */, FCE9D7B7214F869000B520C3 /* Net.swift in Sources */,
FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */, FC0E2DBE20EE460D009C1FAC /* BatchNormKernel.swift in Sources */,
FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */, FC039BAB20E11CBC0081E9F8 /* Operator.swift in Sources */,
C28FDF8421B7858F0054EFAC /* MobileNetCombined.swift in Sources */,
FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */, FCD04E6A20F319EC0007374F /* SoftmaxOp.swift in Sources */,
FC292C82214255BD00CF622F /* MobileNetSSD.swift in Sources */,
FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */, FCBCCC612122FBDF00D94F7E /* PriorBoxKernel.swift in Sources */,
FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */, FCBCCC5F2122FB3B00D94F7E /* PriorBoxOp.swift in Sources */,
FC9D038220E2312E000F735A /* FetchOp.swift in Sources */, FC9D038220E2312E000F735A /* FetchOp.swift in Sources */,
FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */, FCA67B1721364EF000BD58AA /* ConvTransposeKernel.metal in Sources */,
FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */, FC039BBD20E11CC20081E9F8 /* Program.swift in Sources */,
FC2BFD5121DF8E0400C262B2 /* Scale.metal in Sources */,
FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */, FC039BA220E11CB70081E9F8 /* Loader.swift in Sources */,
FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */, FCBCCC67212306B000D94F7E /* ConcatOp.swift in Sources */,
FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */, FCD04E6C20F31A280007374F /* SoftmaxKernel.swift in Sources */,
FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */, FCEB684A212F00DB00D2448E /* PreluKernel.metal in Sources */,
4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */, 4AA1EAA02148DEEE00D0F791 /* ReshapeKernel.inc.metal in Sources */,
FC9A19E32148C31300CD9CBF /* MobilenetSSD_AR.swift in Sources */,
FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */, FCDDC6CF212FE14700E5EF74 /* PriorBoxKernel.metal in Sources */,
FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */, FC4CB74B20F12C30007C0C6D /* ProgramOptimize.swift in Sources */,
FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */, FCE3A1A92153DE5100C37CDE /* ConvAddAddPreluOp.swift in Sources */,
...@@ -769,7 +757,9 @@ ...@@ -769,7 +757,9 @@
FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */, FC039BAD20E11CBC0081E9F8 /* ReluOp.swift in Sources */,
FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */, FCBCCC572122F41300D94F7E /* DwConvBNReluOp.swift in Sources */,
FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */, FC039BBE20E11CC20081E9F8 /* OpDesc.swift in Sources */,
FC9797C921D6101D00F2FD90 /* ResizeBilinearOp.swift in Sources */,
4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */, 4AA1EA88214662BD00D0F791 /* BilinearInterpKernel.swift in Sources */,
FC2BFD4621DF685F00C262B2 /* Scale.swift in Sources */,
FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */, FC039B9720E11C9A0081E9F8 /* Extensions.swift in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
...@@ -909,7 +899,7 @@ ...@@ -909,7 +899,7 @@
DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1; DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath"; DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_BITCODE = NO; ENABLE_BITCODE = YES;
INFOPLIST_FILE = "paddle-mobile/Info.plist"; INFOPLIST_FILE = "paddle-mobile/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
...@@ -946,7 +936,7 @@ ...@@ -946,7 +936,7 @@
DYLIB_COMPATIBILITY_VERSION = 1; DYLIB_COMPATIBILITY_VERSION = 1;
DYLIB_CURRENT_VERSION = 1; DYLIB_CURRENT_VERSION = 1;
DYLIB_INSTALL_NAME_BASE = "@rpath"; DYLIB_INSTALL_NAME_BASE = "@rpath";
ENABLE_BITCODE = NO; ENABLE_BITCODE = YES;
INFOPLIST_FILE = "paddle-mobile/Info.plist"; INFOPLIST_FILE = "paddle-mobile/Info.plist";
INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks"; INSTALL_PATH = "$(LOCAL_LIBRARY_DIR)/Frameworks";
IPHONEOS_DEPLOYMENT_TARGET = 9.0; IPHONEOS_DEPLOYMENT_TARGET = 9.0;
......
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
LastUpgradeVersion = "1010"
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
shouldUseLaunchSchemeArgsEnv = "YES">
<Testables>
</Testables>
<AdditionalOptions>
</AdditionalOptions>
</TestAction>
<LaunchAction
buildConfiguration = "Debug"
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.LLDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.LLDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
ignoresPersistentStateOnLaunch = "NO"
debugDocumentVersioning = "YES"
debugServiceExtension = "internal"
allowLocationSimulation = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</MacroExpansion>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
buildConfiguration = "Release"
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
debugDocumentVersioning = "YES">
<MacroExpansion>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "FC039B6920E11C3C0081E9F8"
BuildableName = "paddle_mobile.framework"
BlueprintName = "paddle-mobile"
ReferencedContainer = "container:paddle-mobile.xcodeproj">
</BuildableReference>
</MacroExpansion>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
@objc public enum MetalLoadMode: Int {
case
LoadMetalInPaddleMobile = 1, // 使用 paddle-mobile 中的 metal 代码
LoadMetalInDefaultLib = 2, // 使用 main bundle 中的 metal 代码
LoadMetalInCustomMetalLib = 3 // 使用 metal 库文件
}
@objc public enum ComputePrecision: Int {
case
Float32 = 1,
Float16 = 2
}
@objc public class GlobalConfig: NSObject {
/// 单例
@objc public static let shared: GlobalConfig = GlobalConfig.init()
/// 运算精度, runner 生命周期中不可变
@objc public var computePrecision: ComputePrecision = .Float16
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Metal
import Foundation
/// 网络的基类, 参数已经给了默认值,请在子类实现中修改需要改的参数
@objc open class Net: NSObject {
/// 默认为0, 如果指定个数, 后边 except 个op不使用 GPU 运算, 中间结果会通过 fetchResult 传参过来
@objc public var except: Int = 0
/// 预处理 kernel, 如果输入图像需要预处理, 则指定预处理 kernel
@objc public var preprocessKernel: CusomKernel? = nil
// 以下四个参数为从内存中读取模型时用到的参数
/// 模型在内存中的指针
@objc public var modelPointer: UnsafeMutableRawPointer? = nil
/// 模型大小 单位: 字节
@objc public var modelSize: Int = 0
/// 权重参数在内存中的指针
@objc public var paramPointer: UnsafeMutableRawPointer? = nil
/// 权重大小 单位: 字节
@objc public var paramSize: Int = 0
// 以下两个为从文件中读取模型时用到的参数
/// 模型文件路径
@objc public var modelPath: String? = nil
/// 权重文件路径
@objc public var paramPath: String? = nil
/// 代表着 GPU 处理器
@objc public let device: MTLDevice
/// metal 代码加载方式 注意: 如果静态库只能使用 LoadMetalInDefaultLib LoadMetalInCustomMetalLib 进行 load metal 代码
@objc public var metalLoadMode: MetalLoadMode = .LoadMetalInPaddleMobile
/// 当 metalLoadMode 为 LoadMetalInCustomMetalLib 时, metal library 路径不能为空
@objc public var metalLibPath: String? = nil
/// 输入维度,按照 n h w c 方式传入
@objc public var inputDim: Dim = Dim.init(inDim: [])
@objc public init(device: MTLDevice, paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
self.paramPointer = paramPointer
self.paramSize = paramSize
self.modelPointer = modePointer
self.modelSize = modelSize
self.device = device
super.init()
}
@objc public init(device: MTLDevice) {
self.device = device
super.init()
}
@objc open func resultStr(res: ResultHolder) -> String {
fatalError()
}
@objc open func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
guard let inResPointer = paddleMobileRes.resultPointer else {
fatalError()
}
return ResultHolder.init(inResult: inResPointer, inCapacity: paddleMobileRes.capacity)
}
open func updateProgram(program: Program) {
}
}
...@@ -12,25 +12,22 @@ ...@@ -12,25 +12,22 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
import Metal
import MetalKit import MetalKit
import Foundation import Foundation
@objc public enum Platform: Int{ @objc public class ResultHolder: NSObject {
case CPU, GPU @objc public let result: UnsafeMutablePointer<Float32>
} @objc public let capacity: Int
class ScaleKernel: CusomKernel { init(inResult: UnsafeMutablePointer<Float32>, inCapacity: Int) {
init(device: MTLDevice, shape: Shape) { result = inResult
if computePrecision == .Float32 { capacity = inCapacity
super.init(device: device, inFunctionName: "scale", outputDim: shape, usePaddleMobileLib: false)
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "scale_half", outputDim: shape, usePaddleMobileLib: false)
} else {
fatalError(" unsupport ")
}
} }
@objc public func releasePointer() {
result.deinitialize(count: capacity)
result.deallocate()
}
} }
@objc public class Runner: NSObject { @objc public class Runner: NSObject {
...@@ -40,86 +37,77 @@ class ScaleKernel: CusomKernel { ...@@ -40,86 +37,77 @@ class ScaleKernel: CusomKernel {
var textureLoader: MTKTextureLoader? var textureLoader: MTKTextureLoader?
public let net: Net public let net: Net
let device: MTLDevice? let device: MTLDevice?
let platform: Platform
var cpuPaddleMobile: PaddleMobileCPU?
let numel: Int let numel: Int
let meansNumber: [NSNumber]
// dims num nchw /// 初始化函数
let dimsNum: [NSNumber] ///
/** /// - Parameters:
* inNet: 需要运行的网络 /// - inNet: 传入自定义的网络
* commandQueue: GPU 是需要传入 /// - commandQueue: commandQueue
* inPlatform: 需要使用的平台, GPU or CPU @objc public init(inNet: Net, commandQueue: MTLCommandQueue?) {
*/ guard inNet.inputDim.cout() == 4 else {
@objc public init(inNet: Net, commandQueue: MTLCommandQueue?, inPlatform: Platform) { fatalError(" input dim count must 4 ")
}
net = inNet net = inNet
queue = commandQueue queue = commandQueue
device = queue?.device device = queue?.device
platform = inPlatform
if let inDevice = device { if let inDevice = device {
textureLoader = MTKTextureLoader.init(device: inDevice) textureLoader = MTKTextureLoader.init(device: inDevice)
} }
if platform == .CPU { numel = net.inputDim.numel()
cpuPaddleMobile = PaddleMobileCPU.init()
}
numel = net.dim.n * net.dim.c * net.dim.h * net.dim.w
meansNumber = net.means.map { NSNumber.init(value: $0) }
dimsNum = [NSNumber.init(value: net.dim.n),
NSNumber.init(value: net.dim.c),
NSNumber.init(value: net.dim.h),
NSNumber.init(value: net.dim.w)]
} }
/** /// load 模型, 返回 true 可进行预测
* load 模型, 返回 true 可进行预测 ///
*/ /// - Returns: load 成功或失败
@objc public func load() -> Bool { @objc public func load() -> Bool {
if platform == .GPU {
guard let inDevice = device, let inQueue = queue else { guard let inDevice = device, let inQueue = queue else {
print(" paddle mobile gpu load error, need MTLCommandQueue") print(" paddle mobile gpu load error, need MTLCommandQueue")
return false return false
} }
let loader = Loader<Float32>.init() let loader = Loader<Float32>.init()
do { do {
// program = try loader.load(device: inDevice, paramPointer: net.paramPointer!, paramSize: net.paramSize,modePointer:net.modelPointer!,modelSize:net.modelSize)
program = try loader.load(device: inDevice, modelPath: net.modelPath, paraPath: net.paramPath) if let inParamPointer = net.paramPointer, let inModelPointer = net.modelPointer {
guard net.paramSize > 0 && net.modelSize > 0 else {
print(" load from memory param size or model size can't 0 ")
return false
}
program = try loader.load(device: inDevice, paramPointer: inParamPointer, paramSize: net.paramSize,modePointer:inModelPointer,modelSize:net.modelSize)
} else if let inModelPath = net.modelPath, let inParamPath = net.paramPath {
program = try loader.load(device: inDevice, modelPath: inModelPath, paraPath: inParamPath)
} else {
print(" model pointer or model file path need be specified")
return false
}
let initContext: InitContext = InitContext.init()
initContext.metalLoadMode = net.metalLoadMode
initContext.metalLibPath = net.metalLibPath
executor = try Executor<Float32>.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!, initContext: initContext)
net.updateProgram(program: program!) net.updateProgram(program: program!)
executor = try Executor<Float32>.init(inDevice: inDevice, inQueue: inQueue, inProgram: program!)
} catch let error { } catch let error {
print(error) print(error)
return false return false
} }
} else {
return cpuPaddleMobile?.load(net.modelPath, andWeightsPath: net.paramPath) ?? false
}
return true return true
} }
@objc public func predict(inputPointer: UnsafeMutablePointer<Float32>, completion: @escaping ( _ success: Bool, _ result: PaddleMobileCPUResult?) -> Void) { /// 预测
///
guard let res = cpuPaddleMobile?.predictInput(inputPointer, dim: dimsNum) else { /// - Parameters:
completion(false, nil) /// - texture: 输入 texture 需要使用 getTexture 获得
return /// - completion: 结果回调, 当 success 为 true 时 result 不为 nil
}
completion(true, res)
}
/**
* GPU 版本 predict
* texture: 需要预测的 texture 需要做过预处理
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
@objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) -> Void) { @objc public func predict(texture: MTLTexture, completion: @escaping ( _ success: Bool, _ result: ResultHolder?) -> Void) {
do { do {
try self.executor?.predict(input: texture, dim: [self.net.dim.n, self.net.dim.h, self.net.dim.w, self.net.dim.c], completionHandle: { [weak self] (res) in try self.executor?.predict(input: texture, dim: self.net.inputDim, completionHandle: { [weak self] (res) in
guard let SSelf = self else { guard let SSelf = self else {
fatalError( " self nil " ) fatalError( " self nil " )
} }
let result = SSelf.net.fetchResult(paddleMobileRes: res) let result = SSelf.net.fetchResult(paddleMobileRes: res)
completion(true, result) completion(true, result)
}, preProcessKernle: self.net.preprocessKernel, except: self.net.except) }, preProcessKernle: self.net.preprocessKernel, except: self.net.except)
} catch let error { } catch let error {
print(error) print(error)
completion(false, nil) completion(false, nil)
...@@ -127,59 +115,64 @@ class ScaleKernel: CusomKernel { ...@@ -127,59 +115,64 @@ class ScaleKernel: CusomKernel {
} }
} }
/** /// 清理内存, 调用此函数后, 不能再使用, 需重新 load
* CPU GPU 通用版本 predict
* cgImage: 需要预测的图片
* ( _ success: Bool, _ time:TimeInterval, _ resultArray: [Float32]) -> Void : 回调闭包, 三个参数分别为: 是否成功, 预测耗时, 结果数组
*/
// @objc public func predict(cgImage: CGImage, completion: @escaping ( _ success: Bool, _ resultArray: [Float32]) -> Void) {
// if platform == .GPU {
// getTexture(image: cgImage) { [weak self] (texture) in
// guard let SSelf = self else {
// fatalError( "" )
// }
// SSelf.predict(texture: texture, completion: completion)
// }
// } else if platform == .CPU {
// let input = preproccess(image: cgImage)
// predict(inputPointer: input, completion: completion)
// input.deinitialize(count: numel)
// input.deallocate()
// }
// }
/*
* 清理内存, 调用此函数后, 不能再使用, 需重新 load
*/
@objc public func clear() { @objc public func clear() {
if platform == .GPU { executor?.clear()
executor?.clear() executor = nil
executor = nil program = nil
program = nil
} else if platform == .CPU {
cpuPaddleMobile?.clear()
}
} }
@objc public func preproccess(image: CGImage) -> UnsafeMutablePointer<Float> { /// 获取 texture, 对 texture 进行预处理, 预测时使用
let output = UnsafeMutablePointer<Float>.allocate(capacity: numel) ///
let means = net.means.map { NSNumber.init(value: $0) } /// - Parameters:
let dims = [NSNumber.init(value: net.dim.n), /// - image: 输入图像
NSNumber.init(value: net.dim.c), /// - getTexture: 获取 texture 回调
NSNumber.init(value: net.dim.h),
NSNumber.init(value: net.dim.w)]
cpuPaddleMobile?.preprocess(image, output: output, means: means, scale: net.scale, dim: dims)
return output
}
/*
* 获取 texture, 对 texture 进行预处理, GPU 预测时使用
*/
@objc public func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) { @objc public func getTexture(image: CGImage, getTexture: @escaping (MTLTexture) -> Void) {
let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error" let texture = try? textureLoader?.newTexture(cgImage: image, options: [:]) ?! " texture loader error"
scaleTexture(input: texture!, complete: getTexture) scaleTexture(input: texture!, complete: getTexture)
} }
/// 通过 buffer 获取 texture, 内部会使用GPU进行转换操作
///
/// - Parameters:
/// - inBuffer: 输入buffer
/// - getTexture: 结果回调
@objc public func getTexture(inBuffer: MTLBuffer, getTexture: @escaping (MTLTexture) -> Void) {
guard let inQueue = queue, let inDevice = device else {
fatalError( " queue or devcie nil " )
}
guard let buffer = inQueue.makeCommandBuffer() else {
fatalError( " make buffer error" )
}
let bufferToTextureKernel = BufferToTextureKernel.init(device: inDevice, outputDim: Shape.init(inWidth: net.inputDim[2], inHeight: net.inputDim[1], inChannel: net.inputDim[3]), metalLoadMode: net.metalLoadMode, metalLibPath: net.metalLibPath)
do {
try bufferToTextureKernel.compute(inputBuffer: inBuffer, commandBuffer: buffer)
} catch {
fatalError(" bufferToTextureKernel error ")
}
buffer.addCompletedHandler { (buffer) in
getTexture(bufferToTextureKernel.outputTexture)
}
buffer.commit()
}
/// 更新输入维度, 针对可变长输入模型
///
/// - Parameter inDim: 输入维度
@objc public func updateInputDim(inDim: Dim) {
if net.inputDim != inDim {
guard let inProgram = program else {
fatalError(" need load first ")
}
net.inputDim = inDim
net.updateProgram(program: inProgram)
}
}
public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) { public func scaleTexture(input: MTLTexture , complete: @escaping (MTLTexture) -> Void) {
guard let inQueue = queue, let inDevice = device else { guard let inQueue = queue, let inDevice = device else {
...@@ -190,7 +183,7 @@ class ScaleKernel: CusomKernel { ...@@ -190,7 +183,7 @@ class ScaleKernel: CusomKernel {
fatalError( " make buffer error" ) fatalError( " make buffer error" )
} }
let scaleKernel = ScaleKernel.init(device: inDevice, shape: CusomKernel.Shape.init(inWidth: net.dim.w, inHeight: net.dim.h, inChannel: 3)) let scaleKernel = ScaleKernel.init(device: inDevice, shape: Shape.init(inWidth: net.inputDim[2], inHeight: net.inputDim[1], inChannel: 3), metalLoadMode: net.metalLoadMode, metalLibPath: net.metalLibPath)
do { do {
try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer) try scaleKernel.compute(inputTexuture: input, commandBuffer: buffer)
...@@ -205,5 +198,3 @@ class ScaleKernel: CusomKernel { ...@@ -205,5 +198,3 @@ class ScaleKernel: CusomKernel {
buffer.commit() buffer.commit()
} }
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#import <CoreImage/CoreImage.h>
#import <Foundation/Foundation.h>
@interface PaddleMobileCPUResult: NSObject
@property (assign, nonatomic, readonly) float *output;
@property (assign, nonatomic, readonly) int outputSize;
-(void)releaseOutput;
@end
@interface PaddleMobileCPU : NSObject
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
加载散开形式的模型, 需传入模型的目录
*/
- (BOOL)load:(NSString *)modelAndWeightPath;
/*
* 从内存中加载模型
* */
- (BOOL)LoadCombinedMemory:(size_t)modelLen
andModelBuf:(const uint8_t *)modelBuf
andModelParamsLen:(size_t)combinedParamsLen
andCombinedParamsBuf:(const uint8_t *)combinedParamsBuf;
/*
* 对图像进行预处理, 需要外部开辟 output 内存, 外部释放 output 内存
* */
-(void)preprocess:(CGImageRef)image
output:(float *)output
means:(NSArray<NSNumber *> *)means
scale:(float)scale
dim:(NSArray<NSNumber *> *)dim;
/*
* 预测预处理后的数据, 返回结果使用结束需要调用其 realseOutput 函数进行释放
* */
- (PaddleMobileCPUResult *)predictInput:(float *)input
dim:(NSArray<NSNumber *> *)dim;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测, 默认 means 为 0, scale 为 1.0
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
@end
//
// MobileNetCombined.swift
// paddle-mobile
//
// Created by Xiao,Haichun on 2018/12/5.
// Copyright © 2018 orange. All rights reserved.
//
import Foundation
public class MobileNetCombined: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = Bundle.main.path(forResource: "combined_mobilenet_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "combined_mobilenet_params", ofType: nil) ?! "para null"
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 416, w: 416, c: 3)
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = ""
paramPath = ""
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 416, w: 416, c: 3)
}
// class GenetPreProccess: CusomKernel {
// init(device: MTLDevice) {
// let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
// super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false)
// }
// }
override public func resultStr(res: ResultHolder) -> String {
// fatalError()
return " \(res.result![0]) ... "
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import Metal
public class ResultHolder: NSObject {
@objc public let result: UnsafeMutablePointer<Float32>?
@objc public let capacity: Int
init(inResult: UnsafeMutablePointer<Float32>?, inCapacity: Int) {
result = inResult
capacity = inCapacity
}
@objc public func releasePointer() {
result?.deinitialize(count: capacity)
result?.deallocate()
}
}
public class Net: NSObject {
var except: Int = 0
var means: [Float] = []
var scale: Float = 0.0
var dim: (n: Int, h: Int, w: Int, c: Int) = (n: 0, h: 0, w: 0, c: 0)
var preprocessKernel: CusomKernel? = nil
var paramPointer: UnsafeMutableRawPointer? = nil
var paramSize: Int = 0
var modelPointer: UnsafeMutableRawPointer? = nil
var modelSize: Int = 0
var modelPath: String = ""
var paramPath: String = ""
var modelDir: String = ""
@objc public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
self.paramPointer = paramPointer
self.paramSize = paramSize
self.modelPointer = modePointer
self.modelSize = modelSize
super.init()
}
public func resultStr(res: ResultHolder) -> String {
fatalError()
}
func fetchResult(paddleMobileRes: GPUResultHolder) -> ResultHolder {
return ResultHolder.init(inResult: paddleMobileRes.resultPointer, inCapacity: paddleMobileRes.capacity)
}
@objc public init(device: MTLDevice) {
super.init()
}
func updateProgram(program: Program) {
}
}
...@@ -110,9 +110,27 @@ extension Array { ...@@ -110,9 +110,27 @@ extension Array {
return newArray return newArray
} }
} }
public static func floatArrWithBuffer(floatArrBuffer: UnsafeMutablePointer<Float32>, count: Int) -> [Float32] {
var arr: [Float32] = []
for i in 0..<count {
arr.append(floatArrBuffer[i])
}
return arr
}
}
extension UnsafeMutablePointer {
public func floatArr(count: Int) -> [Pointee]{
var arr: [Pointee] = []
for i in 0..<count {
arr.append(self[i])
}
return arr
}
} }
extension String{ extension String {
func cStr() -> UnsafePointer<Int8>? { func cStr() -> UnsafePointer<Int8>? {
return (self as NSString).utf8String return (self as NSString).utf8String
} }
......
...@@ -18,6 +18,7 @@ import CoreMedia ...@@ -18,6 +18,7 @@ import CoreMedia
fileprivate var defaultMetalLibrary: MTLLibrary? fileprivate var defaultMetalLibrary: MTLLibrary?
fileprivate var paddleMobileMetalLibrary: MTLLibrary? fileprivate var paddleMobileMetalLibrary: MTLLibrary?
fileprivate var customMetalLibrary: MTLLibrary?
extension MTLDevice { extension MTLDevice {
func defaultLibrary() -> MTLLibrary { func defaultLibrary() -> MTLLibrary {
...@@ -31,6 +32,22 @@ extension MTLDevice { ...@@ -31,6 +32,22 @@ extension MTLDevice {
} }
} }
func customLibrary(metalLibPath: String) -> MTLLibrary {
if customMetalLibrary == nil {
do {
customMetalLibrary = try makeLibrary(filepath: metalLibPath)
} catch let error {
fatalError("\(error)")
}
}
if let inMetalLib = customMetalLibrary {
return inMetalLib
} else {
fatalError(" customlib is nil ")
}
}
func paddleMobileLibrary() -> MTLLibrary { func paddleMobileLibrary() -> MTLLibrary {
if paddleMobileMetalLibrary == nil { if paddleMobileMetalLibrary == nil {
guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else { guard let path = Bundle.init(for: Kernel.self).path(forResource: "default", ofType: "metallib") else {
...@@ -50,8 +67,19 @@ extension MTLDevice { ...@@ -50,8 +67,19 @@ extension MTLDevice {
} }
} }
func pipeLine(funcName: String, inPaddleMobileLib: Bool = true) -> MTLComputePipelineState { func pipeLine(funcName: String, metalLoadMode: MetalLoadMode, metalLibPath: String?) -> MTLComputePipelineState {
let useLib = inPaddleMobileLib ? paddleMobileLibrary() : defaultLibrary() let useLib: MTLLibrary
switch metalLoadMode {
case .LoadMetalInDefaultLib:
useLib = defaultLibrary()
case .LoadMetalInPaddleMobile:
useLib = paddleMobileLibrary()
case .LoadMetalInCustomMetalLib:
useLib = customLibrary(metalLibPath: metalLibPath ?! " can't be nil ")
default:
fatalError()
}
guard let function = useLib.makeFunction(name: funcName) else { guard let function = useLib.makeFunction(name: funcName) else {
fatalError(" function " + funcName + " not found") fatalError(" function " + funcName + " not found")
} }
...@@ -501,7 +529,7 @@ public extension MTLTexture { ...@@ -501,7 +529,7 @@ public extension MTLTexture {
} else { } else {
fatalError(" 目前还不支持其他类型 ") fatalError(" 目前还不支持其他类型 ")
} }
print(textureArray.count)
var output: [Float32] = [] var output: [Float32] = []
for s in 0..<arrayLength { for s in 0..<arrayLength {
for c in 0..<4{ for c in 0..<4{
......
...@@ -324,9 +324,10 @@ public class PaddleMobileUnitTest { ...@@ -324,9 +324,10 @@ public class PaddleMobileUnitTest {
let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize) let param = ConvAddBatchNormReluTestParam.init(inInputTexture: inputeTexture, inOutputTexture: outputTexture, inMetalParam: metalParam, inFilterBuffer: filterBuffer, inBiaseBuffer: biaseBuffer, inNewScaleBuffer: newScalueBuffer, inNewBiaseBuffer: newBiaseBuffer, inFilterSize: filterSize)
let initContext = InitContext.init()
initContext.metalLoadMode = .LoadMetalInDefaultLib
let convAddBnReluKernel = ConvAddBatchNormReluKernel<Float32>.init(device: device, testParam: param, initContext: initContext)
let convAddBnReluKernel = ConvAddBatchNormReluKernel<Float32>.init(device: device, testParam: param)
convAddBnReluKernel.test(commandBuffer: buffer, param: param) convAddBnReluKernel.test(commandBuffer: buffer, param: param)
......
...@@ -250,39 +250,40 @@ extension InputTexture: Variant { ...@@ -250,39 +250,40 @@ extension InputTexture: Variant {
} }
extension MTLTexture where Self: Variant { extension MTLTexture where Self: Variant {
} }
class FetchHolder: Variant { public class FetchHolder: Variant {
var resultBuffer: MTLBuffer? var resultBuffer: MTLBuffer?
var dim: [Int] public var dim: Dim
var capacity: Int public var capacity: Int
public var paddedCapacity: Int
init(inCapacity: Int, inDim: [Int]) { init(inPaddedCapacity: Int, inDim: Dim) {
capacity = inCapacity paddedCapacity = inPaddedCapacity
capacity = inDim.numel()
dim = inDim dim = inDim
} }
func initBuffer(device: MTLDevice) { public func initBuffer(device: MTLDevice) {
resultBuffer = device.makeBuffer(length: capacity * 4, options: []) resultBuffer = device.makeBuffer(length: paddedCapacity * 4, options: [])
} }
var result: UnsafeMutablePointer<Float32> { var result: UnsafeMutablePointer<Float32> {
guard let inResultBuffer = resultBuffer else { guard let inResultBuffer = resultBuffer else {
fatalError() fatalError()
} }
return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: capacity) return inResultBuffer.contents().bindMemory(to: Float32.self, capacity: paddedCapacity)
} }
} }
extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible { extension FetchHolder: CustomStringConvertible, CustomDebugStringConvertible {
var description: String { public var description: String {
fatalError() fatalError()
// return "\(result)" // return "\(result)"
} }
var debugDescription: String { public var debugDescription: String {
fatalError() fatalError()
// return "\(result)" // return "\(result)"
} }
......
...@@ -14,39 +14,42 @@ ...@@ -14,39 +14,42 @@
import Foundation import Foundation
public struct Dim { @objc public class Dim: NSObject {
public init(inDim: [Int]) { private(set) var dims: [Int]
dims = inDim
} @objc public init(inDim: [Int]) {
dims = inDim
mutating func swapeDimAt(index1: Int, index2: Int) { }
dims.swapAt(index1, index2)
} public func cout() -> Int {
return dims.count
func cout() -> Int { }
return dims.count
} public func numel() -> Int {
return dims.reduce(1) { $0 * $1 }
func numel() -> Int { }
return dims.reduce(1) { $0 * $1 }
} public static func ==(left: Dim, right: Dim) -> Bool {
return left.dims == right.dims;
public static func ==(left: Dim, right: Dim) -> Bool { }
return left.dims == right.dims;
} public static func !=(left: Dim, right: Dim) -> Bool {
return left.dims != right.dims;
public subscript(index: Int) -> Int { }
return dims[index];
} public subscript(index: Int) -> Int {
return dims[index];
private(set) var dims: [Int] }
private init(){
fatalError() public override var description: String {
} return "\(dims)"
} }
extension Dim: CustomStringConvertible { func swapeDimAt(index1: Int, index2: Int) {
public var description: String { dims.swapAt(index1, index2)
return "\(dims)" }
}
private override init(){
fatalError()
}
} }
...@@ -15,19 +15,16 @@ ...@@ -15,19 +15,16 @@
import Foundation import Foundation
let testTo = 81 let testTo = 5
var isTest = false var isTest = false
let computePrecision: ComputePrecision = .Float16 @objc public class GPUResultHolder: NSObject{
@objc public let dim: [Int]
public class GPUResultHolder { @objc public let capacity: Int
public let dim: [Int] @objc public var resultPointer: UnsafeMutablePointer<Float32>?
public let capacity: Int @objc public var intermediateResults: [String : [MTLBuffer]]?
public var resultPointer: UnsafeMutablePointer<Float32>? public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inIntermediateResults: [String : [MTLBuffer]]? = nil) {
public var intermediateResults: [String : [Variant]]?
public let elapsedTime: Double
public init(inDim: [Int], inPointer: UnsafeMutablePointer<Float32>?, inCapacity: Int, inElapsedTime: Double, inIntermediateResults: [String : [Variant]]? = nil) {
dim = inDim dim = inDim
capacity = inCapacity capacity = inCapacity
...@@ -36,64 +33,34 @@ public class GPUResultHolder { ...@@ -36,64 +33,34 @@ public class GPUResultHolder {
resultPointer?.initialize(from: inInPointer, count: inCapacity) resultPointer?.initialize(from: inInPointer, count: inCapacity)
} }
elapsedTime = inElapsedTime
intermediateResults = inIntermediateResults intermediateResults = inIntermediateResults
} }
} public override var description: String {
extension GPUResultHolder: CustomDebugStringConvertible, CustomStringConvertible {
public var debugDescription: String {
// var str = ""
// str += "Dim: \(dim) \n value:[ "
// if resultArr.count < 20 {
// for d in resultArr {
// str += " \(d) "
// }
// } else {
// for d in stride(from: 0, to: resultArr.count, by: resultArr.count/20) {
// str += " \(resultArr[d]) "
// }
// }
// str += " ]"
// return str
fatalError() fatalError()
} }
public var description: String {
return debugDescription
}
} }
public class Executor<P: PrecisionType> { public class Executor<P: PrecisionType> {
var ops: [Runable & InferShaperable] = [] var ops: [Runable & InferShaperable] = []
var preInputDim: Dim = Dim.init(inDim: [])
let program: Program let program: Program
let device: MTLDevice let device: MTLDevice
let inflightSemaphore: DispatchSemaphore let inflightSemaphore: DispatchSemaphore
let queue: MTLCommandQueue let queue: MTLCommandQueue
public init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program) throws { init(inDevice:MTLDevice, inQueue: MTLCommandQueue, inProgram: Program, initContext: InitContext) throws {
self.inflightSemaphore = DispatchSemaphore(value: 3) self.inflightSemaphore = DispatchSemaphore(value: 1)
program = inProgram program = inProgram
device = inDevice device = inDevice
queue = inQueue queue = inQueue
// print("before for ")
//print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
for block in inProgram.programDesc.blocks { for block in inProgram.programDesc.blocks {
//block.ops.count //block.ops.count
for i in 0..<block.ops.count { for i in 0..<block.ops.count {
let opDesc = block.ops[i] let opDesc = block.ops[i]
do { do {
// print("in for i \(i): ") let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope, initContext: initContext)
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
//
// if i == 56 {
// print(program.scope.vars["fea_pyramid1_mbox_conf_flat.Flatten.output.1.tmp_0"])
//
// }
let op = try OpCreator<P>.shared.creat(device: inDevice, opDesc: opDesc, scope: inProgram.scope)
ops.append(op) ops.append(op)
} catch let error { } catch let error {
throw error throw error
...@@ -102,11 +69,12 @@ public class Executor<P: PrecisionType> { ...@@ -102,11 +69,12 @@ public class Executor<P: PrecisionType> {
} }
} }
public func predict(input: MTLTexture, dim: [Int], completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws { public func predict(input: MTLTexture, dim: Dim, completionHandle: @escaping (GPUResultHolder) -> Void, preProcessKernle: CusomKernel? = nil, except: Int = 0) throws {
inflightSemaphore.wait()
guard let buffer = queue.makeCommandBuffer() else { guard let buffer = queue.makeCommandBuffer() else {
throw PaddleMobileError.predictError(message: "CommandBuffer is nil") throw PaddleMobileError.predictError(message: "CommandBuffer is nil")
} }
inflightSemaphore.wait()
let resInput: MTLTexture let resInput: MTLTexture
if let inPre = preProcessKernle { if let inPre = preProcessKernle {
...@@ -120,8 +88,7 @@ public class Executor<P: PrecisionType> { ...@@ -120,8 +88,7 @@ public class Executor<P: PrecisionType> {
resInput = input resInput = input
} }
let beforeDate = Date.init() let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: dim)
let inputTexture = InputTexture.init(inMTLTexture: resInput, inExpectDim: Dim.init(inDim: dim))
program.scope.setInput(input: inputTexture) program.scope.setInput(input: inputTexture)
//(ops.count - except) //(ops.count - except)
for i in 0..<(ops.count - except) { for i in 0..<(ops.count - except) {
...@@ -133,64 +100,45 @@ public class Executor<P: PrecisionType> { ...@@ -133,64 +100,45 @@ public class Executor<P: PrecisionType> {
} }
} }
var outputTextures: [String : [Variant]]? var outputTextures: [String : [MTLBuffer]]?
if except > 0 { if except > 0 {
ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer) ops[ops.count - except].computeMiddleResult(device: device, buffer: buffer)
outputTextures = ops[ops.count - except].inputVariant() outputTextures = ops[ops.count - except].inputVariant()
} }
buffer.addCompletedHandler { [weak self] (commandbuffer) in buffer.addCompletedHandler { [weak self] (commandbuffer) in
// let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
// print(inputArr.strideArray())
//
//// print(dim)
// writeToLibrary(fileName: "test_image_ssd_ar", array: inputArr)
// print(" write done ")
// print("write to library done")
// return
// print(inputArr)
//
// let stridableInput: [(index: Int, value: Float)] = input.stridableFloatArray()
// print(stridableInput)
//
// let _: Flo? = input.logDesc(header: "input: ", stridable: true)
// for i in 0..<self!.ops.count {
// let op = self!.ops[i]
// print(" 第 \(i) 个 op: ")
// op.delogOutput()
// }
// return;
// self!.ops[testTo - 2].delogOutput()
// self!.ops[testTo - 1].delogOutput()
// self!.ops[5].delogOutput()
// return
guard let SSelf = self else { guard let SSelf = self else {
// return
fatalError() fatalError()
} }
//将输入写进文件
/*
let inputArr = resInput.toTensor(dim: (n: dim[0], c: dim[3], h: dim[1], w: dim[2]))
print(dim)
writeToLibrary(fileName: "test_image_super", array: inputArr)
print(" write done ")
return
*/
/* 输出 op 计算结果
for op in SSelf.ops {
op.delogOutput()
}
*/
let afterDate = Date.init()
var resultHolder: GPUResultHolder var resultHolder: GPUResultHolder
if except > 0 { if except > 0 {
resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inElapsedTime: afterDate.timeIntervalSince(beforeDate), inIntermediateResults: outputTextures) resultHolder = GPUResultHolder.init(inDim: [], inPointer: nil, inCapacity: 0, inIntermediateResults: outputTextures)
} else { } else {
let outputVar: Variant = SSelf.program.scope.output()! let outputVar: Variant = SSelf.program.scope.output()!
let output: FetchHolder = outputVar as! FetchHolder let output: FetchHolder = outputVar as! FetchHolder
// let beforeToTensorDate = Date.init() resultHolder = GPUResultHolder.init(inDim: output.dim.dims, inPointer: output.result, inCapacity: output.capacity)
resultHolder = GPUResultHolder.init(inDim: output.dim, inPointer: output.result, inCapacity: output.capacity, inElapsedTime: afterDate.timeIntervalSince(beforeDate))
// let timeToTensor = Date.init().timeIntervalSince(beforeToTensorDate)
// print(timeToTensor)
} }
completionHandle(resultHolder) completionHandle(resultHolder)
SSelf.inflightSemaphore.signal() SSelf.inflightSemaphore.signal()
} }
buffer.commit() buffer.commit()
} }
......
...@@ -150,6 +150,9 @@ public class Loader<P: PrecisionType> { ...@@ -150,6 +150,9 @@ public class Loader<P: PrecisionType> {
let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram) let originProgramDesc = ProgramDesc.init(protoProgram: protoProgram)
let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc) let programDesc = ProgramOptimize<P>.init().optimize(originProgramDesc: originProgramDesc)
// let programDesc = ProgramDesc.init(protoProgram: protoProgram)
print(programDesc) print(programDesc)
guard programDesc.blocks.count > 0 else { guard programDesc.blocks.count > 0 else {
...@@ -210,7 +213,7 @@ public class Loader<P: PrecisionType> { ...@@ -210,7 +213,7 @@ public class Loader<P: PrecisionType> {
scope[varDesc.name] = tensor scope[varDesc.name] = tensor
} else { } else {
let dim = Dim.init(inDim: tensorDesc.dims) let dim = Dim.init(inDim: tensorDesc.dims)
scope[varDesc.name] = Texture<P>.init(device: device, inDim: dim) scope[varDesc.name] = Texture.init(device: device, inDim: dim)
} }
} else { } else {
if varDesc.name == fetchKey { if varDesc.name == fetchKey {
......
...@@ -28,9 +28,7 @@ extension Tensorial { ...@@ -28,9 +28,7 @@ extension Tensorial {
} }
} }
public enum ComputePrecision {
case Float32, Float16
}
class Tensor<P: PrecisionType>: Tensorial { class Tensor<P: PrecisionType>: Tensorial {
...@@ -97,7 +95,7 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -97,7 +95,7 @@ class Tensor<P: PrecisionType>: Tensorial {
func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, convertToNHWC: Bool = true, withTranspose: Bool = false) { func initBuffer(device: MTLDevice, precision: ComputePrecision = .Float16, padWhenOneC: Bool = false, convertToNHWC: Bool = true, withTranspose: Bool = false) {
if convertToNHWC { if convertToNHWC {
// print(layout) // print(layout)
convert(to: DataLayout.NHWC()) convert(to: DataLayout.NHWC())
...@@ -145,7 +143,7 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -145,7 +143,7 @@ class Tensor<P: PrecisionType>: Tensorial {
case .Float16: case .Float16:
float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count) float32ToFloat16(input: floatPointer, output: buffer.contents(), count: count)
} }
} else if C == 1 { } else if C == 1 && !padWhenOneC {
buffer = device.makeBuffer(length: numel() * precisionSize) buffer = device.makeBuffer(length: numel() * precisionSize)
switch precision { switch precision {
case .Float32: case .Float32:
...@@ -238,10 +236,32 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -238,10 +236,32 @@ class Tensor<P: PrecisionType>: Tensorial {
data.release() data.release()
} }
var n: Int {
get {
if dim.cout() == 4 {
if layout == DataLayout.NCHW() {
return dim[0]
} else if layout == DataLayout.NHWC() {
return dim[0]
} else {
fatalError(" unsupport ")
}
} else {
fatalError()
}
}
}
var width: Int { var width: Int {
get { get {
if dim.cout() == 4 { if dim.cout() == 4 {
return dim[1] if layout == DataLayout.NHWC() {
return dim[2]
} else if layout == DataLayout.NCHW() {
return dim[3]
} else {
fatalError(" unsupport ")
}
} else { } else {
fatalError() fatalError()
} }
...@@ -251,7 +271,13 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -251,7 +271,13 @@ class Tensor<P: PrecisionType>: Tensorial {
var height: Int { var height: Int {
get { get {
if dim.cout() == 4 { if dim.cout() == 4 {
return dim[2] if layout == DataLayout.NHWC() {
return dim[1]
} else if layout == DataLayout.NCHW() {
return dim[2]
} else {
fatalError(" unsupport ")
}
} else { } else {
fatalError() fatalError()
} }
...@@ -261,7 +287,13 @@ class Tensor<P: PrecisionType>: Tensorial { ...@@ -261,7 +287,13 @@ class Tensor<P: PrecisionType>: Tensorial {
var channel: Int { var channel: Int {
get { get {
if dim.cout() == 4 { if dim.cout() == 4 {
return dim[3] if layout == DataLayout.NHWC() {
return dim[3]
} else if layout == DataLayout.NCHW() {
return dim[1]
} else {
fatalError(" unsupport ")
}
} else { } else {
fatalError() fatalError()
} }
......
...@@ -68,16 +68,20 @@ extension InputTexture { ...@@ -68,16 +68,20 @@ extension InputTexture {
.height = 1 .height = 1
.len = 1 .len = 1
*/ */
public class Texture: Tensorial {
public var dim: Dim
public class Texture<P: PrecisionType>: Tensorial {
var dim: Dim
public var tensorDim: Dim public var tensorDim: Dim
/// tensor dim pad to four
public var padToFourDim: Dim public var padToFourDim: Dim
private var textureDesc: MTLTextureDescriptor! private var textureDesc: MTLTextureDescriptor!
public var metalTexture: MTLTexture! public var metalTexture: MTLTexture!
var transpose: [Int] = [0, 1, 2, 3] var transpose: [Int] = [0, 1, 2, 3]
func elementCount() -> Int {
return metalTexture.width * metalTexture.height * metalTexture.arrayLength * 4
}
func toTensor() -> [Float32] { func toTensor() -> [Float32] {
guard padToFourDim.cout() == 4 else { guard padToFourDim.cout() == 4 else {
fatalError("- not support -") fatalError("- not support -")
...@@ -92,15 +96,15 @@ public class Texture<P: PrecisionType>: Tensorial { ...@@ -92,15 +96,15 @@ public class Texture<P: PrecisionType>: Tensorial {
return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3])) return metalTexture.realNHWC(dim: (n: padToFourDim[0], h: padToFourDim[1], w: padToFourDim[2], c: padToFourDim[3]))
} }
func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) { public func initTexture(device: MTLDevice, inTranspose: [Int] = [0, 1, 2, 3], computePrecision: ComputePrecision = .Float16) {
transpose = inTranspose transpose = inTranspose
for i in 0..<(4 - tensorDim.cout()) { for i in 0..<(4 - tensorDim.cout()) {
if i != inTranspose[i] { if i != inTranspose[i] {
fatalError() fatalError()
} }
} }
let newDim = transpose.map { padToFourDim[$0] }
let newDim = transpose.map { padToFourDim[$0] }
let newLayout = transpose.map { layout.layoutWithDim[$0] } let newLayout = transpose.map { layout.layoutWithDim[$0] }
layout = DataLayout.init(newLayout) layout = DataLayout.init(newLayout)
...@@ -139,7 +143,29 @@ public class Texture<P: PrecisionType>: Tensorial { ...@@ -139,7 +143,29 @@ public class Texture<P: PrecisionType>: Tensorial {
metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! " texture nil " metalTexture = device.makeTexture(descriptor: tmpTextureDes) ?! " texture nil "
} }
public func updateDims(inTensorDim: Dim, inDim: Dim) {
var fourDim: Dim
if inDim.cout() == 4 {
fourDim = inDim
} else if inDim.cout() < 4 {
var fourDimNum: [Int] = []
for _ in 0..<(4 - inDim.cout()) {
fourDimNum.append(1)
}
fourDimNum.append(contentsOf: inDim.dims)
fourDim = Dim.init(inDim: fourDimNum)
} else {
fatalError(" not support ")
}
tensorDim = inTensorDim
dim = fourDim
padToFourDim = fourDim
}
// 初始化时 dim padToFourDim 模型中的维度(一般来说 nchw),前面补全0
init(device: MTLDevice, inDim: Dim) { init(device: MTLDevice, inDim: Dim) {
print(" in dim > \(inDim)")
var fourDim: Dim var fourDim: Dim
if inDim.cout() == 4 { if inDim.cout() == 4 {
fourDim = inDim fourDim = inDim
......
...@@ -27,19 +27,19 @@ class OpCreator<P: PrecisionType> { ...@@ -27,19 +27,19 @@ class OpCreator<P: PrecisionType> {
} }
} }
func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope) throws -> Runable & InferShaperable { func creat(device: MTLDevice, opDesc: OpDesc, scope: Scope, initContext: InitContext) throws -> Runable & InferShaperable {
guard let opCreator = opCreators[opDesc.type] else { guard let opCreator = opCreators[opDesc.type] else {
throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet") throw PaddleMobileError.opError(message: "there is no " + opDesc.type + " yet")
} }
do { do {
return try opCreator(device, opDesc, scope) return try opCreator(device, opDesc, scope, initContext)
} catch let error { } catch let error {
throw error throw error
} }
} }
let opCreators: [String : (MTLDevice, OpDesc, Scope) throws -> Runable & InferShaperable] = let opCreators: [String : (MTLDevice, OpDesc, Scope, InitContext) throws -> Runable & InferShaperable] =
[gConvType : ConvOp<P>.creat, [gConvType : ConvOp<P>.creat,
gBatchNormType : BatchNormOp<P>.creat, gBatchNormType : BatchNormOp<P>.creat,
gReluType : ReluOp<P>.creat, gReluType : ReluOp<P>.creat,
......
...@@ -31,7 +31,7 @@ protocol Runable { ...@@ -31,7 +31,7 @@ protocol Runable {
func run(device: MTLDevice, buffer: MTLCommandBuffer) throws func run(device: MTLDevice, buffer: MTLCommandBuffer) throws
func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws func runImpl(device: MTLDevice,buffer: MTLCommandBuffer) throws
func delogOutput() func delogOutput()
func inputVariant() -> [String : [Variant]] func inputVariant() -> [String : [MTLBuffer]]
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer)
} }
...@@ -44,7 +44,7 @@ extension Runable where Self: OperatorProtocol{ ...@@ -44,7 +44,7 @@ extension Runable where Self: OperatorProtocol{
} }
} }
func inputVariant() -> [String : [Variant]] { func inputVariant() -> [String : [MTLBuffer]] {
// return [:] // return [:]
fatalError(" op \(type) need implement inputVariant") fatalError(" op \(type) need implement inputVariant")
} }
...@@ -59,15 +59,26 @@ extension Runable where Self: OperatorProtocol{ ...@@ -59,15 +59,26 @@ extension Runable where Self: OperatorProtocol{
} }
} }
public class InitContext {
/// metal 代码加载方式
var metalLoadMode: MetalLoadMode = .LoadMetalInDefaultLib
/// 当 metalLoadMode 为 LoadMetalInCustomMetalLib 时, metal library 路径不能为空
var metalLibPath: String? = nil
init() {
metalLoadMode = .LoadMetalInDefaultLib
metalLibPath = nil
}
}
protocol Creator where Self: OperatorProtocol{ protocol Creator where Self: OperatorProtocol{
associatedtype OpType: OperatorProtocol & Runable & InferShaperable associatedtype OpType: OperatorProtocol & Runable & InferShaperable
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType
} }
extension Creator where Self: OperatorProtocol { extension Creator where Self: OperatorProtocol {
static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> OpType { static func creat(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> OpType {
do { do {
return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope) return try OpType.provide(device:device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error { } catch let error {
throw error throw error
} }
...@@ -89,13 +100,13 @@ protocol OperatorProtocol { ...@@ -89,13 +100,13 @@ protocol OperatorProtocol {
var attrs: [String : Attr] { get } var attrs: [String : Attr] { get }
var para: ParamType { get } var para: ParamType { get }
var kernel: KerType { get } var kernel: KerType { get }
init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws
} }
extension OperatorProtocol { extension OperatorProtocol {
static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws -> Self { static func provide(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws -> Self {
do { do {
return try Self.init(device: device, opDesc: opDesc, inScope: inScope) return try Self.init(device: device, opDesc: opDesc, inScope: inScope, initContext: initContext)
} catch let error { } catch let error {
throw error throw error
} }
...@@ -103,18 +114,7 @@ extension OperatorProtocol { ...@@ -103,18 +114,7 @@ extension OperatorProtocol {
} }
class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType { class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where KernelType.ParamType == ParameterType {
typealias ParamType = ParameterType required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope, initContext: InitContext) throws {
typealias KerType = KernelType
let type: String
let inputs: [String : [String]]
var paraInputs: [String : [String]]
let outpus: [String : [String]]
let attrs: [String : Attr]
let para: ParamType
let scope: Scope
var kernel: KerType
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
// print("create op: \(opDesc.type)")
type = opDesc.type type = opDesc.type
scope = inScope scope = inScope
inputs = opDesc.inputs inputs = opDesc.inputs
...@@ -126,8 +126,19 @@ class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where ...@@ -126,8 +126,19 @@ class Operator <KernelType: Computable , ParameterType>: OperatorProtocol where
} catch let error { } catch let error {
throw error throw error
} }
kernel = KernelType.init(device: device, param: para) kernel = KernelType.init(device: device, param: para, initContext: initContext)
} }
typealias ParamType = ParameterType
typealias KerType = KernelType
let type: String
let inputs: [String : [String]]
var paraInputs: [String : [String]]
let outpus: [String : [String]]
let attrs: [String : Attr]
let para: ParamType
let scope: Scope
var kernel: KerType
} }
// op infos // op infos
......
...@@ -34,8 +34,8 @@ class BatchNormParam<P: PrecisionType>: OpParam { ...@@ -34,8 +34,8 @@ class BatchNormParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
let bias: Tensor<P> let bias: Tensor<P>
let mean: Tensor<P> let mean: Tensor<P>
let scale: Tensor<P> let scale: Tensor<P>
......
...@@ -30,8 +30,8 @@ class BilinearInterpParam<P: PrecisionType>: OpParam { ...@@ -30,8 +30,8 @@ class BilinearInterpParam<P: PrecisionType>: OpParam {
fatalError() fatalError()
} }
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
let out_h: Int let out_h: Int
let out_w: Int let out_w: Int
} }
......
...@@ -37,10 +37,10 @@ class BoxcoderParam<P: PrecisionType>: OpParam { ...@@ -37,10 +37,10 @@ class BoxcoderParam<P: PrecisionType>: OpParam {
assert(codeType == "decode_center_size") // encode_center_size is not implemented assert(codeType == "decode_center_size") // encode_center_size is not implemented
assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1) assert((targetBox.tensorDim.cout() == 3) && (targetBox.tensorDim[0] == 1)) // N must be 1 (only handle batch size = 1)
} }
let priorBox: Texture<P> let priorBox: Texture
let priorBoxVar: Texture<P> let priorBoxVar: Texture
let targetBox: Texture<P> let targetBox: Texture
var output: Texture<P> var output: Texture
let codeType: String let codeType: String
let boxNormalized: Bool let boxNormalized: Bool
} }
......
...@@ -22,7 +22,7 @@ class ConcatParam<P: PrecisionType>: OpParam { ...@@ -22,7 +22,7 @@ class ConcatParam<P: PrecisionType>: OpParam {
fatalError() fatalError()
} }
for x in xlist { for x in xlist {
guard let variant = inScope[x], let v = variant as? Texture<P> else { guard let variant = inScope[x], let v = variant as? Texture else {
fatalError() fatalError()
} }
if transpose.count == 0 { if transpose.count == 0 {
...@@ -40,8 +40,8 @@ class ConcatParam<P: PrecisionType>: OpParam { ...@@ -40,8 +40,8 @@ class ConcatParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
var input: [Texture<P>] = [] var input: [Texture] = []
var output: Texture<P> var output: Texture
var transpose: [Int] = [] var transpose: [Int] = []
let axis: Int let axis: Int
} }
......
...@@ -34,12 +34,12 @@ class ConvAddAddPreluParam<P: PrecisionType>: OpParam { ...@@ -34,12 +34,12 @@ class ConvAddAddPreluParam<P: PrecisionType>: OpParam {
} }
} }
let input: Texture<P> let input: Texture
let y: Tensor<P> let y: Tensor<P>
let filter: Tensor<P> let filter: Tensor<P>
let mode: String let mode: String
let alpha: Tensor<P> let alpha: Tensor<P>
var output: Texture<P> var output: Texture
let stride: [Int32] let stride: [Int32]
let paddings: [Int32] let paddings: [Int32]
let dilations: [Int32] let dilations: [Int32]
......
...@@ -40,7 +40,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam { ...@@ -40,7 +40,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
} }
} }
let input: Texture<P> let input: Texture
let variance: Tensor<P> let variance: Tensor<P>
let bias: Tensor<P> let bias: Tensor<P>
...@@ -52,7 +52,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam { ...@@ -52,7 +52,7 @@ class ConvAddBatchNormReluParam<P: PrecisionType>: OpParam {
var newScale: MTLBuffer? var newScale: MTLBuffer?
var newBiase: MTLBuffer? var newBiase: MTLBuffer?
var output: Texture<P> var output: Texture
let stride: [Int32] let stride: [Int32]
let paddings: [Int32] let paddings: [Int32]
let dilations: [Int32] let dilations: [Int32]
......
...@@ -32,11 +32,11 @@ class ConvAddParam<P: PrecisionType>: OpParam { ...@@ -32,11 +32,11 @@ class ConvAddParam<P: PrecisionType>: OpParam {
} }
} }
let input: Texture<P> let input: Texture
let y: Tensor<P> let y: Tensor<P>
let filter: Tensor<P> let filter: Tensor<P>
var output: Texture<P> var output: Texture
let stride: [Int32] let stride: [Int32]
let paddings: [Int32] let paddings: [Int32]
let dilations: [Int32] let dilations: [Int32]
...@@ -111,6 +111,7 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>, ...@@ -111,6 +111,7 @@ class ConvAddOp<P: PrecisionType>: Operator<ConvAddKernel<P>, ConvAddParam<P>>,
// print(biase) // print(biase)
print(" \(type) output: ") print(" \(type) output: ")
print(para.output.metalTexture)
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
} }
} }
...@@ -33,12 +33,12 @@ class ConvAddPreluParam<P: PrecisionType>: OpParam { ...@@ -33,12 +33,12 @@ class ConvAddPreluParam<P: PrecisionType>: OpParam {
} }
} }
let input: Texture<P> let input: Texture
let y: Tensor<P> let y: Tensor<P>
let filter: Tensor<P> let filter: Tensor<P>
let mode: String let mode: String
let alpha: Tensor<P> let alpha: Tensor<P>
var output: Texture<P> var output: Texture
let stride: [Int32] let stride: [Int32]
let paddings: [Int32] let paddings: [Int32]
let dilations: [Int32] let dilations: [Int32]
......
...@@ -36,8 +36,7 @@ class ConvBNReluParam<P: PrecisionType>: OpParam { ...@@ -36,8 +36,7 @@ class ConvBNReluParam<P: PrecisionType>: OpParam {
} }
} }
let input: Texture<P> let input: Texture
let variance: Tensor<P> let variance: Tensor<P>
let bias: Tensor<P> let bias: Tensor<P>
let mean: Tensor<P> let mean: Tensor<P>
...@@ -47,7 +46,7 @@ class ConvBNReluParam<P: PrecisionType>: OpParam { ...@@ -47,7 +46,7 @@ class ConvBNReluParam<P: PrecisionType>: OpParam {
var newScale: MTLBuffer? var newScale: MTLBuffer?
var newBiase: MTLBuffer? var newBiase: MTLBuffer?
var output: Texture<P> var output: Texture
let stride: [Int32] let stride: [Int32]
let paddings: [Int32] let paddings: [Int32]
let dilations: [Int32] let dilations: [Int32]
......
...@@ -31,9 +31,9 @@ class ConvParam<P: PrecisionType>: OpParam { ...@@ -31,9 +31,9 @@ class ConvParam<P: PrecisionType>: OpParam {
} }
} }
let input: Texture<P> let input: Texture
let filter: Tensor<P> let filter: Tensor<P>
var output: Texture<P> var output: Texture
let stride: [Int32] let stride: [Int32]
let paddings: [Int32] let paddings: [Int32]
let dilations: [Int32] let dilations: [Int32]
......
...@@ -17,14 +17,6 @@ import Foundation ...@@ -17,14 +17,6 @@ import Foundation
class DepthConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable { class DepthConvOp<P: PrecisionType>: Operator<ConvKernel<P>, ConvParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = DepthConvOp<P> typealias OpType = DepthConvOp<P>
required init(device: MTLDevice, opDesc: OpDesc, inScope: Scope) throws {
do {
try super.init(device: device, opDesc: opDesc, inScope: inScope)
} catch let error {
throw error
}
}
func inferShape() { func inferShape() {
let inDims = para.input.dim let inDims = para.input.dim
......
...@@ -32,7 +32,7 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam { ...@@ -32,7 +32,7 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam {
let device = inputX.metalTexture!.device let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim) inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision) inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: GlobalConfig.shared.computePrecision)
} }
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) { // required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
...@@ -55,9 +55,9 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam { ...@@ -55,9 +55,9 @@ class ElementwiseAddParam<P: PrecisionType>: OpParam {
} }
} }
var inputX: Texture<P> var inputX: Texture
var inputY: Texture<P> var inputY: Texture
var output: Texture<P> var output: Texture
var axis: Int var axis: Int
} }
......
...@@ -34,7 +34,7 @@ class ElementwiseAddPreluParam<P: PrecisionType>: OpParam { ...@@ -34,7 +34,7 @@ class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
let device = inputX.metalTexture!.device let device = inputX.metalTexture!.device
inputY = Texture.init(device: device, inDim: tensorY.dim) inputY = Texture.init(device: device, inDim: tensorY.dim)
let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel())) let value: [P] = Array(UnsafeBufferPointer(start: tensorY.data.pointer, count: tensorY.dim.numel()))
inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: computePrecision) inputY.metalTexture = device.tensor2texture(value: value, dim: tensorY.dim.dims, transpose: [0, 1, 2, 3], inComputePrecision: GlobalConfig.shared.computePrecision)
} }
// required init(device: MTLDevice, param: ElementwiseAddParam<P>) { // required init(device: MTLDevice, param: ElementwiseAddParam<P>) {
...@@ -59,9 +59,9 @@ class ElementwiseAddPreluParam<P: PrecisionType>: OpParam { ...@@ -59,9 +59,9 @@ class ElementwiseAddPreluParam<P: PrecisionType>: OpParam {
let mode: String let mode: String
let alpha: Tensor<P> let alpha: Tensor<P>
var inputX: Texture<P> var inputX: Texture
var inputY: Texture<P> var inputY: Texture
var output: Texture<P> var output: Texture
var axis: Int var axis: Int
} }
......
...@@ -17,7 +17,7 @@ import MetalKit ...@@ -17,7 +17,7 @@ import MetalKit
import CoreMedia import CoreMedia
class FeedParam<P: PrecisionType>: OpParam{ class FeedParam<P: PrecisionType>: OpParam{
var output: Texture<P> var output: Texture
var input: InputTexture { var input: InputTexture {
return scope.input() as! InputTexture return scope.input() as! InputTexture
} }
...@@ -63,7 +63,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam< ...@@ -63,7 +63,8 @@ class FeedOp<P: PrecisionType>: Operator<Texture2DTo2DArrayKernel<P>, FeedParam<
func delogOutput() { func delogOutput() {
print(" \(type) output: ") print(" \(type) output: ")
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[1], h: para.output.padToFourDim[2], w: para.output.padToFourDim[3])).strideArray()) print(para.output.metalTexture)
print(para.output.metalTexture.toTensor(dim: (n: para.output.padToFourDim[0], c: para.output.padToFourDim[3], h: para.output.padToFourDim[2], w: para.output.padToFourDim[1])).strideArray())
} }
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
import Metal
class FetchParam<P: PrecisionType>: OpParam{
var output: FetchHolder
let input: Texture
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = FetchHolder.init(inPaddedCapacity: input.elementCount(), inDim: input.tensorDim)
scope.setOutput(output: output)
} catch let error {
throw error
}
}
//typealias ParamPrecisionType = P
}
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = FetchOp<P>
func inferShape() {
print(para.input.dim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
}
}
func delogOutput() {
print("fetch output: ")
let resArr = self.para.output.result.floatArr(count: self.para.output.capacity)
print(resArr.strideArray())
}
}
...@@ -25,8 +25,8 @@ class FlattenParam<P: PrecisionType>: OpParam { ...@@ -25,8 +25,8 @@ class FlattenParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
let axis: Int let axis: Int
} }
......
...@@ -21,14 +21,14 @@ public protocol TestParam { ...@@ -21,14 +21,14 @@ public protocol TestParam {
public protocol Testable { public protocol Testable {
associatedtype TestParamType: TestParam associatedtype TestParamType: TestParam
func test(commandBuffer: MTLCommandBuffer, param: TestParamType) func test(commandBuffer: MTLCommandBuffer, param: TestParamType)
init(device: MTLDevice, testParam: TestParamType) init(device: MTLDevice, testParam: TestParamType, initContext: InitContext)
} }
protocol Computable { protocol Computable {
associatedtype ParamType: OpParam associatedtype ParamType: OpParam
func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws func compute(commandBuffer: MTLCommandBuffer, param: ParamType) throws
init(device: MTLDevice, param: ParamType) init(device: MTLDevice, param: ParamType, initContext: InitContext)
} }
protocol KernelProtocol { protocol KernelProtocol {
...@@ -37,37 +37,83 @@ protocol KernelProtocol { ...@@ -37,37 +37,83 @@ protocol KernelProtocol {
} }
open class Kernel { @objc open class Kernel: NSObject{
let pipline: MTLComputePipelineState let pipline: MTLComputePipelineState
let functionName: String let functionName: String
public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = true) { public init(device: MTLDevice, inFunctionName: String, usePaddleMobileLib: Bool = false, initContext: InitContext) {
pipline = device.pipeLine(funcName: inFunctionName, inPaddleMobileLib: usePaddleMobileLib) pipline = device.pipeLine(funcName: inFunctionName, metalLoadMode: initContext.metalLoadMode, metalLibPath: initContext.metalLibPath)
functionName = inFunctionName functionName = inFunctionName
} }
} }
open class CusomKernel: Kernel { @objc public class Shape: NSObject {
public struct Shape { public let width: Int
public let width: Int public let height: Int
public let height: Int public let channel: Int
public let channel: Int @objc public init(inWidth: Int, inHeight: Int, inChannel: Int){
public init(inWidth: Int, inHeight: Int, inChannel: Int){ width = inWidth
width = inWidth height = inHeight
height = inHeight channel = inChannel
channel = inChannel }
}
open class BufferToTextureKernel: Kernel {
public let outputTexture: MTLTexture
public init(device: MTLDevice, outputDim: Shape, metalLoadMode: MetalLoadMode, metalLibPath: String?) {
let textureDesc = MTLTextureDescriptor.init()
textureDesc.textureType = .type2D
textureDesc.width = outputDim.width
textureDesc.height = outputDim.height
textureDesc.depth = (outputDim.channel + 3) / 4
if GlobalConfig.shared.computePrecision == .Float16 {
textureDesc.pixelFormat = .rgba16Float
} else if GlobalConfig.shared.computePrecision == .Float32 {
textureDesc.pixelFormat = .rgba32Float
} else {
fatalError()
} }
textureDesc.usage = [.shaderRead, .shaderWrite]
textureDesc.storageMode = .shared
outputTexture = device.makeTexture(descriptor: textureDesc) ?! " make texture error "
let initContext = InitContext.init()
initContext.metalLibPath = metalLibPath
initContext.metalLoadMode = metalLoadMode
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "buffer_to_texture_kernel", initContext: initContext)
} else {
super.init(device: device, inFunctionName: "buffer_to_texture_kernel_half", initContext: initContext)
}
}
public func compute(inputBuffer: MTLBuffer , commandBuffer: MTLCommandBuffer) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setBuffer(inputBuffer, offset: 0, index: 0)
encoder.setTexture(outputTexture, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: outputTexture)
encoder.endEncoding()
} }
}
@objc open class CusomKernel: Kernel {
public let outputTexture: MTLTexture public let outputTexture: MTLTexture
public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, usePaddleMobileLib: Bool = false) { public init(device: MTLDevice, inFunctionName: String, outputDim: Shape, metalLoadModel: MetalLoadMode, metalLibPath: String?) {
let textureDesc = MTLTextureDescriptor.init() let textureDesc = MTLTextureDescriptor.init()
textureDesc.textureType = .type2D textureDesc.textureType = .type2D
textureDesc.width = outputDim.width textureDesc.width = outputDim.width
textureDesc.height = outputDim.height textureDesc.height = outputDim.height
textureDesc.depth = (outputDim.channel + 3) / 4 textureDesc.depth = (outputDim.channel + 3) / 4
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
textureDesc.pixelFormat = .rgba16Float textureDesc.pixelFormat = .rgba16Float
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
textureDesc.pixelFormat = .rgba32Float textureDesc.pixelFormat = .rgba32Float
} else { } else {
fatalError() fatalError()
...@@ -77,7 +123,10 @@ open class CusomKernel: Kernel { ...@@ -77,7 +123,10 @@ open class CusomKernel: Kernel {
textureDesc.storageMode = .shared textureDesc.storageMode = .shared
outputTexture = device.makeTexture(descriptor: textureDesc) ?! " make texture error " outputTexture = device.makeTexture(descriptor: textureDesc) ?! " make texture error "
super.init(device: device, inFunctionName: inFunctionName, usePaddleMobileLib: usePaddleMobileLib) let context = InitContext.init()
context.metalLoadMode = metalLoadModel
context.metalLibPath = metalLibPath
super.init(device: device, inFunctionName: inFunctionName, initContext: context)
} }
public func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws { public func compute(inputTexuture: MTLTexture, commandBuffer: MTLCommandBuffer) throws {
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import Foundation import Foundation
class BatchNormKernel<P: PrecisionType>: Kernel, Computable { class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
required init(device: MTLDevice, param: BatchNormParam<P>) { required init(device: MTLDevice, param: BatchNormParam<P>, initContext: InitContext) {
let count = param.variance.dim.numel() let count = param.variance.dim.numel()
let varianceP = param.variance.data.pointer let varianceP = param.variance.data.pointer
let meanP = param.mean.data.pointer let meanP = param.mean.data.pointer
...@@ -27,13 +27,13 @@ class BatchNormKernel<P: PrecisionType>: Kernel, Computable { ...@@ -27,13 +27,13 @@ class BatchNormKernel<P: PrecisionType>: Kernel, Computable {
scaleP[i] = invStd * scaleP[i] scaleP[i] = invStd * scaleP[i]
} }
param.bias.initBuffer(device: device, precision: computePrecision) param.bias.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.scale.initBuffer(device: device, precision: computePrecision) param.scale.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "batchnorm") super.init(device: device, inFunctionName: "batchnorm", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "batchnorm_half") super.init(device: device, inFunctionName: "batchnorm_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -41,12 +41,12 @@ class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -41,12 +41,12 @@ class BilinearInterpKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: BilinearInterpParam<P>) { required init(device: MTLDevice, param: BilinearInterpParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "bilinear_interp_float") super.init(device: device, inFunctionName: "bilinear_interp_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "bilinear_interp_half") super.init(device: device, inFunctionName: "bilinear_interp_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -32,12 +32,12 @@ class BoxcoderKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -32,12 +32,12 @@ class BoxcoderKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: BoxcoderParam<P>) { required init(device: MTLDevice, param: BoxcoderParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 3, 1, 2], computePrecision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "boxcoder_float") super.init(device: device, inFunctionName: "boxcoder_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "boxcoder_half") super.init(device: device, inFunctionName: "boxcoder_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -52,8 +52,8 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -52,8 +52,8 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: ConcatParam<P>) { required init(device: MTLDevice, param: ConcatParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.transpose, computePrecision: GlobalConfig.shared.computePrecision)
let orank = param.output.tensorDim.cout() let orank = param.output.tensorDim.cout()
let num = param.input.count let num = param.input.count
assert(num <= 6) assert(num <= 6)
...@@ -133,16 +133,16 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -133,16 +133,16 @@ class ConcatKernel<P: PrecisionType>: Kernel, Computable{
} }
} }
pm.vdim = (Int32(vdim[0]), Int32(vdim[1]), Int32(vdim[2]), Int32(vdim[3]), Int32(vdim[4]), Int32(vdim[5])) pm.vdim = (Int32(vdim[0]), Int32(vdim[1]), Int32(vdim[2]), Int32(vdim[3]), Int32(vdim[4]), Int32(vdim[5]))
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_float") super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_half") super.init(device: device, inFunctionName: "concat_\(orank)_\(num)_\(v)_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
} }
required init(device: MTLDevice, testParam: ConcatTestParam) { required init(device: MTLDevice, testParam: ConcatTestParam, initContext: InitContext) {
super.init(device: device, inFunctionName: "concat") super.init(device: device, inFunctionName: "concat", initContext: initContext)
} }
} }
...@@ -16,99 +16,99 @@ import Foundation ...@@ -16,99 +16,99 @@ import Foundation
class ConvAddAddPreluKernel<P: PrecisionType>: Kernel, Computable { class ConvAddAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddAddPreluParam<P>) { required init(device: MTLDevice, param: ConvAddAddPreluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision) param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.y.initBuffer(device: device, precision: computePrecision) param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision) param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half", initContext: initContext)
} }
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half", initContext: initContext)
} }
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half", initContext: initContext)
} }
} else if param.filter.width == 1 && param.filter.height == 5 { } else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half", initContext: initContext)
} }
} else if param.filter.width == 5 && param.filter.height == 1 { } else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half", initContext: initContext)
} }
} else { } else {
fatalError(" unsupport yet ") fatalError(" unsupport yet ")
} }
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float", initContext: initContext)
} }
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float", initContext: initContext)
} }
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float", initContext: initContext)
} }
} else if param.filter.width == 1 && param.filter.height == 5 { } else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float", initContext: initContext)
} }
} else if param.filter.width == 5 && param.filter.height == 1 { } else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float", initContext: initContext)
} }
} else { } else {
fatalError(" unsupport yet ") fatalError(" unsupport yet ")
......
...@@ -37,44 +37,44 @@ struct ConvAddBatchNormReluTestParam: TestParam { ...@@ -37,44 +37,44 @@ struct ConvAddBatchNormReluTestParam: TestParam {
} }
class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable { class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam) { required init(device: MTLDevice, testParam: ConvAddBatchNormReluTestParam, initContext: InitContext) {
if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1", initContext: initContext)
} else if testParam.filterSize.channel == 1 { } else if testParam.filterSize.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3", initContext: initContext)
} }
} }
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>) { required init(device: MTLDevice, param: ConvAddBatchNormReluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision) param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.y.initBuffer(device: device, precision: computePrecision) param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.variance.initBuffer(device: device, precision: .Float32) param.variance.initBuffer(device: device, precision: .Float32)
param.mean.initBuffer(device: device, precision: .Float32) param.mean.initBuffer(device: device, precision: .Float32)
param.scale.initBuffer(device: device, precision: .Float32) param.scale.initBuffer(device: device, precision: .Float32)
param.bias.initBuffer(device: device, precision: .Float32) param.bias.initBuffer(device: device, precision: .Float32)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1", initContext: initContext)
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3", initContext: initContext)
} else { } else {
fatalError(" unsupport ") fatalError(" unsupport ")
} }
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1_half") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_1x1_half", initContext: initContext)
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3_half") super.init(device: device, inFunctionName: "depthwise_conv_add_batch_norm_relu_3x3_half", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3_half") super.init(device: device, inFunctionName: "conv_add_batch_norm_relu_3x3_half", initContext: initContext)
} else { } else {
fatalError(" unsupport ") fatalError(" unsupport ")
} }
...@@ -120,10 +120,10 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable ...@@ -120,10 +120,10 @@ class ConvAddBatchNormReluKernel<P: PrecisionType>: Kernel, Computable, Testable
var newBiaseBuffer: MTLBuffer var newBiaseBuffer: MTLBuffer
var newScaleBuffer: MTLBuffer var newScaleBuffer: MTLBuffer
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
......
...@@ -16,36 +16,37 @@ import Foundation ...@@ -16,36 +16,37 @@ import Foundation
class ConvAddKernel<P: PrecisionType>: Kernel, Computable { class ConvAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddParam<P>) { required init(device: MTLDevice, param: ConvAddParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision) let padWhenOneC = !(param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1])
param.y.initBuffer(device: device, precision: computePrecision) param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision, padWhenOneC: padWhenOneC)
param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x1_half") super.init(device: device, inFunctionName: "conv_add_1x1_half", initContext: initContext)
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1] {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_half", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_3x3_half") super.init(device: device, inFunctionName: "conv_add_3x3_half", initContext: initContext)
} else if param.filter.width == 1 && param.filter.height == 5 { } else if param.filter.width == 1 && param.filter.height == 5 {
super.init(device: device, inFunctionName: "conv_add_5x1_half") super.init(device: device, inFunctionName: "conv_add_5x1_half", initContext: initContext)
} else if param.filter.width == 5 && param.filter.height == 1 { } else if param.filter.width == 5 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x5_half") super.init(device: device, inFunctionName: "conv_add_1x5_half", initContext: initContext)
} else { } else {
fatalError(" unsupport yet ") fatalError(" unsupport yet ")
} }
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x1") super.init(device: device, inFunctionName: "conv_add_1x1", initContext: initContext)
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 && param.filter.n == param.input.tensorDim[1] {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3", initContext: initContext)
} else if param.filter.width == 1 && param.filter.height == 5 { } else if param.filter.width == 1 && param.filter.height == 5 {
super.init(device: device, inFunctionName: "conv_add_5x1") super.init(device: device, inFunctionName: "conv_add_5x1", initContext: initContext)
} else if param.filter.width == 5 && param.filter.height == 1 { } else if param.filter.width == 5 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_add_1x5") super.init(device: device, inFunctionName: "conv_add_1x5", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_add_3x3") super.init(device: device, inFunctionName: "conv_add_3x3", initContext: initContext)
} else { } else {
fatalError(" unsupport yet ") fatalError(" unsupport yet ")
} }
......
...@@ -16,99 +16,99 @@ import Foundation ...@@ -16,99 +16,99 @@ import Foundation
class ConvAddPreluKernel<P: PrecisionType>: Kernel, Computable { class ConvAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvAddPreluParam<P>) { required init(device: MTLDevice, param: ConvAddPreluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision) param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.y.initBuffer(device: device, precision: computePrecision) param.y.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision) param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_half", initContext: initContext)
} }
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_half", initContext: initContext)
} }
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_half", initContext: initContext)
} }
} else if param.filter.width == 1 && param.filter.height == 5 { } else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_half", initContext: initContext)
} }
} else if param.filter.width == 5 && param.filter.height == 1 { } else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_half", initContext: initContext)
} }
} else { } else {
fatalError(" unsupport yet ") fatalError(" unsupport yet ")
} }
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_1x1_prelu_other_float", initContext: initContext)
} }
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float") super.init(device: device, inFunctionName: "depthwise_conv_add_3x3_prelu_other_float", initContext: initContext)
} }
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_3x3_prelu_other_float", initContext: initContext)
} }
} else if param.filter.width == 1 && param.filter.height == 5 { } else if param.filter.width == 1 && param.filter.height == 5 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_5x1_prelu_other_float", initContext: initContext)
} }
} else if param.filter.width == 5 && param.filter.height == 1 { } else if param.filter.width == 5 && param.filter.height == 1 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float") super.init(device: device, inFunctionName: "conv_add_1x5_prelu_other_float", initContext: initContext)
} }
} else { } else {
fatalError(" unsupport yet ") fatalError(" unsupport yet ")
......
...@@ -38,44 +38,44 @@ struct ConvBNReluTestParam: TestParam { ...@@ -38,44 +38,44 @@ struct ConvBNReluTestParam: TestParam {
} }
class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable { class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
required init(device: MTLDevice, testParam: ConvBNReluTestParam) { required init(device: MTLDevice, testParam: ConvBNReluTestParam, initContext: InitContext) {
if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 { if testParam.filterSize.width == 1 && testParam.filterSize.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1", initContext: initContext)
} else if testParam.filterSize.channel == 1 { } else if testParam.filterSize.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3", initContext: initContext)
} }
} }
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvBNReluParam<P>) { required init(device: MTLDevice, param: ConvBNReluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision) param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.variance.initBuffer(device: device, precision: .Float32) param.variance.initBuffer(device: device, precision: .Float32)
param.mean.initBuffer(device: device, precision: .Float32) param.mean.initBuffer(device: device, precision: .Float32)
param.scale.initBuffer(device: device, precision: .Float32) param.scale.initBuffer(device: device, precision: .Float32)
param.bias.initBuffer(device: device, precision: .Float32) param.bias.initBuffer(device: device, precision: .Float32)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1") super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1", initContext: initContext)
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3") super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3", initContext: initContext)
} else { } else {
fatalError(" unsupport ") fatalError(" unsupport ")
} }
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half") super.init(device: device, inFunctionName: "conv_batch_norm_relu_1x1_half", initContext: initContext)
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half") super.init(device: device, inFunctionName: "depthwise_conv_batch_norm_relu_3x3_half", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half") super.init(device: device, inFunctionName: "conv_batch_norm_relu_3x3_half", initContext: initContext)
} else { } else {
fatalError(" unsupport ") fatalError(" unsupport ")
} }
...@@ -122,10 +122,10 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable { ...@@ -122,10 +122,10 @@ class ConvBNReluKernel<P: PrecisionType>: Kernel, Computable, Testable {
var newBiaseBuffer: MTLBuffer var newBiaseBuffer: MTLBuffer
var newScaleBuffer: MTLBuffer var newScaleBuffer: MTLBuffer
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)! newBiaseBuffer = device.makeBuffer(bytes: newBiase, length: param.bias.buffer.length)!
newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)! newScaleBuffer = device.makeBuffer(bytes: newScale, length: param.scale.buffer.length)!
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! newBiaseBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)! newScaleBuffer = device.makeBuffer(length: param.bias.buffer.length / 2)!
......
...@@ -26,14 +26,14 @@ public struct MetalConvParam { ...@@ -26,14 +26,14 @@ public struct MetalConvParam {
class ConvKernel<P: PrecisionType>: Kernel, Computable { class ConvKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: MetalConvParam! var metalParam: MetalConvParam!
required init(device: MTLDevice, param: ConvParam<P>) { required init(device: MTLDevice, param: ConvParam<P>, initContext: InitContext) {
param.filter.initBuffer(device: device, precision: ComputePrecision.Float32) param.filter.initBuffer(device: device, precision: ComputePrecision.Float32)
if param.filter.width == 1 && param.filter.height == 1 { if param.filter.width == 1 && param.filter.height == 1 {
super.init(device: device, inFunctionName: "conv_1x1") super.init(device: device, inFunctionName: "conv_1x1", initContext: initContext)
} else if param.filter.channel == 1 { } else if param.filter.channel == 1 {
super.init(device: device, inFunctionName: "depthwise_conv_3x3") super.init(device: device, inFunctionName: "depthwise_conv_3x3", initContext: initContext)
} else if param.filter.width == 3 && param.filter.height == 3 { } else if param.filter.width == 3 && param.filter.height == 3 {
super.init(device: device, inFunctionName: "conv_3x3") super.init(device: device, inFunctionName: "conv_3x3", initContext: initContext)
} else { } else {
fatalError(" unsupport ") fatalError(" unsupport ")
} }
......
...@@ -30,18 +30,18 @@ struct MetalConvTransposeParam { ...@@ -30,18 +30,18 @@ struct MetalConvTransposeParam {
class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{ class ConvTransposeKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: MetalConvTransposeParam! var metalParam: MetalConvTransposeParam!
required init(device: MTLDevice, param: ConvTransposeParam<P>) { required init(device: MTLDevice, param: ConvTransposeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
param.filter.initBuffer(device: device, precision: computePrecision, convertToNHWC: false, withTranspose: true) param.filter.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision, convertToNHWC: false, withTranspose: true)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
if param.stride == [2, 2] && param.stride == [2, 2] { if param.stride == [2, 2] && param.stride == [2, 2] {
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2") super.init(device: device, inFunctionName: "conv_transpose2x2_stride2", initContext: initContext)
} else { } else {
fatalError(" -- conv transpose unsupported yet -- ") fatalError(" -- conv transpose unsupported yet -- ")
} }
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
if param.stride == [2, 2] && param.stride == [2, 2] { if param.stride == [2, 2] && param.stride == [2, 2] {
super.init(device: device, inFunctionName: "conv_transpose2x2_stride2_half") super.init(device: device, inFunctionName: "conv_transpose2x2_stride2_half", initContext: initContext)
} else { } else {
fatalError(" -- conv transpose unsupported yet -- ") fatalError(" -- conv transpose unsupported yet -- ")
} }
......
...@@ -26,8 +26,8 @@ struct ElementwiseAddMetalParam { ...@@ -26,8 +26,8 @@ struct ElementwiseAddMetalParam {
class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable { class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddParam<P>) { required init(device: MTLDevice, param: ElementwiseAddParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: GlobalConfig.shared.computePrecision)
metalParam = ElementwiseAddMetalParam.init() metalParam = ElementwiseAddMetalParam.init()
...@@ -50,10 +50,10 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable { ...@@ -50,10 +50,10 @@ class ElementwiseAddKernel<P: PrecisionType>: Kernel, Computable {
// print("===> elementwise_add fast!!!") // print("===> elementwise_add fast!!!")
metalParam.fast = 1 metalParam.fast = 1
} }
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "elementwise_add") super.init(device: device, inFunctionName: "elementwise_add", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "elementwise_add_half") super.init(device: device, inFunctionName: "elementwise_add_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -17,9 +17,9 @@ import Foundation ...@@ -17,9 +17,9 @@ import Foundation
class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable { class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: ElementwiseAddMetalParam var metalParam: ElementwiseAddMetalParam
required init(device: MTLDevice, param: ElementwiseAddPreluParam<P>) { required init(device: MTLDevice, param: ElementwiseAddPreluParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.inputX.transpose, computePrecision: GlobalConfig.shared.computePrecision)
param.alpha.initBuffer(device: device, precision: computePrecision) param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
metalParam = ElementwiseAddMetalParam.init() metalParam = ElementwiseAddMetalParam.init()
...@@ -43,21 +43,21 @@ class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable { ...@@ -43,21 +43,21 @@ class ElementwiseAddPreluKernel<P: PrecisionType>: Kernel, Computable {
metalParam.fast = 1 metalParam.fast = 1
} }
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_float") super.init(device: device, inFunctionName: "elementwise_add_channel_float", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_element_float") super.init(device: device, inFunctionName: "elementwise_add_element_float", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "elementwise_add_prelu_float") super.init(device: device, inFunctionName: "elementwise_add_prelu_float", initContext: initContext)
} }
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half") super.init(device: device, inFunctionName: "elementwise_add_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "elementwise_add_channel_half") super.init(device: device, inFunctionName: "elementwise_add_channel_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "elementwise_add_channel_half") super.init(device: device, inFunctionName: "elementwise_add_channel_half", initContext: initContext)
} }
} else { } else {
fatalError() fatalError()
......
...@@ -13,76 +13,49 @@ ...@@ -13,76 +13,49 @@
limitations under the License. */ limitations under the License. */
import Foundation import Foundation
import Metal
class FetchParam<P: PrecisionType>: OpParam{
var output: FetchHolder
let input: Texture<P>
let scope: Scope
required init(opDesc: OpDesc, inScope: Scope) throws {
scope = inScope
do {
input = try FetchParam.inputX(inputs: opDesc.inputs, from: inScope)
output = FetchHolder.init(inCapacity: input.numel(), inDim: input.tensorDim.dims)
scope.setOutput(output: output)
} catch let error {
throw error
}
}
//typealias ParamPrecisionType = P
}
class FetchKernel<P: PrecisionType>: Kernel, Computable { class FetchKernel<P: PrecisionType>: Kernel, Computable {
func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws { required init(device: MTLDevice, param: FetchParam<P>, initContext: InitContext) {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil")
}
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture)
encoder.endEncoding()
}
required init(device: MTLDevice, param: FetchParam<P>) {
param.output.initBuffer(device: device) param.output.initBuffer(device: device)
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
if param.input.transpose == [0, 2, 3, 1] { if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch_half") super.init(device: device, inFunctionName: "fetch_half", initContext: initContext)
} else if param.input.transpose == [0, 1, 2, 3] {
switch param.input.tensorDim.cout() {
case 1, 2:
super.init(device: device, inFunctionName: "fetch_1or2_half", initContext: initContext)
default:
fatalError(" not support ")
}
} else { } else {
// fatalError(" not support ") fatalError(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder_half")
print(" not support ")
} }
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
if param.input.transpose == [0, 2, 3, 1] { if param.input.transpose == [0, 2, 3, 1] {
super.init(device: device, inFunctionName: "fetch") super.init(device: device, inFunctionName: "fetch_float", initContext: initContext)
} else if param.input.transpose == [0, 1, 2, 3] {
switch param.input.tensorDim.cout() {
case 1, 2:
super.init(device: device, inFunctionName: "fetch_1or2_float", initContext: initContext)
default:
fatalError(" not support ")
}
} else { } else {
print(" not support ") fatalError(" not support ")
super.init(device: device, inFunctionName: "fetch_placeholder")
// fatalError(" not support ")
} }
} else { } else {
fatalError(" not support ") fatalError(" not support ")
} }
} }
}
class FetchOp<P: PrecisionType>: Operator< FetchKernel<P>, FetchParam<P>>, Runable, Creator, InferShaperable {
typealias OpType = FetchOp<P> func compute(commandBuffer: MTLCommandBuffer, param: FetchParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
func inferShape() { throw PaddleMobileError.predictError(message: " encode is nil")
print(para.input.dim)
}
func runImpl(device: MTLDevice, buffer: MTLCommandBuffer) throws {
do {
try kernel.compute(commandBuffer: buffer, param: para)
} catch let error {
throw error
} }
encoder.setTexture(param.input.metalTexture, index: 0)
encoder.setBuffer(param.output.resultBuffer!, offset: 0, index: 0)
encoder.dispatch(computePipline: pipline, outTexture: param.input.metalTexture)
encoder.endEncoding()
} }
} }
...@@ -26,8 +26,8 @@ class FlattenKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -26,8 +26,8 @@ class FlattenKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: FlattenMetalParam var metalParam: FlattenMetalParam
required init(device: MTLDevice, param: FlattenParam<P>) { required init(device: MTLDevice, param: FlattenParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: computePrecision) param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
var id: [Int32] = [1, 1, 1, 1] var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() { for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i]) id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
...@@ -47,10 +47,10 @@ class FlattenKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -47,10 +47,10 @@ class FlattenKernel<P: PrecisionType>: Kernel, Computable{
let irank = param.input.tensorDim.cout() let irank = param.input.tensorDim.cout()
let orank = param.output.tensorDim.cout() let orank = param.output.tensorDim.cout()
assert(orank == 2) assert(orank == 2)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_float") super.init(device: device, inFunctionName: "reshape_\(irank)_2_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_2_half") super.init(device: device, inFunctionName: "reshape_\(irank)_2_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -17,16 +17,16 @@ import Foundation ...@@ -17,16 +17,16 @@ import Foundation
class MulticlassNMSKernel<P: PrecisionType>: Kernel, Computable{ class MulticlassNMSKernel<P: PrecisionType>: Kernel, Computable{
let pipline1: MTLComputePipelineState let pipline1: MTLComputePipelineState
required init(device: MTLDevice, param: MulticlassNMSParam<P>) { required init(device: MTLDevice, param: MulticlassNMSParam<P>, initContext: InitContext) {
param.middleOutput.initBuffer(device: device) param.middleOutput.initBuffer(device: device)
param.bboxOutput.initBuffer(device: device) param.bboxOutput.initBuffer(device: device)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", inPaddleMobileLib: true) pipline1 = device.pipeLine(funcName: "nms_fetch_bbox", metalLoadMode: initContext.metalLoadMode, metalLibPath: initContext.metalLibPath)
super.init(device: device, inFunctionName: "nms_fetch_result") super.init(device: device, inFunctionName: "nms_fetch_result", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", inPaddleMobileLib: true) pipline1 = device.pipeLine(funcName: "nms_fetch_bbox_half", metalLoadMode: initContext.metalLoadMode, metalLibPath: initContext.metalLibPath)
super.init(device: device, inFunctionName: "nms_fetch_result_half") super.init(device: device, inFunctionName: "nms_fetch_result_half", initContext: initContext)
} else { } else {
fatalError( " unsupport precision " ) fatalError( " unsupport precision " )
} }
......
...@@ -26,8 +26,8 @@ struct PoolMetalParam { ...@@ -26,8 +26,8 @@ struct PoolMetalParam {
class PoolKernel<P: PrecisionType>: Kernel, Computable{ class PoolKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PoolMetalParam var metalParam: PoolMetalParam
required init(device: MTLDevice, param: PoolParam<P>) { required init(device: MTLDevice, param: PoolParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
var poolType: Int32 var poolType: Int32
switch param.poolType { switch param.poolType {
...@@ -48,10 +48,10 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -48,10 +48,10 @@ class PoolKernel<P: PrecisionType>: Kernel, Computable{
poolType: poolType poolType: poolType
) )
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "pool") super.init(device: device, inFunctionName: "pool_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "pool_half") super.init(device: device, inFunctionName: "pool_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -15,24 +15,24 @@ ...@@ -15,24 +15,24 @@
import Foundation import Foundation
class PreluKernel<P: PrecisionType>: Kernel, Computable{ class PreluKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: PreluParam<P>) { required init(device: MTLDevice, param: PreluParam<P>, initContext: InitContext) {
param.alpha.initBuffer(device: device, precision: computePrecision) param.alpha.initBuffer(device: device, precision: GlobalConfig.shared.computePrecision)
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "prelu_channel") super.init(device: device, inFunctionName: "prelu_channel", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "prelu_element") super.init(device: device, inFunctionName: "prelu_element", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "prelu_other") super.init(device: device, inFunctionName: "prelu_other", initContext: initContext)
} }
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
if param.mode == "channel" { if param.mode == "channel" {
super.init(device: device, inFunctionName: "prelu_channel_half") super.init(device: device, inFunctionName: "prelu_channel_half", initContext: initContext)
} else if param.mode == "element" { } else if param.mode == "element" {
super.init(device: device, inFunctionName: "prelu_element_half") super.init(device: device, inFunctionName: "prelu_element_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "prelu_other_half") super.init(device: device, inFunctionName: "prelu_other_half", initContext: initContext)
} }
} else { } else {
fatalError() fatalError()
......
...@@ -32,29 +32,28 @@ struct PriorBoxMetalParam { ...@@ -32,29 +32,28 @@ struct PriorBoxMetalParam {
class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: PriorBoxMetalParam! var metalParam: PriorBoxMetalParam!
required init(device: MTLDevice, param: PriorBoxParam<P>) { required init(device: MTLDevice, param: PriorBoxParam<P>, initContext: InitContext) {
let originDim = param.output.tensorDim; let originDim = param.output.tensorDim;
param.output.tensorDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]]) param.output.tensorDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]])
param.output.padToFourDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]]) param.output.padToFourDim = Dim.init(inDim: [1, originDim[0], originDim[1], originDim[2] * originDim[3]])
param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 1, 2, 3], computePrecision: GlobalConfig.shared.computePrecision)
param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: computePrecision) param.outputVariances.initTexture(device: device, inTranspose: [2, 0, 1, 3], computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
if computePrecision == .Float32 {
if param.min_max_aspect_ratios_order { if param.min_max_aspect_ratios_order {
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder") super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "prior_box") super.init(device: device, inFunctionName: "prior_box", initContext: initContext)
} }
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
if param.min_max_aspect_ratios_order { if param.min_max_aspect_ratios_order {
super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half") super.init(device: device, inFunctionName: "prior_box_MinMaxAspectRatiosOrder_half", initContext: initContext)
} else { } else {
super.init(device: device, inFunctionName: "prior_box_half") super.init(device: device, inFunctionName: "prior_box_half", initContext: initContext)
} }
} else { } else {
fatalError() fatalError()
...@@ -105,12 +104,12 @@ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -105,12 +104,12 @@ class PriorBoxKernel<P: PrecisionType>: Kernel, Computable{
} }
} }
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout<Float16>.size) let buffer = device.makeBuffer(length: outputAspectRatior.count * MemoryLayout<Float16>.size)
float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count) float32ToFloat16(input: &outputAspectRatior, output:(buffer?.contents())!, count: outputAspectRatior.count)
param.newAspectRatios = buffer param.newAspectRatios = buffer
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
let buffer = device.makeBuffer(bytes: outputAspectRatior, length: outputAspectRatior.count * MemoryLayout<Float32>.size, options: []) let buffer = device.makeBuffer(bytes: outputAspectRatior, length: outputAspectRatior.count * MemoryLayout<Float32>.size, options: [])
param.newAspectRatios = buffer param.newAspectRatios = buffer
} else { } else {
......
...@@ -25,11 +25,11 @@ class ReluKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -25,11 +25,11 @@ class ReluKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: ReluParam<P>) { required init(device: MTLDevice, param: ReluParam<P>, initContext: InitContext) {
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "relu") super.init(device: device, inFunctionName: "relu", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "relu_half") super.init(device: device, inFunctionName: "relu_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -31,8 +31,8 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -31,8 +31,8 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: ReshapeMetalParam var metalParam: ReshapeMetalParam
required init(device: MTLDevice, param: ReshapeParam<P>) { required init(device: MTLDevice, param: ReshapeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: computePrecision) param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
var id: [Int32] = [1, 1, 1, 1] var id: [Int32] = [1, 1, 1, 1]
for i in 0..<param.input.tensorDim.cout() { for i in 0..<param.input.tensorDim.cout() {
id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i]) id[4-param.input.tensorDim.cout()+i] = Int32(param.input.tensorDim[i])
...@@ -51,23 +51,23 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -51,23 +51,23 @@ class ReshapeKernel<P: PrecisionType>: Kernel, Computable{
) )
let irank = param.input.tensorDim.cout() let irank = param.input.tensorDim.cout()
let orank = param.output.tensorDim.cout() let orank = param.output.tensorDim.cout()
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_float") super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_half") super.init(device: device, inFunctionName: "reshape_\(irank)_\(orank)_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
} }
required init(device: MTLDevice, testParam: ReshapeTestParam) { required init(device: MTLDevice, testParam: ReshapeTestParam, initContext: InitContext) {
metalParam = ReshapeMetalParam.init( metalParam = ReshapeMetalParam.init(
idim: (0, 0, 0, 0), idim: (0, 0, 0, 0),
itrans: (0, 0, 0, 0), itrans: (0, 0, 0, 0),
odim: (0, 0, 0, 0), odim: (0, 0, 0, 0),
otrans: (0, 0, 0, 0) otrans: (0, 0, 0, 0)
) )
super.init(device: device, inFunctionName: "reshape") super.init(device: device, inFunctionName: "reshape", initContext: initContext)
} }
func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: ReshapeParam<P>) throws {
......
...@@ -20,6 +20,17 @@ struct ResizeBilinearMetalParam { ...@@ -20,6 +20,17 @@ struct ResizeBilinearMetalParam {
} }
class ResizeBilinearKernel<P: PrecisionType>: Kernel, Computable{ class ResizeBilinearKernel<P: PrecisionType>: Kernel, Computable{
required init(device: MTLDevice, param: ResizeBilinearParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "resize_bilinear", initContext: initContext)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "resize_bilinear_half", initContext: initContext)
} else {
fatalError()
}
}
func compute(commandBuffer: MTLCommandBuffer, param: ResizeBilinearParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: ResizeBilinearParam<P>) throws {
guard let encoder = commandBuffer.makeComputeCommandEncoder() else { guard let encoder = commandBuffer.makeComputeCommandEncoder() else {
throw PaddleMobileError.predictError(message: " encode is nil") throw PaddleMobileError.predictError(message: " encode is nil")
...@@ -35,15 +46,6 @@ class ResizeBilinearKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -35,15 +46,6 @@ class ResizeBilinearKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: ResizeBilinearParam<P>) {
param.output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision)
if computePrecision == .Float32 {
super.init(device: device, inFunctionName: "resize_bilinear")
} else if computePrecision == .Float16 {
super.init(device: device, inFunctionName: "resize_bilinear_half")
} else {
fatalError()
}
}
} }
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
import Foundation
class ScaleKernel: CusomKernel {
init(device: MTLDevice, shape: Shape, metalLoadMode: MetalLoadMode, metalLibPath: String?) {
if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "scale", outputDim: shape, metalLoadModel: metalLoadMode, metalLibPath: metalLibPath)
} else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "scale_half", outputDim: shape, metalLoadModel: metalLoadMode, metalLibPath: metalLibPath)
} else {
fatalError(" unsupport ")
}
}
}
...@@ -28,12 +28,12 @@ class ShapeKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -28,12 +28,12 @@ class ShapeKernel<P: PrecisionType>: Kernel, Computable{
// encoder.endEncoding() // encoder.endEncoding()
} }
required init(device: MTLDevice, param: ShapeParam<P>) { required init(device: MTLDevice, param: ShapeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: computePrecision) param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "shape") super.init(device: device, inFunctionName: "shape", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "shape_half") super.init(device: device, inFunctionName: "shape_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -22,16 +22,16 @@ struct SoftmaxMetalParam { ...@@ -22,16 +22,16 @@ struct SoftmaxMetalParam {
class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{ class SoftmaxKernel<P: PrecisionType>: Kernel, Computable{
var metalParam: SoftmaxMetalParam var metalParam: SoftmaxMetalParam
required init(device: MTLDevice, param: SoftmaxParam<P>) { required init(device: MTLDevice, param: SoftmaxParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: computePrecision) param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
metalParam = SoftmaxMetalParam.init( metalParam = SoftmaxMetalParam.init(
N: Int32(param.input.tensorDim[0]), N: Int32(param.input.tensorDim[0]),
K: Int32(param.input.tensorDim[1]) K: Int32(param.input.tensorDim[1])
) )
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "softmax_float") super.init(device: device, inFunctionName: "softmax_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "softmax_half") super.init(device: device, inFunctionName: "softmax_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -37,13 +37,13 @@ class SplitKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -37,13 +37,13 @@ class SplitKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: SplitParam<P>) { required init(device: MTLDevice, param: SplitParam<P>, initContext: InitContext) {
// param.output.initTexture(device: device, computePrecision: computePrecision) // param.output.initTexture(device: device, computePrecision: computePrecision)
let num = param.outputList.count let num = param.outputList.count
let rank = param.input.tensorDim.cout() let rank = param.input.tensorDim.cout()
assert(num >= 2 && num <= 4) assert(num >= 2 && num <= 4)
for output in param.outputList { for output in param.outputList {
output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: computePrecision) output.initTexture(device: device, inTranspose: param.input.transpose, computePrecision: GlobalConfig.shared.computePrecision)
} }
smp = SplitMetalParam.init() smp = SplitMetalParam.init()
smp.idim = (Int32(param.input.dim[0]), Int32(param.input.dim[1]), Int32(param.input.dim[2]), Int32(param.input.dim[3])) smp.idim = (Int32(param.input.dim[0]), Int32(param.input.dim[1]), Int32(param.input.dim[2]), Int32(param.input.dim[3]))
...@@ -81,10 +81,10 @@ class SplitKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -81,10 +81,10 @@ class SplitKernel<P: PrecisionType>: Kernel, Computable{
if v == "normal" { if v == "normal" {
fatalError("split unsupported") fatalError("split unsupported")
} }
if computePrecision == .Float32 { if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_float") super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_float", initContext: initContext)
} else if computePrecision == .Float16 { } else if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_half") super.init(device: device, inFunctionName: "split_\(rank)_\(num)_\(v)_half", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -33,12 +33,12 @@ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{ ...@@ -33,12 +33,12 @@ class Texture2DTo2DArrayKernel<P: PrecisionType>: Kernel, Computable{
encoder.endEncoding() encoder.endEncoding()
} }
required init(device: MTLDevice, param: FeedParam<P>) { required init(device: MTLDevice, param: FeedParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: computePrecision) param.output.initTexture(device: device, inTranspose: [0, 2, 3, 1], computePrecision: GlobalConfig.shared.computePrecision)
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array_half") super.init(device: device, inFunctionName: "texture2d_to_2d_array_half", initContext: initContext)
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
super.init(device: device, inFunctionName: "texture2d_to_2d_array") super.init(device: device, inFunctionName: "texture2d_to_2d_array", initContext: initContext)
} else { } else {
fatalError() fatalError()
} }
......
...@@ -22,8 +22,8 @@ struct TransposeMetalParam { ...@@ -22,8 +22,8 @@ struct TransposeMetalParam {
class TransposeKernel<P: PrecisionType>: Kernel, Computable { class TransposeKernel<P: PrecisionType>: Kernel, Computable {
var metalParam: TransposeMetalParam = TransposeMetalParam.init() var metalParam: TransposeMetalParam = TransposeMetalParam.init()
required init(device: MTLDevice, param: TransposeParam<P>) { required init(device: MTLDevice, param: TransposeParam<P>, initContext: InitContext) {
param.output.initTexture(device: device, computePrecision: computePrecision) param.output.initTexture(device: device, computePrecision: GlobalConfig.shared.computePrecision)
let rank = param.input.tensorDim.cout() let rank = param.input.tensorDim.cout()
var axis: [Int] = [0, 1, 2, 3] var axis: [Int] = [0, 1, 2, 3]
for i in 0..<param.axis.count { for i in 0..<param.axis.count {
...@@ -43,13 +43,13 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable { ...@@ -43,13 +43,13 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable {
metalParam.oC = Int32(param.output.dim[3]) metalParam.oC = Int32(param.output.dim[3])
metalParam.axis = (Int32(naxis[0]), Int32(naxis[1]), Int32(naxis[2]), Int32(naxis[3])) metalParam.axis = (Int32(naxis[0]), Int32(naxis[1]), Int32(naxis[2]), Int32(naxis[3]))
var kernelFunc = "transpose_undefined" var kernelFunc = "transpose_undefined"
if computePrecision == .Float16 { if GlobalConfig.shared.computePrecision == .Float16 {
if param.input.transpose == axis { if param.input.transpose == axis {
kernelFunc = "transpose_copy_half" kernelFunc = "transpose_copy_half"
} else { } else {
kernelFunc = "transpose_\(rank)_half" kernelFunc = "transpose_\(rank)_half"
} }
} else if computePrecision == .Float32 { } else if GlobalConfig.shared.computePrecision == .Float32 {
if param.input.transpose == axis { if param.input.transpose == axis {
kernelFunc = "transpose_copy_float" kernelFunc = "transpose_copy_float"
} else { } else {
...@@ -60,7 +60,7 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable { ...@@ -60,7 +60,7 @@ class TransposeKernel<P: PrecisionType>: Kernel, Computable {
} }
print("===========>", kernelFunc) print("===========>", kernelFunc)
print(metalParam) print(metalParam)
super.init(device: device, inFunctionName: kernelFunc) super.init(device: device, inFunctionName: kernelFunc, initContext: initContext)
} }
func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws { func compute(commandBuffer: MTLCommandBuffer, param: TransposeParam<P>) throws {
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void batchnorm(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 * nscale [[buffer(0)]],
const device float4 * nbias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const float4 input = inTexture.read(gid.xy, gid.z);
float4 output = input * nscale[gid.z] + nbias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void batchnorm_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 * newScale [[buffer(0)]],
const device half4 * newBias [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
const half4 input = inTexture.read(gid.xy, gid.z);
half4 output = input * newScale[gid.z] + newBias[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
//
// BatchNormRelu.metal
// paddle-mobile
//
#include <metal_stdlib>
using namespace metal;
struct MetalConvParam {
short offsetX;
short offsetY;
short offsetZ;
ushort strideX;
ushort strideY;
};
kernel void batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *new_scale [[buffer(0)]],
const device float4 *new_biase [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
float4 input;
float4 output;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
input = inTexture.sample(sample, gid.x, gid.y, gid.z);
output = fmax(input * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(bilinear_interp, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> output [[texture(1)]],
constant bilinear_interp_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
P w = gid.x * pm.ratio_w;
P h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
P w1lambda = w - w0, h1lambda = h - h0;
P w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
VECTOR(P, 4) r0 = input.read(uint2(w0, h0), gid.z);
VECTOR(P, 4) r1 = input.read(uint2(w1, h0), gid.z);
VECTOR(P, 4) r2 = input.read(uint2(w0, h1), gid.z);
VECTOR(P, 4) r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1)
+ h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct bilinear_interp_param {
float ratio_h;
float ratio_w;
};
#define P float
#include "BilinearInterp.inc.metal"
#undef P
#define P half
#include "BilinearInterp.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(boxcoder, P)(texture2d_array<P, access::read> priorBox [[texture(0)]],
texture2d_array<P, access::read> priorBoxVar [[texture(1)]],
texture2d_array<P, access::read> targetBox [[texture(2)]],
texture2d_array<P, access::write> output[[texture(3)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) p = priorBox.read(uint2(0, gid.x), gid.z);
VECTOR(P, 4) pv = priorBoxVar.read(uint2(0, gid.x), gid.z);
VECTOR(P, 4) t;
t[0] = targetBox.read(uint2(0, gid.x), gid.z)[0];
t[1] = targetBox.read(uint2(1, gid.x), gid.z)[0];
t[2] = targetBox.read(uint2(2, gid.x), gid.z)[0];
t[3] = targetBox.read(uint2(3, gid.x), gid.z)[0];
P px = (p.x + p.z) / 2;
P py = (p.y + p.w) / 2;
P pw = p.z - p.x;
P ph = p.w - p.y;
P tx = pv.x * t.x * pw + px;
P ty = pv.y * t.y * ph + py;
P tw = exp(pv.z * t.z) * pw;
P th = exp(pv.w * t.w) * ph;
VECTOR(P, 4) r;
r.x = tx - tw / 2;
r.y = ty - th / 2;
r.z = tx + tw / 2;
r.w = ty + th / 2;
output.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define P float
#include "BoxCoder.inc.metal"
#undef P
#define P half
#include "BoxCoder.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
inline void xyzn2abcd_1(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = abcd[2] = 0;
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_2(int xyzn[4], int abcd[4]) {
abcd[0] = abcd[1] = 0;
abcd[2] = xyzn[1];
abcd[3] = xyzn[0] * 4 + xyzn[3];
}
inline void xyzn2abcd_3(int xyzn[4], int abcd[4]) {
abcd[0] = 0;
abcd[3] = xyzn[0];
abcd[2] = xyzn[1];
abcd[1] = xyzn[2] * 4 + xyzn[3];
}
inline void xyzn2abcd_4(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
uint t = xyzn[2] * 4 + xyzn[3];
abcd[0] = t / C;
abcd[3] = t % C;
}
inline void abcd2xyzn_1(int abcd[4], int xyzn[4]) {
xyzn[1] = xyzn[2] = 0;
xyzn[0] = abcd[3] / 4;
xyzn[1] = abcd[3] % 4;
}
inline void abcd2xyzn_2(int abcd[4], int xyzn[4]) {
xyzn[2] = 0;
xyzn[1] = abcd[2];
xyzn[0] = abcd[3] / 4;
xyzn[3] = abcd[3] % 4;
}
inline void abcd2xyzn_3(int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[3];
xyzn[1] = abcd[2];
xyzn[2] = abcd[1] / 4;
xyzn[3] = abcd[1] % 4;
}
inline void abcd2xyzn_4(int C, int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[2];
xyzn[1] = abcd[1];
uint t = abcd[0] * C + abcd[3];
xyzn[2] = t / 4;
xyzn[3] = t % 4;
}
inline void xyzn2abcd(int C, int xyzn[4], int abcd[4]) {
abcd[2] = xyzn[0];
abcd[1] = xyzn[1];
uint t = xyzn[2] * 4 + xyzn[3];
abcd[0] = t / C;
abcd[3] = t % C;
}
inline void abcd2xyzn(int C, int abcd[4], int xyzn[4]) {
xyzn[0] = abcd[2];
xyzn[1] = abcd[1];
uint t = abcd[0] * C + abcd[3];
xyzn[2] = t / 4;
xyzn[3] = t % 4;
}
inline int32_t abcd2index(int32_t dim[4], int32_t abcd[4]) {
int32_t r = abcd[0];
r = r * dim[1] + abcd[1];
r = r * dim[2] + abcd[2];
r = r * dim[3] + abcd[3];
return r;
}
inline void index2abcd(int32_t dim[4], int32_t ind, int32_t abcd[4]) {
abcd[3] = ind % dim[3]; ind /= dim[3];
abcd[2] = ind % dim[2]; ind /= dim[2];
abcd[1] = ind % dim[1]; ind /= dim[1];
abcd[0] = ind;
}
inline void trans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[i] = ipos[trans[i]];
}
}
inline void invtrans(int32_t trans[4], int32_t ipos[4], int32_t opos[4]) {
for (int i = 0; i < 4; i++) {
opos[trans[i]] = ipos[i];
}
}
struct MetalConvParam {
short offsetX;
short offsetY;
short offsetZ;
ushort strideX;
ushort strideY;
ushort dilationX;
ushort dilationY;
};
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
#if V == VX
#define VV x
#elif V == VY
#define VV y
#elif V == VZ
#define VV z
#else
#define VV normal
#endif
#if V == VNORMAL
//kernel void FUNC(concat, R, N, normal, P)(array<texture2d_array<P, access::read>, N> in [[texture(0)]],
// texture2d_array<P, access::read> out_x [[texture(N)]],
// texture2d_array<P, access::write> out [[texture(N+1)]],
// constant ConcatParam & pm [[buffer(0)]],
// uint3 gid [[thread_position_in_grid]]) {
//}
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif
texture2d_array<P, access::read> inx [[texture(N)]],
texture2d_array<P, access::write> out [[texture(N+1)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
ConcatParam cp = pm;
int xyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, abcd[4], oxyzn[4];
VECTOR(P, 4) r = inx.read(gid.xy, gid.z);
for (int i = 0; i < 4; i++) {
xyzn[3] = i;
#if R == 4
xyzn2abcd_4(cp.odim[3], xyzn, abcd);
#else
FUNC_R(xyzn2abcd, R)(xyzn, abcd);
#endif
int k = abcd[cp.axis] - cp.offset;
if (k < 0) continue;
int j = 0;
for (; j < N; j++) {
if (k < cp.vdim[j]) {
break;
}
k -= cp.vdim[j];
}
if (j == N) {
continue;
}
int ta = cp.odim[cp.axis];
abcd[cp.axis] = k;
cp.odim[cp.axis] = cp.vdim[j];
#if R == 4
abcd2xyzn_4(cp.odim[3], abcd, oxyzn);
#else
FUNC_R(abcd2xyzn, R)(abcd, oxyzn);
#endif
cp.odim[cp.axis] = ta;
switch (j) {
case 0: r[i] = in0.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
case 1: r[i] = in1.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#if N >= 3
case 2: r[i] = in2.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 4
case 3: r[i] = in3.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 5
case 4: r[i] = in4.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
#if N >= 6
case 5: r[i] = in5.read(uint2(oxyzn[0], oxyzn[1]), oxyzn[2])[oxyzn[3]]; break;
#endif
}
}
out.write(r, gid.xy, gid.z);
}
#endif // V == NORMAL
#if V == VX
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int x = gid.x - pm.offset;
if (x < 0) return;
if (x < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
x -= pm.vdim[0];
if (x < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
x -= pm.vdim[1];
if (x < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
x -= pm.vdim[2];
if (x < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
x -= pm.vdim[3];
if (x < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
x -= pm.vdim[4];
if (x < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(x, gid.y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VX
#if V == VY
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int y = gid.y - pm.offset;
if (y < 0) return;
if (y < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
y -= pm.vdim[0];
if (y < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
y -= pm.vdim[1];
if (y < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
y -= pm.vdim[2];
if (y < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
y -= pm.vdim[3];
if (y < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
y -= pm.vdim[4];
if (y < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(uint2(gid.x, y), gid.z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VY
#if V == VZ
kernel void FUNC(concat, R, N, VV, P)(texture2d_array<P, access::read> in0 [[texture(0)]],
texture2d_array<P, access::read> in1 [[texture(1)]],
#if N >= 3
texture2d_array<P, access::read> in2 [[texture(2)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::read> in3 [[texture(3)]],
#endif // N >= 4
#if N >= 5
texture2d_array<P, access::read> in4 [[texture(4)]],
#endif // N >= 5
#if N >= 6
texture2d_array<P, access::read> in5 [[texture(5)]],
#endif // N >= 6
texture2d_array<P, access::write> out [[texture(N)]],
constant ConcatParam & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
int z = gid.z - pm.offset;
if (z < 0) return;
if (z < pm.vdim[0]) {
VECTOR(P, 4) r = in0.read(gid.xy, gid.z);
out.write(r, gid.xy, gid.z);
return;
}
z -= pm.vdim[0];
if (z < pm.vdim[1]) {
VECTOR(P, 4) r = in1.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#if N >= 3
z -= pm.vdim[1];
if (z < pm.vdim[2]) {
VECTOR(P, 4) r = in2.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 3
#if N >= 4
z -= pm.vdim[2];
if (z < pm.vdim[3]) {
VECTOR(P, 4) r = in3.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 4
#if N >= 5
z -= pm.vdim[3];
if (z < pm.vdim[4]) {
VECTOR(P, 4) r = in4.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 5
#if N >= 6
z -= pm.vdim[4];
if (z < pm.vdim[5]) {
VECTOR(P, 4) r = in5.read(gid.xy, z);
out.write(r, gid.xy, gid.z);
return;
}
#endif // N >= 6
}
#endif // V == VZ
#undef VV
#endif // #ifdef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ConcatParam {
int32_t odim[4];
int32_t axis;
int32_t offset;
int32_t trans[4];
int32_t vdim[6];
};
#define VNORMAL 1
#define VX 2
#define VY 3
#define VZ 4
// >> fast mode
// only support concat_{2,3,4}_{2,3,4,5,6}_y_{float,half}
// only support concat_{3,4}_{2,3,4,5,6}_x_{float,half}
// only support concat_{1,2,3,4}_{2,3,4,5,6}_z_{float,half}
// >> normal mode (loop mode)
// ssd-ar: (R=4, N=3, V=z), (R=3, N=2, V=y), (R=2, N=5, V=x), (R=3, N=5, V=x)
// ssd: (R=2, N=6, V=y), (R=3, N=6, V=y)
// genet: (R=4, N=2, V=normal)
// ssd-ar: (R=3, N=5, V=x)
#define V VX
#define R 3
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=2, N=5, V=x)
#define V VX
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd-ar: (R=4, N=3, V=z)
#define V VZ
#define R 4
#define N 3
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=2, N=6, V=y)
#define V VY
#define R 2
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
// ssd: (R=3, N=6, V=y)
#define V VY
#define R 3
#define N 6
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VNORMAL
#define R 4
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 2
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
#define V VY
#define R 2
#define N 5
#define P float
#include "ConcatKernel.inc.metal"
#undef P
#define P half
#include "ConcatKernel.inc.metal"
#undef P
#undef N
#undef R
#undef V
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
kernel void conv_add_batch_norm_relu_1x1_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3_half(
texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
const device half4 *new_scale [[buffer(3)]],
const device half4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + float4(biase[gid.z])) * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
/*---------------------------------------------*/
kernel void conv_add_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
const device float4 *new_scale [[buffer(3)]],
const device float4 *new_biase [[buffer(4)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax((output + biase[gid.z]) * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#pragma mark - convAdd
kernel void conv_add_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_5x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
float4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x5(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
float4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = biase[gid.z];
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
#pragma mark - half
kernel void conv_add_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_add_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
half4 output = biase[gid.z];
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_5x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_y = param.dilationY;
half4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_add_1x5_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
half4 output = biase[gid.z];
ushort dilation_x = param.dilationX;
half4 input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
outTexture.write(output, gid.xy, gid.z);
}
kernel void test_conv_add_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *biase [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
if (gid.x > 0 || gid.y > 0 || gid.z > 0) { return; }
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + biase[gid.z];
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include "Macro.metal"
#pragma mark - convAdd
kernel void FUNC3_(conv_add_1x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample,float2(posInInput.x, posInInput.y), i);
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y - dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y - dilation_y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y + dilation_y), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y + dilation_y), i);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
// output = output + float4(biase[gid.z]);
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_5x1, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];;
ushort dilation_y = param.dilationY;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 2 * dilation_y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - dilation_y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + dilation_y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 2 * dilation_y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(conv_add_1x5, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device VECTOR(P, 4) *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 5;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
VECTOR(P, 4) output = biase[gid.z];
ushort dilation_x = param.dilationX;
VECTOR(P, 4) input[5];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 2 * dilation_x, posInInput.y), i);
input[1] = inTexture.sample(sample, float2(posInInput.x - dilation_x, posInInput.y), i);
input[2] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[3] = inTexture.sample(sample, float2(posInInput.x + dilation_x, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x + 2 * dilation_x, posInInput.y), i);
for (int j = 0; j < 5; ++j) {
VECTOR(P, 4) weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
VECTOR(P, 4) weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
VECTOR(P, 4) weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
VECTOR(P, 4) weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
kernel void FUNC3_(depthwise_conv_add_3x3, PRELU_TYPE, P)(texture2d_array<P, access::sample> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device P *weights [[buffer(1)]],
const device VECTOR(P, 4) *biase [[buffer(2)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(3)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(3)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
VECTOR(P, 4) output = biase[gid.z];
VECTOR(P, 4) inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
VECTOR(P, 4) input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(VECTOR(P, 4)(output), gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE prelu_channel
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT prelu_element
#define PRELU_TYPE prelu_element
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER prelu_other
#define PRELU_TYPE prelu_other
#include "ConvAddPrelu.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
#pragma mark - conv bn relu
kernel void conv_batch_norm_relu_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_batch_norm_relu_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
const device float4 *new_scale [[buffer(2)]],
const device float4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax(output * new_scale[gid.z] + new_biase[gid.z], 0.0);
outTexture.write(output, gid.xy, gid.z);
}
#pragma mark - half
kernel void conv_batch_norm_relu_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(float4(input), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(float4(input), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(float4(input), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(float4(input), float4(weight_w));
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_batch_norm_relu_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
const device half4 *new_scale [[buffer(2)]],
const device half4 *new_biase [[buffer(3)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
output = fmax(output * float4(new_scale[gid.z]) + float4(new_biase[gid.z]), 0.0);
outTexture.write(half4(output), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// conv
#pragma mark -- conv
kernel void conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(input[j], weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(input[j], weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(input[j], weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(input[j], weight_w);
}
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void depthwise_conv_3x3(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
float4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
float4 input = inputs[j];
output.x += input.x * weights[weithTo + 0 * kernelHXW + j];
output.y += input.y * weights[weithTo + 1 * kernelHXW + j];
output.z += input.z * weights[weithTo + 2 * kernelHXW + j];
output.w += input.w * weights[weithTo + 3 * kernelHXW + j];
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_1x1(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
float4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
float4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(input, weight_x);
float4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(input, weight_y);
float4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(input, weight_z);
float4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(input, weight_w);
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
const ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input[9];
for (uint i = 0; i < input_arr_size; ++i) {
input[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), i);
input[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), i);
input[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), i);
input[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), i);
input[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
input[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), i);
input[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), i);
input[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), i);
input[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), i);
for (int j = 0; j < 9; ++j) {
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.x += dot(float4(input[j]), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.y += dot(float4(input[j]), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.z += dot(float4(input[j]), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + j * input_arr_size + i];
output.w += dot(float4(input[j]), float4(weight_w));
}
}
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void depthwise_conv_3x3_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
uint output_slice = gid.z;
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 9;
uint weithTo = gid.z * kernelHXW * 4;
float4 output = float4(0.0);
half4 inputs[9];
inputs[0] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y - 1), output_slice);
inputs[1] = inTexture.sample(sample, float2(posInInput.x, posInInput.y - 1), output_slice);
inputs[2] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y - 1), output_slice);
inputs[3] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y), output_slice);
inputs[4] = inTexture.sample(sample, float2(posInInput.x, posInInput.y), output_slice);
inputs[5] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y), output_slice);
inputs[6] = inTexture.sample(sample, float2(posInInput.x - 1, posInInput.y + 1), output_slice);
inputs[7] = inTexture.sample(sample, float2(posInInput.x, posInInput.y + 1), output_slice);
inputs[8] = inTexture.sample(sample, float2(posInInput.x + 1, posInInput.y + 1), output_slice);
for (int j = 0; j < 9; ++j) {
half4 input = inputs[j];
output.x += float(input.x) * float(weights[weithTo + 0 * kernelHXW + j]);
output.y += float(input.y) * float(weights[weithTo + 1 * kernelHXW + j]);
output.z += float(input.z) * float(weights[weithTo + 2 * kernelHXW + j]);
output.w += float(input.w) * float(weights[weithTo + 3 * kernelHXW + j]);
}
outTexture.write(half4(output), gid.xy, gid.z);
}
kernel void conv_1x1_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
ushort2 stride = ushort2(param.strideX, param.strideY);
ushort2 posInInput = ushort2(gid.xy) * stride + ushort2(param.offsetX, param.offsetY);
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint kernelHXW = 1;
uint input_arr_size = inTexture.get_array_size();
uint weithTo = gid.z * kernelHXW * input_arr_size * 4;
float4 output = float4(0.0);
half4 input;
for (uint i = 0; i < input_arr_size; ++i) {
input = inTexture.sample(sample, float2(posInInput.x, posInInput.y), i);
half4 weight_x = weights[weithTo + 0 * kernelHXW * input_arr_size + i];
output.x += dot(float4(input), float4(weight_x));
half4 weight_y = weights[weithTo + 1 * kernelHXW * input_arr_size + i];
output.y += dot(float4(input), float4(weight_y));
half4 weight_z = weights[weithTo + 2 * kernelHXW * input_arr_size + i];
output.z += dot(float4(input), float4(weight_z));
half4 weight_w = weights[weithTo + 3 * kernelHXW * input_arr_size + i];
output.w += dot(float4(input), float4(weight_w));
}
outTexture.write(half4(output), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct MetalConvTransposeParam{
ushort kernelW;
ushort kernelH;
ushort strideX;
ushort strideY;
ushort paddingX;
ushort paddingY;
ushort dilationX;
ushort dilationY;
};
kernel void conv_transpose2x2_stride2(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device float4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
int input_array_size = inTexture.get_array_size();
int kernel_index_x = gid.x % 2;
int kernel_index_y = gid.y % 2;
int kernel_index = kernel_index_y * 2 + kernel_index_x;
int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
int input_x = gid.x / 2;
int input_y = gid.y / 2;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output = float4(0.0);
for (int i = 0; i < input_array_size; ++i) {
float4 input = inTexture.sample(sample, float2(input_x, input_y), i);
float4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
float4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
float4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
float4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
output.x += dot(input, kernel_slice0);
output.y += dot(input, kernel_slice1);
output.z += dot(input, kernel_slice2);
output.w += dot(input, kernel_slice3);
}
outTexture.write(output, gid.xy, gid.z);
}
kernel void conv_transpose2x2_stride2_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant MetalConvTransposeParam &param [[buffer(0)]],
const device half4 *weights [[buffer(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
int input_array_size = inTexture.get_array_size();
int kernel_index_x = gid.x % 2;
int kernel_index_y = gid.y % 2;
int kernel_index = kernel_index_y * 2 + kernel_index_x;
int kernel_to = gid.z * input_array_size * 4 * 4 + (kernel_index * input_array_size);
int input_x = gid.x / 2;
int input_y = gid.y / 2;
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 output = float4(0.0);
for (int i = 0; i < input_array_size; ++i) {
half4 input = inTexture.sample(sample, float2(input_x, input_y), i);
half4 kernel_slice0 = weights[kernel_to + input_array_size * 4 * 0 + i];
half4 kernel_slice1 = weights[kernel_to + input_array_size * 4 * 1 + i];
half4 kernel_slice2 = weights[kernel_to + input_array_size * 4 * 2 + i];
half4 kernel_slice3 = weights[kernel_to + input_array_size * 4 * 3 + i];
output.x += dot(float4(input), float4(kernel_slice0));
output.y += dot(float4(input), float4(kernel_slice1));
output.z += dot(float4(input), float4(kernel_slice2));
output.w += dot(float4(input), float4(kernel_slice3));
}
outTexture.write(half4(output), gid.xy, gid.z);
}
//kernel void conv_transpose(texture2d_array<float, access::sample> inTexture [[texture(0)]],
// texture2d_array<float, access::write> outTexture [[texture(1)]],
// constant MetalConvTransposeParam &param [[buffer(0)]],
// const device float4 *weights [[buffer(1)]],
// uint3 gid [[thread_position_in_grid]]){
// if (gid.x >= outTexture.get_width() ||
// gid.y >= outTexture.get_height() ||
// gid.z >= outTexture.get_array_size()) {
// return;
// }
//
// int input_array_size = inTexture.get_array_size();
//
// uint kernel_one_output_slice = input_array_size * param.kernelW * param.kernelH;
//
// uint kernel_stride_z = gid.z * 4 * (kernel_one_output_slice);
//
// constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
//
// float4 output;
//
// for (int w = 0; w < param.kernelW; ++w) {
// int top = gid.x - w * param.dilationX + param.paddingX;
// int input_x = top / param.strideX;
// if (top < 0 || input_x >= int(inTexture.get_width())) {
// continue;
// }
//
// for (int h = 0; h < param.kernelH; ++h) {
// int top_y = gid.y - h * param.dilationY + param.paddingY;
// int input_y = top_y / param.strideY;
// if (top_y < 0 || input_y >= int(inTexture.get_height())) {
// continue;
// }
//
// uint kernel_index = (w * param.kernelH + h) * inTexture.get_array_size();
//
// for (int slice = 0; slice < input_array_size; ++slice) {
//
// float4 input;
// float4 kernel_slice = weights[kernel_stride_z + 0 * kernel_one_output_slice + kernel_index + slice];
// float4 kernel_slice1 = weights[kernel_stride_z + 1 * kernel_one_output_slice + kernel_index + slice];
//
// float4 kernel_slice2 = weights[kernel_stride_z + 2 * kernel_one_output_slice + kernel_index + slice];
//
// float4 kernel_slice3 = weights[kernel_stride_z + 3 * kernel_one_output_slice + kernel_index + slice];
//
// input = inTexture.sample(sample, float2(input_x, input_y), slice);
// output.x += dot(input, kernel_slice);
// output.y += dot(input, kernel_slice1);
// output.z += dot(input, kernel_slice2);
// output.w += dot(input, kernel_slice3);
// }
// }
// }
//
// outTexture.write(output, gid.xy, gid.z);
//}
//
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
kernel void elementwise_add(texture2d_array<float, access::read> inputX [[texture(0)]],
texture2d_array<float, access::read> inputY [[texture(1)]],
texture2d_array<float, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
float4 rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
float4 r = rx + ry;
outTexture.write(r, gid.xy, gid.z);
}
kernel void elementwise_add_half(texture2d_array<half, access::read> inputX [[texture(0)]],
texture2d_array<half, access::read> inputY [[texture(1)]],
texture2d_array<half, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
half4 rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
half4 r = rx + ry;
outTexture.write(r, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
kernel void FUNC3_(elementwise_add, PRELU_TYPE, P)(texture2d_array<P, access::read> inputX [[texture(0)]],
texture2d_array<P, access::read> inputY [[texture(1)]],
texture2d_array<P, access::write> outTexture [[texture(2)]],
constant ElementwiseAddParam &pm [[buffer(0)]],
#ifdef PRELU_CHANNEL
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_ELEMENT
const device VECTOR(P, 4) *alpha [[buffer(1)]],
#endif
#ifdef PRELU_OTHER
const device P *alpha [[buffer(1)]],
#endif
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
VECTOR(P, 4) rx, ry;
if (pm.fast == 1) {
rx = inputX.read(gid.xy, gid.z);
ry = inputY.read(gid.xy, gid.z);
} else {
rx = inputX.read(gid.xy, gid.z);
int32_t x_xyzn[4] = {int32_t(gid.x), int32_t(gid.y), int32_t(gid.z), 0}, x_abcd[4], t_abcd[4];
int32_t y_abcd[4] = {0, 0, 0, 0}, y_xyzn[4];
int32_t xtrans[4] = {pm.xtrans[0], pm.xtrans[1], pm.xtrans[2], pm.xtrans[3]};
int32_t ytrans[4] = {pm.ytrans[0], pm.ytrans[1], pm.ytrans[2], pm.ytrans[3]};
int32_t yshift = 4 - pm.ylen - pm.axis;
for (int n = 0; n < 4; n++) {
x_xyzn[3] = n;
xyzn2abcd(pm.xdim[3], x_xyzn, x_abcd);
invtrans(xtrans, x_abcd, t_abcd);
for (int k = pm.axis; k < (pm.axis + pm.ylen); k++) {
y_abcd[yshift+k] = t_abcd[k];
}
trans(ytrans, y_abcd, t_abcd);
abcd2xyzn(pm.ydim[3], t_abcd, y_xyzn);
ry[n] = inputY.read(uint2(y_xyzn[0], y_xyzn[1]), y_xyzn[2])[y_xyzn[3]];
}
}
VECTOR(P, 4) output = rx + ry;
#ifdef PRELU_CHANNEL
VECTOR(P, 4) alpha_value = alpha[gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_ELEMENT
int alpha_to = (gid.y * outTexture.get_width() + gid.x) * outTexture.get_array_size();
VECTOR(P, 4) alpha_value = alpha[alpha_to + gid.z];
output.x = output.x > 0 ? output.x : (alpha_value.x * output.x);
output.y = output.y > 0 ? output.y : (alpha_value.y * output.y);
output.z = output.z > 0 ? output.z : (alpha_value.z * output.z);
output.w = output.w > 0 ? output.w : (alpha_value.w * output.w);
#endif
#ifdef PRELU_OTHER
P alpha_value = alpha[0];
output.x = output.x > 0 ? output.x : (alpha_value * output.x);
output.y = output.y > 0 ? output.y : (alpha_value * output.y);
output.z = output.z > 0 ? output.z : (alpha_value * output.z);
output.w = output.w > 0 ? output.w : (alpha_value * output.w);
#endif
outTexture.write(output, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ElementwiseAddParam {
int32_t fast;
int32_t axis;
int32_t ylen;
int32_t xdim[4];
int32_t xtrans[4];
int32_t ydim[4];
int32_t ytrans[4];
};
#define P float
#define PRELU_CHANNEL prelu_channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
#define P half
#define PRELU_CHANNEL channel
#define PRELU_TYPE channel
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_CHANNEL
#define PRELU_ELEMENT element
#define PRELU_TYPE prelu_element
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_ELEMENT
#define PRELU_OTHER other
#define PRELU_TYPE prelu_other
#include "ElementwiseAddPreluKernel.inc.metal"
#undef PRELU_TYPE
#undef PRELU_OTHER
#undef P
...@@ -12,34 +12,40 @@ ...@@ -12,34 +12,40 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <metal_stdlib> #ifdef P
using namespace metal;
kernel void fetch(texture2d_array<float, access::read> inTexture [[texture(0)]], #define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
device float *output [[buffer(0)]], #define CONCAT2_(a, b) a ## _ ## b
uint3 gid [[thread_position_in_grid]]) { #define CONCAT2(a, b) a ## b
#define FUNC(m, n, q) CONCAT3_(m, n, q)
#define FUNC_T(m, n) CONCAT2_(m, n)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC_T(fetch, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() || if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() || gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) { gid.z >= inTexture.get_array_size()) {
return; return;
} }
int input_width = inTexture.get_width(); int input_width = inTexture.get_width();
int input_height = inTexture.get_height(); int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z); const VECTOR(P, 4) input = inTexture.read(gid.xy, gid.z);
int output_to = 4 * input_width * input_height; int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x; output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y; output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z; output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w; output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
} }
kernel void FUNC(fetch, 1or2, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0)]], device float4 *output [[buffer(0)]],
device float * output [[buffer(0)]], uint3 gid [[thread_position_in_grid]]) {
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() || if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() || gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) { gid.z >= inTexture.get_array_size()) {
...@@ -47,25 +53,9 @@ kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0 ...@@ -47,25 +53,9 @@ kernel void fetch_half(texture2d_array<half, access::read> inTexture [[texture(0
} }
int input_width = inTexture.get_width(); int input_width = inTexture.get_width();
int input_height = inTexture.get_height(); const VECTOR(P, 4) input = inTexture.read(gid.xy, gid.z);
const half4 input = inTexture.read(gid.xy, gid.z); output[gid.y * input_width + gid.x] = float4(input);
int output_to = 4 * input_width * input_height;
output[gid.z * output_to + 0 * input_width * input_height + gid.y * input_width + gid.x] = input.x;
output[gid.z * output_to + 1 * input_width * input_height + gid.y * input_width + gid.x] = input.y;
output[gid.z * output_to + 2 * input_width * input_height + gid.y * input_width + gid.x] = input.z;
output[gid.z * output_to + 3 * input_width * input_height + gid.y * input_width + gid.x] = input.w;
}
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
} }
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define P float
#include "FetchKernel.inc.metal"
#undef P
#define P half
#include "FetchKernel.inc.metal"
#undef P
kernel void fetch_placeholder(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
kernel void fetch_placeholder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
// 占位函数, 啥也没干
kernel void place_holder(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
}
struct OutputDim {
ushort width;
ushort height;
ushort strideX;
ushort strideY;
};
kernel void resize(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant OutputDim &params [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const uint2 pos = gid.xy * uint2(params.strideX, params.strideY);
const half4 input = inTexture.read(pos);
outTexture.write(half4(input.x, input.y, input.z, input.w), gid.xy, gid.z);
}
kernel void texture2d_to_2d_array(texture2d<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const float4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
kernel void texture2d_to_2d_array_half(texture2d<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height()){
return;
}
const half4 input = inTexture.read(gid.xy);
outTexture.write(input, gid.xy, 0);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC2_(a, b) CONCAT2_(a, b)
#define FUNC3_(a, b, c) CONCAT3_(a, b, c)
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void nms_fetch_result(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_result_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input.x;
}
kernel void nms_fetch_bbox(texture2d_array<float, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const float4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = input;
}
kernel void nms_fetch_bbox_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
device float4 *output [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= inTexture.get_width() ||
gid.y >= inTexture.get_height() ||
gid.z >= inTexture.get_array_size()) {
return;
}
int input_width = inTexture.get_width();
// int input_height = inTexture.get_height();
const half4 input = inTexture.read(gid.xy, gid.z);
output[gid.y * input_width + gid.x] = float4(input);
}
...@@ -12,22 +12,10 @@ ...@@ -12,22 +12,10 @@
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <metal_stdlib> #ifdef P
#include "Common.metal"
using namespace metal;
struct PoolParam { kernel void FUNC2_(pool, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
int ksizeX; texture2d_array<P, access::write> outTexture [[texture(1)]],
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]], constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) { uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() || if (gid.x >= outTexture.get_width() ||
...@@ -40,7 +28,7 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]], ...@@ -40,7 +28,7 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height())); int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0); ymin = max(ymin, 0);
float4 r = 0; VECTOR(P, 4) r = 0;
if (pm.poolType == 0) { if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z); r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) { for (int x = xmin; x < xmax; x++) {
...@@ -54,40 +42,9 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]], ...@@ -54,40 +42,9 @@ kernel void pool(texture2d_array<float, access::read> inTexture [[texture(0)]],
r += inTexture.read(uint2(x, y), gid.z); r += inTexture.read(uint2(x, y), gid.z);
} }
} }
r /= pm.ksizeX * pm.ksizeY; r /= (xmax - xmin) * (ymax - ymin);
} }
outTexture.write(r, gid.xy, gid.z); outTexture.write(r, gid.xy, gid.z);
} }
kernel void pool_half(texture2d_array<half, access::read> inTexture [[texture(0)]], #endif
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant PoolParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int xmin = gid.x * pm.strideX - pm.paddingX;
int xmax = min(xmin + pm.ksizeX, int(inTexture.get_width()));
xmin = max(xmin, 0);
int ymin = gid.y * pm.strideX - pm.paddingX;
int ymax = min(ymin + pm.ksizeX, int(inTexture.get_height()));
ymin = max(ymin, 0);
half4 r = 0;
if (pm.poolType == 0) {
r = inTexture.read(uint2(xmin, ymin), gid.z);
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r = fmax(r, inTexture.read(uint2(x, y), gid.z));
}
}
} else if (pm.poolType == 1) {
for (int x = xmin; x < xmax; x++) {
for (int y = ymin; y < ymax; y++) {
r += inTexture.read(uint2(x, y), gid.z);
}
}
r /= pm.ksizeX * pm.ksizeY;
}
outTexture.write(r, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Macro.metal"
using namespace metal;
struct PoolParam {
int ksizeX;
int ksizeY;
int strideX;
int strideY;
int paddingX;
int paddingY;
int poolType;
};
#define P half
#include "PoolKernel.inc.metal"
#undef P
#define P float
#include "PoolKernel.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void prelu_channel(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
float4 alpha_value = alpha[gid.z];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_element(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
float4 alpha_value = alpha[alpha_to + gid.z];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_other(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
const device float *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
float alpha_value = alpha[0];
float4 output;
output.x = input.x > 0 ? input.x : (alpha_value * input.x);
output.y = input.y > 0 ? input.y : (alpha_value * input.y);
output.z = input.z > 0 ? input.z : (alpha_value * input.z);
output.w = input.w > 0 ? input.w : (alpha_value * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_channel_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
half4 alpha_value = alpha[gid.z];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_element_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half4 *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
int alpha_to = (gid.y * inTexture.get_width() + gid.x) * inTexture.get_array_size();
half4 alpha_value = alpha[alpha_to + gid.z];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value.x * input.x);
output.y = input.y > 0 ? input.y : (alpha_value.y * input.y);
output.z = input.z > 0 ? input.z : (alpha_value.z * input.z);
output.w = input.w > 0 ? input.w : (alpha_value.w * input.w);
outTexture.write(output, gid.xy, gid.z);
}
kernel void prelu_other_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
const device half *alpha [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]){
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) {
return;
}
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
half4 input = inTexture.sample(sample, float2(gid.x, gid.y), gid.z);
half alpha_value = alpha[0];
half4 output;
output.x = input.x > 0 ? input.x : (alpha_value * input.x);
output.y = input.y > 0 ? input.y : (alpha_value * input.y);
output.z = input.z > 0 ? input.z : (alpha_value * input.z);
output.w = input.w > 0 ? input.w : (alpha_value * input.w);
outTexture.write(output, gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct PriorBoxMetalParam {
float offset;
float stepWidth;
float stepHeight;
float minSize;
float maxSize;
float imageWidth;
float imageHeight;
bool clip;
uint numPriors;
uint aspecRatiosSize;
uint minSizeSize;
uint maxSizeSize;
};
kernel void prior_box(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
texture2d_array<float, access::write> varianceTexture [[texture(2)]],
const device float *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z < param.aspecRatiosSize) {
float ar = aspect_ratios[gid.z];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
} else if (gid.z >= param.aspecRatiosSize) {
if (param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(max_box, gid.xy, gid.z);
}
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(variances_output, gid.xy, gid.z);
}
}
kernel void prior_box_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]],
const device half *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z < param.aspecRatiosSize) {
half ar = aspect_ratios[gid.z];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
} else if (gid.z >= param.aspecRatiosSize) {
if (param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(half4(max_box), gid.xy, gid.z);
}
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(half4(variances_output), gid.xy, gid.z);
}
}
kernel void prior_box_MinMaxAspectRatiosOrder(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outBoxTexture [[texture(1)]],
texture2d_array<float, access::write> varianceTexture [[texture(2)]],
const device float *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z == 0) {
box_width = box_height = param.minSize / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
if (gid.z == 1 && param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
int aspect_to = 0;
if (param.maxSizeSize > 0) {
aspect_to = gid.z - 2;
} else {
aspect_to = gid.z - 1;
}
if (aspect_to >= 0 && aspect_to < int(param.aspecRatiosSize)) {
int skip = 0;
for (int i = 0; i < aspect_to + 1; ++i) {
if (fabs(aspect_ratios[i] - 1.) < 1e-6) {
skip += 1;
}
}
aspect_to += skip;
float ar = aspect_ratios[aspect_to];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(res, gid.xy, gid.z);
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(variances_output, gid.xy, gid.z);
}
}
kernel void prior_box_MinMaxAspectRatiosOrder_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outBoxTexture [[texture(1)]],
texture2d_array<half, access::write> varianceTexture [[texture(2)]],
const device half *aspect_ratios [[buffer(0)]],
constant PriorBoxMetalParam &param [[buffer(1)]],
const device float4 *variances [[buffer(2)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outBoxTexture.get_width() ||
gid.y >= outBoxTexture.get_height() ||
gid.z >= outBoxTexture.get_array_size()) return;
float center_x = (gid.x + param.offset) * param.stepWidth;
float center_y = (gid.y + param.offset) * param.stepHeight;
float box_width, box_height;
if (gid.z == 0) {
box_width = box_height = param.minSize / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
if (gid.z == 1 && param.maxSizeSize > 0) {
box_width = box_height = sqrt(param.minSize * param.maxSize) / 2;
float4 max_box;
max_box.x = (center_x - box_width) / param.imageWidth;
max_box.y = (center_y - box_height) / param.imageHeight;
max_box.z = (center_x + box_width) / param.imageWidth;
max_box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = min(max(max_box, 0.0), 1.0);
} else {
res = max_box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
int aspect_to = 0;
if (param.maxSizeSize > 0) {
aspect_to = gid.z - 2;
} else {
aspect_to = gid.z - 1;
}
if (aspect_to > 0 && aspect_to < int(param.aspecRatiosSize) && fabs(aspect_ratios[aspect_to] - 1.) > 1e-6) {
float ar = aspect_ratios[aspect_to];
box_width = param.minSize * sqrt(ar) / 2;
box_height = param.minSize / sqrt(ar) / 2;
float4 box;
box.x = (center_x - box_width) / param.imageWidth;
box.y = (center_y - box_height) / param.imageHeight;
box.z = (center_x + box_width) / param.imageWidth;
box.w = (center_y + box_height) / param.imageHeight;
float4 res;
if (param.clip) {
res = fmin(fmax(box, 0.0), 1.0);
} else {
res = box;
}
outBoxTexture.write(half4(res), gid.xy, gid.z);
}
float4 variance = variances[0];
if (gid.z < param.numPriors) {
float4 variances_output;
variances_output.x = variance.x;
variances_output.y = variance.y;
variances_output.z = variance.z;
variances_output.w = variance.w;
varianceTexture.write(half4(variances_output), gid.xy, gid.z);
}
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void relu_half(texture2d_array<half, access::sample> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const half4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(half4(relu), gid.xy, gid.z);
}
kernel void relu(texture2d_array<float, access::sample> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
constexpr sampler s(coord::pixel, filter::nearest, address::clamp_to_zero);
const float4 input = inTexture.read(gid.xy, gid.z);
const float4 relu = fmax((float4)input, 0.0);
outTexture.write(float4(relu), gid.xy, gid.z);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define FUNC(f, r1, r2, p) CONCAT4_(f, r1, r2, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
kernel void FUNC(reshape, RIN, ROUT, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant ReshapeParam &rp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0}, oabcd[4], ixyzn[4], iabcd[4];
ReshapeParam lrp = rp;
int oC = lrp.odim[lrp.otrans[3]];
int iC = lrp.idim[lrp.itrans[3]];
int count = lrp.odim[0] * lrp.odim[1] * lrp.odim[2] * lrp.odim[3];
VECTOR(P, 4) r;
for (int n = 0; n < 4; n++) {
oxyzn[3] = n;
#if ROUT == 4
xyzn2abcd_4(oC, oxyzn, oabcd);
#else
FUNC_R(xyzn2abcd, ROUT)(oxyzn, oabcd);
#endif
int tabcd[4];
invtrans(lrp.otrans, oabcd, tabcd);
int index = abcd2index(lrp.odim, tabcd);
if (index < count) {
index2abcd(lrp.idim, index, tabcd);
trans(lrp.itrans, tabcd, iabcd);
#if RIN == 4
abcd2xyzn_4(iC, iabcd, ixyzn);
#else
FUNC_R(abcd2xyzn, RIN)(iabcd, ixyzn);
#endif
r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
} else {
r[n] = 0;
}
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONRITIONS OF ANY KINR, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct ReshapeParam {
int32_t idim[4];
int32_t itrans[4];
int32_t odim[4];
int32_t otrans[4];
};
#define P float
#define RIN 4
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 3
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 2
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 1
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#undef P
#define P half
#define RIN 4
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 3
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 2
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#define RIN 1
#define ROUT 4
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 3
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 2
#include "ReshapeKernel.inc.metal"
#undef ROUT
#define ROUT 1
#include "ReshapeKernel.inc.metal"
#undef ROUT
#undef RIN
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct resize_bilinear_param {
// int32_t out_h;
// int32_t out_w;
float ratio_h;
float ratio_w;
};
kernel void resize_bilinear(texture2d_array<float, access::read> input [[texture(0)]],
texture2d_array<float, access::write> output [[texture(2)]],
constant resize_bilinear_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
float4 r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
float w = gid.x * pm.ratio_w;
float h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
float w1lambda = w - w0, h1lambda = h - h0;
float w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
float4 r0 = input.read(uint2(w0, h0), gid.z);
float4 r1 = input.read(uint2(w1, h0), gid.z);
float4 r2 = input.read(uint2(w0, h1), gid.z);
float4 r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
}
kernel void resize_bilinear_half(texture2d_array<half, access::read> input [[texture(0)]],
texture2d_array<half, access::write> output [[texture(2)]],
constant resize_bilinear_param & pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
half4 r;
if ((input.get_width() == output.get_width()) && (input.get_height() == output.get_height())) {
r = input.read(gid.xy, gid.z);
} else {
half w = gid.x * pm.ratio_w;
half h = gid.y * pm.ratio_h;
uint w0 = w, h0 = h;
uint w1 = w0 + 1, h1 = h0 + 1;
half w1lambda = w - w0, h1lambda = h - h0;
half w2lambda = 1.0 - w1lambda, h2lambda = 1.0 - h1lambda;
if (w1 >= input.get_width()) w1 = w0;
if (h1 >= input.get_height()) h1 = h0;
half4 r0 = input.read(uint2(w0, h0), gid.z);
half4 r1 = input.read(uint2(w1, h0), gid.z);
half4 r2 = input.read(uint2(w0, h1), gid.z);
half4 r3 = input.read(uint2(w1, h1), gid.z);
r = h2lambda * (w2lambda * r0 + w1lambda * r1) + h1lambda * (w2lambda * r2 + w1lambda * r3);
}
output.write(r, gid.xy, gid.z);
output.write(r, gid.xy, gid.z);
}
//
// Scale.metal
// paddle-mobile
//
// Created by liuRuiLong on 2019/1/4.
// Copyright © 2019 orange. All rights reserved.
//
#include <metal_stdlib>
using namespace metal;
kernel void scale(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<float, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(input, gid);
}
kernel void scale_half(texture2d<float, access::sample> inTexture [[texture(0)]], texture2d<half, access::write> outTexture [[texture(1)]], uint2 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height()) return;
float w_stride = inTexture.get_width() / outTexture.get_width();
float h_stride = inTexture.get_height() / outTexture.get_height();
constexpr sampler sample(coord::pixel, filter::nearest, address::clamp_to_zero);
float4 input = inTexture.sample(sample, float2(gid.x * w_stride, gid.y * h_stride), 0);
outTexture.write(half4(input), gid);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
kernel void shape() {
}
kernel void shape_half() {
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define FUNC(f, p) CONCAT2_(f, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(softmax, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant SoftmaxParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
if (gid.x >= outTexture.get_width() ||
gid.y >= outTexture.get_height() ||
gid.z >= outTexture.get_array_size()) return;
// int zsize = inTexture.get_array_size();
P maxv = inTexture.read(uint2(0, gid.y), 0)[0];
int group = sp.K / 4;
int remain = sp.K % 4;
for (int x = 0; x < group; x++) {
VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
maxv = max(maxv, max(r[0], max(r[1], max(r[2], r[3]))));
}
if (remain > 0) {
VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
for (int i = 0; i < remain; i++) {
maxv = max(maxv, r[i]);
}
}
VECTOR(P, 4) rsum = {0, 0, 0, 0};
for (int x = 0; x < group; x++) {
VECTOR(P, 4) r = inTexture.read(uint2(x, gid.y), 0);
rsum += exp(r - maxv);
}
P sum = rsum[0] + rsum[1] + rsum[2] + rsum[3];
if (remain > 0) {
VECTOR(P, 4) r = inTexture.read(uint2(group, gid.y), 0);
for (int i = 0; i < remain; i++) {
sum += exp(r[i] - maxv);
}
}
VECTOR(P, 4) rr = inTexture.read(gid.xy, gid.z);
rr = exp(rr - maxv) / sum;
outTexture.write(rr, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
using namespace metal;
struct SoftmaxParam {
int N;
int K;
};
#define P float
#include "Softmax.inc.metal"
#undef P
#define P half
#include "Softmax.inc.metal"
#undef P
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define CONCAT4_(a, b, c, d) a ## _ ## b ## _ ## c ## _ ## d
#define CONCAT5_(a, b, c, d, e) a ## _ ## b ## _ ## c ## _ ## d ## _ ## e
#define FUNC(f, r, n, v, p) CONCAT5_(f, r, n, v, p)
#define VECTOR(p, n) CONCAT2(p, n)
#define FUNC_R(f, r) CONCAT2_(f, r)
#if V == VX
#define VV x
#elif V == VY
#define VV y
#elif V == VZ
#define VV z
#else
#define VV normal
#endif
#if V == VY
kernel void FUNC(split, R, N, VV, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> out1 [[texture(1)]],
texture2d_array<P, access::write> out2 [[texture(2)]],
#if N >= 3
texture2d_array<P, access::write> out3 [[texture(3)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::write> out4 [[texture(4)]],
#endif // N >= 4
constant SplitParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r = input.read(gid.xy, gid.z);
int y = gid.y - sp.offset;
if (y < sp.vdim[0]) {
out1.write(r, gid.xy, gid.z);
return;
}
y -= sp.vdim[0];
if (y < sp.vdim[1]) {
out2.write(r, uint2(gid.x, y), gid.z);
return;
}
#if N >= 3
y -= sp.vdim[1];
if (y < sp.vdim[2]) {
out3.write(r, uint2(gid.x, y), gid.z);
return;
}
#endif // N >= 3
#if N >= 4
y -= sp.vdim[2];
if (y < sp.vdim[3]) {
out4.write(r, uint2(gid.x, y), gid.z);
return;
}
#endif // N >= 4
}
#endif // V == VY
#if V == VX
kernel void FUNC(split, R, N, VV, P)(texture2d_array<P, access::read> input [[texture(0)]],
texture2d_array<P, access::write> out1 [[texture(1)]],
texture2d_array<P, access::write> out2 [[texture(2)]],
#if N >= 3
texture2d_array<P, access::write> out3 [[texture(3)]],
#endif // N >= 3
#if N >= 4
texture2d_array<P, access::write> out4 [[texture(4)]],
#endif // N >= 4
constant SplitParam &sp [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r = input.read(gid.xy, gid.z);
int x = gid.x;
if (x < sp.vdim[0]) {
out1.write(r, gid.xy, gid.z);
return;
}
x -= sp.vdim[0];
if (x < sp.vdim[1]) {
out2.write(r, uint2(x, gid.y), gid.z);
return;
}
#if N >= 3
x -= sp.vdim[1];
if (x < sp.vdim[2]) {
out3.write(r, uint2(x, gid.y), gid.z);
return;
}
#endif // N >= 3
#if N >= 4
x -= sp.vdim[2];
if (x < sp.vdim[3]) {
out4.write(r, uint2(x, gid.y), gid.z);
return;
}
#endif // N >= 4
}
#endif // V == VX
#undef VV
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct SplitParam {
int32_t idim[4];
int32_t axis;
int32_t offset;
int32_t trans[4];
int32_t vdim[4];
};
#define VNORMAL 1
#define VX 2
#define VY 3
#define VZ 4
// only support split_{2, 3, 4}_{2, 3, 4}_y_{float, half}
// only support split_{3, 4}_{2, 3, 4}_x_{float, half}
//// ssd-ar: (R=3, N=2, V=y)
#define V VY
#define R 3
#define N 2
#define P float
#include "Split.inc.metal"
#undef P
#define P half
#include "Split.inc.metal"
#undef P
#undef N
#undef R
#undef V
//// ssd-ar: (R=2, N=2, V=y)
#define V VY
#define R 2
#define N 2
#define P float
#include "Split.inc.metal"
#undef P
#define P half
#include "Split.inc.metal"
#undef P
#undef N
#undef R
#undef V
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef P
#define CONCAT2(a, b) a ## b
#define CONCAT2_(a, b) a ## _ ## b
#define CONCAT3_(a, b, c) a ## _ ## b ## _ ## c
#define FUNC(f, r, p) CONCAT3_(f, r, p)
#define VECTOR(p, n) CONCAT2(p, n)
kernel void FUNC(transpose, R, P)(texture2d_array<P, access::read> inTexture [[texture(0)]],
texture2d_array<P, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
VECTOR(P, 4) r;
int oxyzn[4] = {int(gid.x), int(gid.y), int(gid.z), 0};
int iabcd[4], oabcd[4], ixyzn[4];
for (int n = 0; n < 4; n++) {
oxyzn[3] = n;
#if R == 4
xyzn2abcd_4(pm.oC, oxyzn, iabcd);
#endif // R == 4
#if R == 3
xyzn2abcd_3(oxyzn, oabcd);
#endif // R == 3
#if R == 2
xyzn2abcd_2(oxyzn, oabcd);
#endif // R == 2
iabcd[pm.axis[0]] = oabcd[0];
iabcd[pm.axis[1]] = oabcd[1];
iabcd[pm.axis[2]] = oabcd[2];
iabcd[pm.axis[3]] = oabcd[3];
#if R == 4
abcd2xyzn_4(pm.iC, iabcd, ixyzn);
#endif // R == 4
#if R == 3
abcd2xyzn_3(iabcd, ixyzn);
#endif // R == 3
#if R == 2
abcd2xyzn_2(iabcd, ixyzn);
#endif // R == 2
r[n] = inTexture.read(uint2(ixyzn[0], ixyzn[1]), ixyzn[2])[ixyzn[3]];
}
outTexture.write(r, gid.xy, gid.z);
}
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <metal_stdlib>
#include "Common.metal"
using namespace metal;
struct TransposeParam {
int iC;
int oC;
int axis[4];
};
kernel void transpose_copy_float(texture2d_array<float, access::read> inTexture [[texture(0)]],
texture2d_array<float, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
}
kernel void transpose_copy_half(texture2d_array<half, access::read> inTexture [[texture(0)]],
texture2d_array<half, access::write> outTexture [[texture(1)]],
constant TransposeParam &pm [[buffer(0)]],
uint3 gid [[thread_position_in_grid]]) {
outTexture.write(inTexture.read(gid.xy, gid.z), gid.xy, gid.z);
}
#define R 4
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
#define R 3
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
#define R 2
#define P float
#include "TransposeKernel.inc.metal"
#undef P
#define P half
#include "TransposeKernel.inc.metal"
#undef P
#undef R
...@@ -22,24 +22,27 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam { ...@@ -22,24 +22,27 @@ class MulticlassNMSParam<P: PrecisionType>: OpParam {
bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope) bboxes = try MulticlassNMSParam.getFirstTensor(key: "BBoxes", map: opDesc.inputs, from: inScope)
output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope) output = try MulticlassNMSParam.outputOut(outputs: opDesc.outputs, from: inScope)
middleOutput = FetchHolder.init(inCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim.dims) middleOutput = FetchHolder.init(inPaddedCapacity: scores.tensorDim.numel(), inDim: scores.tensorDim)
bboxOutput = FetchHolder.init(inCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim.dims) bboxOutput = FetchHolder.init(inPaddedCapacity: bboxes.tensorDim.numel(), inDim: bboxes.tensorDim)
} catch let error { } catch let error {
throw error throw error
} }
} }
var bboxOutput: FetchHolder var bboxOutput: FetchHolder
var middleOutput: FetchHolder var middleOutput: FetchHolder
let scores: Texture<P> let scores: Texture
let bboxes: Texture<P> let bboxes: Texture
var output: Texture<P> var output: Texture
} }
class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{ class MulticlassNMSOp<P: PrecisionType>: Operator<MulticlassNMSKernel<P>, MulticlassNMSParam<P>>, Runable, Creator, InferShaperable{
func inputVariant() -> [String : [Variant]] { func inputVariant() -> [String : [MTLBuffer]] {
return ["Scores" : [para.middleOutput], "BBoxes" : [para.bboxOutput]] guard let scoreBuffer = para.middleOutput.resultBuffer, let bboxBuffer = para.middleOutput.resultBuffer else {
fatalError()
}
return ["Scores" : [scoreBuffer], "BBoxes" : [bboxBuffer]]
} }
func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) { func computeMiddleResult(device: MTLDevice, buffer: MTLCommandBuffer) {
......
...@@ -32,8 +32,8 @@ class PoolParam<P: PrecisionType>: OpParam { ...@@ -32,8 +32,8 @@ class PoolParam<P: PrecisionType>: OpParam {
} }
// let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self) // let buffer = input.metalTexture.buffer.contents().assumingMemoryBound(to: P.self)
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
var ksize: [Int32] var ksize: [Int32]
var stride: [Int32] var stride: [Int32]
var padding: [Int32] var padding: [Int32]
......
...@@ -28,8 +28,8 @@ class PreluParam<P: PrecisionType>: OpParam { ...@@ -28,8 +28,8 @@ class PreluParam<P: PrecisionType>: OpParam {
} }
let mode: String let mode: String
let alpha: Tensor<P> let alpha: Tensor<P>
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
} }
class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable{ class PreluOp<P: PrecisionType>: Operator<PreluKernel<P>, PreluParam<P>>, Runable, Creator, InferShaperable{
......
...@@ -53,10 +53,10 @@ class PriorBoxParam<P: PrecisionType>: OpParam { ...@@ -53,10 +53,10 @@ class PriorBoxParam<P: PrecisionType>: OpParam {
var stepH: Float32 var stepH: Float32
let offset: Float32 let offset: Float32
let input: Texture<P> let input: Texture
let inputImage: Texture<P> let inputImage: Texture
var output: Texture<P> var output: Texture
let outputVariances: Texture<P> let outputVariances: Texture
} }
class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{ class PriorBoxOp<P: PrecisionType>: Operator<PriorBoxKernel<P>, PriorBoxParam<P>>, Runable, Creator, InferShaperable{
......
...@@ -25,8 +25,8 @@ class ReluParam<P: PrecisionType>: OpParam { ...@@ -25,8 +25,8 @@ class ReluParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
} }
class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable{ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, Creator, InferShaperable{
...@@ -47,10 +47,11 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable, ...@@ -47,10 +47,11 @@ class ReluOp<P: PrecisionType>: Operator<ReluKernel<P>, ReluParam<P>>, Runable,
func delogOutput() { func delogOutput() {
print(" \(type) output: ") print(" \(type) output: ")
print(para.output.metalTexture)
print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray()) print(para.output.metalTexture.toTensor(dim: (n: para.output.tensorDim[0], c: para.output.tensorDim[1], h: para.output.tensorDim[2], w: para.output.tensorDim[3])).strideArray())
let device = para.output.metalTexture!.device // let device = para.output.metalTexture!.device
let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose) // let outputArray: [Float32] = device.texture2tensor(texture: para.output.metalTexture, dim: para.output.tensorDim.dims, transpose: para.output.transpose)
print(outputArray.strideArray()) // print(outputArray.strideArray())
} }
} }
......
...@@ -48,9 +48,9 @@ class ReshapeParam<P: PrecisionType>: OpParam { ...@@ -48,9 +48,9 @@ class ReshapeParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture
let shape: [Int32] let shape: [Int32]
var output: Texture<P> var output: Texture
} }
class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>, Runable, Creator, InferShaperable{ class ReshapeOp<P: PrecisionType>: Operator<ReshapeKernel<P>, ReshapeParam<P>>, Runable, Creator, InferShaperable{
......
...@@ -29,8 +29,8 @@ class ResizeBilinearParam<P: PrecisionType>: OpParam { ...@@ -29,8 +29,8 @@ class ResizeBilinearParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
let out_h: Int32 let out_h: Int32
let out_w: Int32 let out_w: Int32
} }
......
...@@ -24,8 +24,8 @@ class ShapeParam<P: PrecisionType>: OpParam { ...@@ -24,8 +24,8 @@ class ShapeParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
var output: Texture<P> var output: Texture
let input: Texture<P> let input: Texture
} }
class ShapeOp<P: PrecisionType>: Operator<ShapeKernel<P>, ShapeParam<P>>, Runable, Creator, InferShaperable{ class ShapeOp<P: PrecisionType>: Operator<ShapeKernel<P>, ShapeParam<P>>, Runable, Creator, InferShaperable{
......
...@@ -32,8 +32,8 @@ class SoftmaxParam<P: PrecisionType>: OpParam { ...@@ -32,8 +32,8 @@ class SoftmaxParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
} }
class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable{ class SoftmaxOp<P: PrecisionType>: Operator<SoftmaxKernel<P>, SoftmaxParam<P>>, Runable, Creator, InferShaperable{
......
...@@ -19,7 +19,7 @@ class SplitParam<P: PrecisionType>: OpParam { ...@@ -19,7 +19,7 @@ class SplitParam<P: PrecisionType>: OpParam {
required init(opDesc: OpDesc, inScope: Scope) throws { required init(opDesc: OpDesc, inScope: Scope) throws {
do { do {
input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope) input = try SplitParam.inputX(inputs: opDesc.inputs, from: inScope)
output = Texture<P>.init(device: input.metalTexture!.device, inDim: input.dim) output = Texture.init(device: input.metalTexture!.device, inDim: input.dim)
axis = try SplitParam.getAttr(key: "axis", attrs: opDesc.attrs) axis = try SplitParam.getAttr(key: "axis", attrs: opDesc.attrs)
sections = try SplitParam.getAttr(key: "sections", attrs: opDesc.attrs) sections = try SplitParam.getAttr(key: "sections", attrs: opDesc.attrs)
if axis < 0 { if axis < 0 {
...@@ -29,7 +29,7 @@ class SplitParam<P: PrecisionType>: OpParam { ...@@ -29,7 +29,7 @@ class SplitParam<P: PrecisionType>: OpParam {
fatalError() fatalError()
} }
for out in outlist { for out in outlist {
guard let variant = inScope[out], let v = variant as? Texture<P> else { guard let variant = inScope[out], let v = variant as? Texture else {
fatalError() fatalError()
} }
outputList.append(v) outputList.append(v)
...@@ -41,9 +41,9 @@ class SplitParam<P: PrecisionType>: OpParam { ...@@ -41,9 +41,9 @@ class SplitParam<P: PrecisionType>: OpParam {
} }
var axis: Int var axis: Int
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
var outputList: [Texture<P>] = [] var outputList: [Texture] = []
var sections: [Int32] = [] var sections: [Int32] = []
} }
......
...@@ -26,8 +26,8 @@ class TransposeParam<P: PrecisionType>: OpParam { ...@@ -26,8 +26,8 @@ class TransposeParam<P: PrecisionType>: OpParam {
throw error throw error
} }
} }
let input: Texture<P> let input: Texture
var output: Texture<P> var output: Texture
let axis: [Int32] let axis: [Int32]
} }
......
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
import Foundation import Foundation
class BlockDesc { public class BlockDesc {
let index: Int let index: Int
let parentIndex: Int let parentIndex: Int
let vars: [VarDesc] public let vars: [VarDesc]
let ops: [OpDesc] let ops: [OpDesc]
init(block: PaddleMobile_Framework_Proto_BlockDesc) { init(block: PaddleMobile_Framework_Proto_BlockDesc) {
index = Int(block.idx) index = Int(block.idx)
...@@ -45,7 +45,7 @@ class BlockDesc { ...@@ -45,7 +45,7 @@ class BlockDesc {
} }
extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String { public var description: String {
var str = "" var str = ""
for i in 0..<ops.count { for i in 0..<ops.count {
...@@ -61,9 +61,7 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible { ...@@ -61,9 +61,7 @@ extension BlockDesc: CustomStringConvertible, CustomDebugStringConvertible {
return str return str
} }
var debugDescription: String { public var debugDescription: String {
return description return description
} }
} }
...@@ -14,10 +14,10 @@ ...@@ -14,10 +14,10 @@
import Foundation import Foundation
public class Program { @objc public class Program: NSObject {
let paramPath: String public let paramPath: String
let programDesc: ProgramDesc public let programDesc: ProgramDesc
let scope: Scope public let scope: Scope
init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) { init(inProgramDesc: ProgramDesc, inParamPath: String, inScope: Scope) {
programDesc = inProgramDesc programDesc = inProgramDesc
paramPath = inParamPath paramPath = inParamPath
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
import Foundation import Foundation
public class ProgramDesc { public class ProgramDesc {
var blocks: [BlockDesc] = [] public var blocks: [BlockDesc] = []
init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) { init(protoProgram: PaddleMobile_Framework_Proto_ProgramDesc) {
for block in protoProgram.blocks { for block in protoProgram.blocks {
self.blocks.append(BlockDesc.init(block: block)) self.blocks.append(BlockDesc.init(block: block))
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import Foundation import Foundation
class Scope { public class Scope {
let feedKey: String let feedKey: String
let fetchKey: String let fetchKey: String
func setInput(input: Variant) { func setInput(input: Variant) {
...@@ -29,7 +29,7 @@ class Scope { ...@@ -29,7 +29,7 @@ class Scope {
return vars[feedKey]; return vars[feedKey];
} }
func output() -> Variant? { public func output() -> Variant? {
return vars[fetchKey]; return vars[fetchKey];
} }
...@@ -38,7 +38,7 @@ class Scope { ...@@ -38,7 +38,7 @@ class Scope {
fetchKey = inFetchKey fetchKey = inFetchKey
} }
var vars: [String : Variant] = [:] public var vars: [String : Variant] = [:]
subscript(key: String) -> Variant?{ subscript(key: String) -> Variant?{
get { get {
return vars[key] return vars[key]
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import Foundation import Foundation
enum VarTypeType: Int { public enum VarTypeType: Int {
case ErrorType = -1, case ErrorType = -1,
Bool = 0, Bool = 0,
Int16 = 1, Int16 = 1,
...@@ -56,10 +56,10 @@ enum VarTypeType: Int { ...@@ -56,10 +56,10 @@ enum VarTypeType: Int {
} }
} }
class VarDesc { public class VarDesc {
let name: String public let name: String
let persistable: Bool public let persistable: Bool
let type: VarTypeType public let type: VarTypeType
let tensorDesc: TensorDesc? let tensorDesc: TensorDesc?
init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) { init(protoVarDesc: PaddleMobile_Framework_Proto_VarDesc) {
type = VarTypeType.init(rawValue: protoVarDesc.type.type.rawValue) ?? .ErrorType type = VarTypeType.init(rawValue: protoVarDesc.type.type.rawValue) ?? .ErrorType
...@@ -79,7 +79,7 @@ class VarDesc { ...@@ -79,7 +79,7 @@ class VarDesc {
} }
extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible { extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
var description: String { public var description: String {
var str = "" var str = ""
str += "var name \(name): \n" str += "var name \(name): \n"
if let inTensorDesc = tensorDesc { if let inTensorDesc = tensorDesc {
...@@ -93,7 +93,7 @@ extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible { ...@@ -93,7 +93,7 @@ extension VarDesc: CustomStringConvertible, CustomDebugStringConvertible {
return str return str
} }
var debugDescription: String { public var debugDescription: String {
return description return description
} }
} }
//
// YoloNet.swift
// paddle-mobile
//
// Created by Xiao,Haichun on 2018/12/5.
// Copyright © 2018 orange. All rights reserved.
//
import Foundation
import Metal
public class YoloNet: Net {
@objc public override init(device: MTLDevice) {
super.init(device: device)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = Bundle.main.path(forResource: "yolo_model", ofType: nil) ?! "model null"
paramPath = Bundle.main.path(forResource: "yolo_params", ofType: nil) ?! "para null"
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 224, w: 224, c: 3)
}
@objc override public init(device: MTLDevice,paramPointer: UnsafeMutableRawPointer, paramSize:Int, modePointer: UnsafeMutableRawPointer, modelSize: Int) {
super.init(device:device,paramPointer:paramPointer,paramSize:paramSize,modePointer:modePointer,modelSize:modelSize)
means = [0, 0, 0]
scale = 1
except = 0
modelPath = ""
paramPath = ""
modelDir = ""
//preprocessKernel = GenetPreProccess.init(device: device)
dim = (n: 1, h: 224, w: 224, c: 3)
}
// class GenetPreProccess: CusomKernel {
// init(device: MTLDevice) {
// let s = CusomKernel.Shape.init(inWidth: 128, inHeight: 128, inChannel: 3)
// super.init(device: device, inFunctionName: "genet_preprocess", outputDim: s, usePaddleMobileLib: false)
// }
// }
override public func resultStr(res: ResultHolder) -> String {
// fatalError()
return " \(res.result![0]) ... "
}
}
...@@ -14,9 +14,6 @@ ...@@ -14,9 +14,6 @@
#pragma once #pragma once
#import "PaddleMobileCPU.h"
#import "CPUCompute.h"
#import "PaddleMobileGPU.h"
#import <UIKit/UIKit.h> #import <UIKit/UIKit.h>
//! Project version number for paddle_mobile. //! Project version number for paddle_mobile.
......
...@@ -66,6 +66,7 @@ const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose"; ...@@ -66,6 +66,7 @@ const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
const char *G_OP_TYPE_PRELU = "prelu"; const char *G_OP_TYPE_PRELU = "prelu";
const char *G_OP_TYPE_LOOKUP_TABLE = "lookup_table"; const char *G_OP_TYPE_LOOKUP_TABLE = "lookup_table";
const char *G_OP_TYPE_GRU = "gru"; const char *G_OP_TYPE_GRU = "gru";
const char *G_OP_TYPE_GRU_UNIT = "gru_unit";
const char *G_OP_TYPE_CRF = "crf_decoding"; const char *G_OP_TYPE_CRF = "crf_decoding";
const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp"; const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
const char *G_OP_TYPE_FLATTEN = "flatten"; const char *G_OP_TYPE_FLATTEN = "flatten";
...@@ -149,6 +150,9 @@ std::unordered_map< ...@@ -149,6 +150,9 @@ std::unordered_map<
{G_OP_TYPE_GRU, {G_OP_TYPE_GRU,
{{"Input", "H0", "Weight", "Bias"}, {{"Input", "H0", "Weight", "Bias"},
{"BatchGate", "BatchResetHiddenPrev", "BatchHidden", "Hidden"}}}, {"BatchGate", "BatchResetHiddenPrev", "BatchHidden", "Hidden"}}},
{G_OP_TYPE_GRU_UNIT,
{{"Input", "HiddenPrev", "Weight", "Bias"},
{"Gate", "ResetHiddenPrev", "Hidden"}}},
{G_OP_TYPE_CRF, {{"Emission", "Transition", "Label"}, {"ViterbiPath"}}}, {G_OP_TYPE_CRF, {{"Emission", "Transition", "Label"}, {"ViterbiPath"}}},
{G_OP_TYPE_BILINEAR_INTERP, {{"OutSize", "X"}, {"Out"}}}, {G_OP_TYPE_BILINEAR_INTERP, {{"OutSize", "X"}, {"Out"}}},
{G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}}, {G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
......
...@@ -110,6 +110,10 @@ enum PoolingType { ...@@ -110,6 +110,10 @@ enum PoolingType {
FIRST = 3, FIRST = 3,
}; };
struct PaddleMobileConfigInternal {
bool load_when_predict = false;
};
extern const char *G_OP_TYPE_CONV; extern const char *G_OP_TYPE_CONV;
extern const char *G_OP_TYPE_BATCHNORM; extern const char *G_OP_TYPE_BATCHNORM;
extern const char *G_OP_TYPE_BOX_CODER; extern const char *G_OP_TYPE_BOX_CODER;
......
...@@ -38,12 +38,15 @@ namespace framework { ...@@ -38,12 +38,15 @@ namespace framework {
#pragma mark - executor #pragma mark - executor
template <typename Device, typename T> template <typename Device, typename T>
Executor<Device, T>::Executor(const Program<Device> &program, int batch_size, Executor<Device, T>::Executor(const Program<Device> &program,
const bool use_optimize, const bool lod_mode) paddle_mobile::PaddleMobileConfigInternal config,
int batch_size, const bool use_optimize,
const bool lod_mode)
: program_(program), : program_(program),
batch_size_(batch_size), batch_size_(batch_size),
use_optimize_(use_optimize), use_optimize_(use_optimize),
lod_mode_(lod_mode) { lod_mode_(lod_mode),
config_(config) {
DLOG << "executor in lod mode: " << lod_mode_; DLOG << "executor in lod mode: " << lod_mode_;
Variable *variable_ptr = program_.scope->Var("batch_size"); Variable *variable_ptr = program_.scope->Var("batch_size");
...@@ -212,10 +215,17 @@ void Executor<Device, T>::InitCombineMemory() { ...@@ -212,10 +215,17 @@ void Executor<Device, T>::InitCombineMemory() {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") { if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue; continue;
} }
DLOG << " init combine memory persistable: " << var_desc->Name();
LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor); LoadMemory(reinterpret_cast<void **>(&data), var_desc, tensor);
} else { } else {
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) { if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
DLOG << " init combine memory no persistable in lod: "
<< var_desc->Name();
varInputMemory(var_desc, var, tensor); varInputMemory(var_desc, var, tensor);
} else {
DLOG << " init combine memory no persistable: " << var_desc->Name();
} }
} }
} }
...@@ -226,6 +236,34 @@ void Executor<Device, T>::InitCombineMemory() { ...@@ -226,6 +236,34 @@ void Executor<Device, T>::InitCombineMemory() {
LOG(kLOG_INFO) << "init combine memory finish"; LOG(kLOG_INFO) << "init combine memory finish";
} }
template <typename Device, typename T>
void Executor<Device, T>::InitNoPersistableMemory(const Tensor &input_tensor) {
for (const auto &block : program_desc_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program_.scope->Var(var_desc->Name());
auto tensor = var->template GetMutable<LoDTensor>();
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
} else {
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
DDim tensor_dim = tensor->dims();
DDim new_dim =
make_ddim({tensor_dim[0], tensor_dim[1], input_tensor.dims()[2],
input_tensor.dims()[3]});
tensor->Resize(new_dim);
tensor->template mutable_data<T>();
}
}
}
}
std::shared_ptr<LoDTensor> output = GetOutput("fetch");
output->Resize(input_tensor.dims());
output->mutable_data<T>();
}
template <typename Device, typename T> template <typename Device, typename T>
bool Executor<Device, T>::varInputMemory( bool Executor<Device, T>::varInputMemory(
const std::shared_ptr<VarDesc> &var_desc, Variable *var, const std::shared_ptr<VarDesc> &var_desc, Variable *var,
...@@ -297,7 +335,16 @@ void Executor<Device, T>::SetInput(const Tensor &input, ...@@ -297,7 +335,16 @@ void Executor<Device, T>::SetInput(const Tensor &input,
auto *target_var = program_.scope->FindVar(var_name); auto *target_var = program_.scope->FindVar(var_name);
PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist", PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
var_name.c_str()); var_name.c_str());
auto *target_tensor = target_var->template GetMutable<LoDTensor>(); auto *target_tensor = target_var->template GetMutable<LoDTensor>();
if (config_.load_when_predict) {
if (input_dim_last_ != input.dims()) {
InitNoPersistableMemory(input);
input_dim_last_ = input.dims();
}
}
target_tensor->Resize(input.dims()); target_tensor->Resize(input.dims());
target_tensor->ShareDataWith(input); target_tensor->ShareDataWith(input);
} }
...@@ -309,6 +356,14 @@ void Executor<Device, T>::SetInput(const LoDTensor &input, ...@@ -309,6 +356,14 @@ void Executor<Device, T>::SetInput(const LoDTensor &input,
PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist", PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
var_name.c_str()); var_name.c_str());
auto *target_tensor = target_var->template GetMutable<LoDTensor>(); auto *target_tensor = target_var->template GetMutable<LoDTensor>();
if (config_.load_when_predict) {
if (input_dim_last_ != input.dims()) {
InitNoPersistableMemory(*target_tensor);
input_dim_last_ = input.dims();
}
}
target_tensor->Resize(input.dims()); target_tensor->Resize(input.dims());
target_tensor->ShareDataWith(input); target_tensor->ShareDataWith(input);
target_tensor->set_lod(input.lod()); target_tensor->set_lod(input.lod());
...@@ -453,6 +508,70 @@ void Executor<Device, T>::Predict_To(int end) { ...@@ -453,6 +508,70 @@ void Executor<Device, T>::Predict_To(int end) {
#endif #endif
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
template <>
void Executor<GPU_CL, float>::InitNoPersistableMemory(
const Tensor &input_tensor) {
DLOG << "CL InitNoPersistableMemory ";
for (const auto &block : program_desc_->Blocks()) {
for (const auto &var_desc : block->Vars()) {
auto var = program_.scope->Var(var_desc->Name());
auto cl_image = var->template GetMutable<CLImage>();
if (var_desc->Persistable()) {
if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
continue;
}
} else {
if (var_desc->Type() == VARTYPE_TYPE_LOD_TENSOR) {
cl_context context = program_.scope->GetCLScpoe()->Context();
cl_command_queue command_queue =
program_.scope->GetCLScpoe()->CommandQueue();
DDim tensor_dim = cl_image->dims();
DDim new_dim =
make_ddim({tensor_dim[0], tensor_dim[1], input_tensor.dims()[2],
input_tensor.dims()[3]});
cl_image->Resize(new_dim);
cl_image->InitEmptyImage(context, command_queue, new_dim);
}
}
}
}
std::shared_ptr<LoDTensor> output = GetOutput("fetch");
output->Resize(input_tensor.dims());
output->mutable_data<float>();
}
template <>
void Executor<GPU_CL, float>::SetInput(const Tensor &input,
const std::string &var_name) {
auto *target_var = program_.scope->FindVar(var_name);
PADDLE_MOBILE_ENFORCE(target_var != nullptr, "Variable %s is not exist",
var_name.c_str());
auto *target_tensor = target_var->template GetMutable<LoDTensor>();
DLOG << "config_.load_when_predict " << config_.load_when_predict;
DLOG << "target_tensor->IsInitialized() " << target_tensor->IsInitialized();
DLOG << "target_tensor->dims() " << target_tensor->dims();
DLOG << "input.dims() " << input.dims();
DLOG << "input_dim_last_ " << input_dim_last_;
if (config_.load_when_predict) {
if (input_dim_last_ != input.dims()) {
DLOG << "SetInput ---- > resize1";
target_tensor->Resize(input.dims());
target_tensor->mutable_data<float>();
InitNoPersistableMemory(*target_tensor);
}
} else {
DLOG << "SetInput ---- > resize2";
target_tensor->Resize(input.dims());
DLOG << "SetInput ---- > ShareDataWith";
}
target_tensor->ShareDataWith(input);
auto &dim = input.dims();
input_dim_last_ = static_cast<DDim>(dim);
}
template <typename Device, typename T> template <typename Device, typename T>
void Executor<Device, T>::LoadMemory(const VarDesc var_desc, float *tensorInput, void Executor<Device, T>::LoadMemory(const VarDesc var_desc, float *tensorInput,
char **data) {} char **data) {}
...@@ -588,6 +707,8 @@ void Executor<GPU_CL, float>::InitMemory() { ...@@ -588,6 +707,8 @@ void Executor<GPU_CL, float>::InitMemory() {
template <> template <>
void Executor<GPU_CL, float>::InitCombineMemory() { void Executor<GPU_CL, float>::InitCombineMemory() {
DLOG << "CL InitCombineMemory---- "
<< "config_.load_when_predict: " << config_.load_when_predict;
char *origin_data = nullptr; char *origin_data = nullptr;
bool self_alloc = false; bool self_alloc = false;
if (program_.combined_params_buf && program_.combined_params_len) { if (program_.combined_params_buf && program_.combined_params_len) {
......
...@@ -32,7 +32,8 @@ namespace framework { ...@@ -32,7 +32,8 @@ namespace framework {
template <typename Device, typename T = float> template <typename Device, typename T = float>
class Executor { class Executor {
public: public:
Executor(const Program<Device> &program, int batch_size = 1, Executor(const Program<Device> &program,
paddle_mobile::PaddleMobileConfigInternal config, int batch_size = 1,
const bool use_optimize = true, const bool lod_mode = false); const bool use_optimize = true, const bool lod_mode = false);
PMStatus Predict(const std::vector<std::pair<std::string, Tensor>> &inputs); PMStatus Predict(const std::vector<std::pair<std::string, Tensor>> &inputs);
...@@ -64,6 +65,7 @@ class Executor { ...@@ -64,6 +65,7 @@ class Executor {
LoDTensor *tensor) const; LoDTensor *tensor) const;
void InitMemory(); void InitMemory();
void InitCombineMemory(); void InitCombineMemory();
void InitNoPersistableMemory(const Tensor &input_tensor);
void LoadMemory(void **data, const std::shared_ptr<VarDesc> var_desc, void LoadMemory(void **data, const std::shared_ptr<VarDesc> var_desc,
LoDTensor *tensor); LoDTensor *tensor);
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
...@@ -73,14 +75,17 @@ class Executor { ...@@ -73,14 +75,17 @@ class Executor {
int batch_size_; int batch_size_;
bool use_optimize_; bool use_optimize_;
bool lod_mode_; bool lod_mode_;
PaddleMobileConfigInternal config_;
Program<Device> program_; Program<Device> program_;
std::shared_ptr<ProgramDesc> program_desc_; std::shared_ptr<ProgramDesc> program_desc_;
typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr; typedef std::shared_ptr<OperatorBase<Device>> OperatorBasePtr;
std::vector<std::vector<OperatorBasePtr>> ops_of_block_; std::vector<std::vector<OperatorBasePtr>> ops_of_block_;
// operators list // operators list
std::vector<OperatorBasePtr> ops_list_; std::vector<OperatorBasePtr> ops_list_;
// for super resoltion
DDim input_dim_last_;
#ifdef PADDLE_MOBILE_PROFILE #ifdef PADDLE_MOBILE_PROFILE
struct ProfInfo { struct ProfInfo {
int tid = 0; int tid = 0;
......
...@@ -45,8 +45,8 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname, ...@@ -45,8 +45,8 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &dirname,
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Device, T>>( executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(dirname, optimize, quantification), batch_size, optimize, loader_->Load(dirname, optimize, quantification), config_, batch_size,
lod_mode); optimize, lod_mode);
} else { } else {
LOG(kLOG_INFO) << "executor inited"; LOG(kLOG_INFO) << "executor inited";
} }
...@@ -67,7 +67,7 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path, ...@@ -67,7 +67,7 @@ PMStatus PaddleMobile<Device, T>::Load(const std::string &model_path,
if (executor_.get() == nullptr) { if (executor_.get() == nullptr) {
executor_ = std::make_shared<framework::Executor<Device, T>>( executor_ = std::make_shared<framework::Executor<Device, T>>(
loader_->Load(model_path, para_path, optimize, quantification), loader_->Load(model_path, para_path, optimize, quantification), config_,
batch_size, optimize, lod_mode); batch_size, optimize, lod_mode);
} else { } else {
LOG(kLOG_INFO) << "executor inited"; LOG(kLOG_INFO) << "executor inited";
...@@ -106,7 +106,7 @@ bool PaddleMobile<Device, T>::LoadCombinedMemory( ...@@ -106,7 +106,7 @@ bool PaddleMobile<Device, T>::LoadCombinedMemory(
loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len, loader_->LoadCombinedMemory(model_len, model_buf, combined_params_len,
combined_params_buf, optimize, combined_params_buf, optimize,
quantification), quantification),
batch_size, optimize, lod_mode); config_, batch_size, optimize, lod_mode);
} else { } else {
LOG(kLOG_INFO) << "executor inited"; LOG(kLOG_INFO) << "executor inited";
} }
......
...@@ -33,6 +33,13 @@ namespace paddle_mobile { ...@@ -33,6 +33,13 @@ namespace paddle_mobile {
template <typename Device, typename T = float> template <typename Device, typename T = float>
class PaddleMobile { class PaddleMobile {
public: public:
PaddleMobile(PaddleMobileConfigInternal config) : config_(config) {
#ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
PADDLE_MOBILE_ENFORCE(!is_gpu, "Please recompile with GPU_CL is on");
#endif
}
PaddleMobile() { PaddleMobile() {
#ifndef PADDLE_MOBILE_CL #ifndef PADDLE_MOBILE_CL
bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value; bool is_gpu = std::is_same<DeviceType<kGPU_CL>, Device>::value;
...@@ -99,6 +106,7 @@ class PaddleMobile { ...@@ -99,6 +106,7 @@ class PaddleMobile {
private: private:
std::shared_ptr<framework::Loader<Device, T>> loader_; std::shared_ptr<framework::Loader<Device, T>> loader_;
std::shared_ptr<framework::Executor<Device, T>> executor_; std::shared_ptr<framework::Executor<Device, T>> executor_;
PaddleMobileConfigInternal config_;
}; };
} // namespace paddle_mobile } // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef GRU_UNIT_OP
#include "operators/gru_unit_op.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
void GruUnitOp<DeviceType, T>::InferShape() const {
auto input_dims = this->param_.InputInput()->dims();
auto hidden_prev_dims = this->param_.InputHiddenPrev()->dims();
auto weight_dims = this->param_.InputWeight()->dims();
int batch_size = input_dims[0];
int input_size = input_dims[1];
int frame_size = hidden_prev_dims[1];
int weight_height = weight_dims[0];
int weight_width = weight_dims[1];
PADDLE_MOBILE_ENFORCE(
(input_size == frame_size * 3),
"The input_size must be 3 times of frame_size in GRUUnitOp.");
PADDLE_MOBILE_ENFORCE(
(weight_height == frame_size),
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
PADDLE_MOBILE_ENFORCE(
(weight_width == frame_size * 3),
"The shape of Weight matrix must be [frame_size, frame_size * 3].");
if (this->param_.InputBias()) {
auto bias_dims = this->param_.InputBias()->dims();
int bias_height = bias_dims[0];
int bias_width = bias_dims[1];
PADDLE_MOBILE_ENFORCE((bias_height == 1),
"The shape of Bias must be [1, frame_size * 3].");
PADDLE_MOBILE_ENFORCE((bias_width == frame_size * 3),
"The shape of Bias must be [1, frame_size * 3].");
}
this->param_.OutGate()->Resize({batch_size, frame_size * 3});
this->param_.OutResetHiddenPrev()->Resize({batch_size, frame_size});
this->param_.OutHidden()->Resize({batch_size, frame_size});
}
} // namespace operators
} // namespace paddle_mobile
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(gru_unit, ops::GruUnitOp);
#endif
#ifdef PADDLE_MOBILE_MALI_GPU
#endif
#ifdef PADDLE_MOBILE_FPGA
#endif
#ifdef PADDLE_MOBILE_CL
#endif
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef GRU_UNIT_OP
#pragma once
#include "framework/operator.h"
#include "operators/kernel/gru_unit_kernel.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class GruUnitOp : public framework::OperatorWithKernel<
DeviceType, GruUnitParam<DeviceType>,
operators::GruUnitKernel<DeviceType, T>> {
public:
GruUnitOp(const std::string &type, const VariableNameMap &inputs,
const VariableNameMap &outputs, const AttributeMap &attrs,
std::shared_ptr<Scope> scope)
: framework::OperatorWithKernel<DeviceType, GruUnitParam<DeviceType>,
operators::GruUnitKernel<DeviceType, T>>(
type, inputs, outputs, attrs, scope){};
void InferShape() const override;
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -37,7 +37,7 @@ struct ActivationCompute<float, Act> { ...@@ -37,7 +37,7 @@ struct ActivationCompute<float, Act> {
size_t loop = remain >> 4; size_t loop = remain >> 4;
remain = remain & 0xF; remain = remain & 0xF;
#pragma omp parallel for #pragma omp parallel for
for (size_t i = 0; i < loop; ++i) { for (size_t i = 0; i < loop; ++i) {
const float *local_x = x + (i << 4); const float *local_x = x + (i << 4);
float *local_y = y + (i << 4); float *local_y = y + (i << 4);
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef GRU_UNIT_OP
#include "operators/kernel/gru_unit_kernel.h"
#include "operators/kernel/central-arm-func/gru_unit_arm_func.h"
namespace paddle_mobile {
namespace operators {
template <>
bool GruUnitKernel<CPU, float>::Init(GruUnitParam<CPU> *param) {
return true;
}
template <>
void GruUnitKernel<CPU, float>::Compute(const GruUnitParam<CPU> &param) {
GruUnitCompute<float>(param);
}
template class GruUnitKernel<CPU, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef GRU_UNIT_OP
#pragma once
#include <operators/math/gru_compute.h>
#include "operators/kernel/activation_kernel.h"
#include "operators/math/gemm.h"
#include "operators/math/math_function.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename P>
void GruUnitCompute(const GruUnitParam<CPU>& param) {
auto* input = param.InputInput();
auto* hidden_prev = param.InputHiddenPrev();
auto* weight = param.InputWeight();
auto* bias = param.InputBias();
auto* gate = param.OutGate();
auto* reset_hidden_prev = param.OutResetHiddenPrev();
auto* hidden = param.OutHidden();
if (bias) {
math::RowwiseAdd<CPU, float> add_bias;
add_bias(*gate, *bias, gate);
}
int batch_size = input->dims()[0];
int frame_size = hidden_prev->dims()[1];
const P* weight_data = weight->data<P>();
math::GRUMetaValue<P> gru_value;
gru_value.gate_weight = const_cast<P*>(weight_data);
gru_value.state_weight =
const_cast<P*>(weight_data + 2 * frame_size * frame_size);
gru_value.output_value = hidden->data<P>();
gru_value.prev_out_value = const_cast<P*>(hidden_prev->data<P>());
gru_value.gate_value = gate->data<P>();
gru_value.reset_output_value = reset_hidden_prev->data<P>();
auto active_node = math::GetActivationType(param.Activation());
auto active_gate = math::GetActivationType(param.GateActivation());
math::GRUUnitFunctor<CPU, float>::compute(gru_value, frame_size, batch_size,
active_node, active_gate);
}
} // namespace operators
} // namespace paddle_mobile
#endif
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef GRU_UNIT_OP
#pragma once
#include "framework/operator.h"
#include "operators/op_param.h"
namespace paddle_mobile {
namespace operators {
template <typename DeviceType, typename T>
class GruUnitKernel
: public framework::OpKernelBase<DeviceType, GruUnitParam<DeviceType>> {
public:
void Compute(const GruUnitParam<DeviceType>& param);
bool Init(GruUnitParam<DeviceType>* param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
...@@ -45,6 +45,19 @@ inline ActivationType GetActivationType(const std::string &type) { ...@@ -45,6 +45,19 @@ inline ActivationType GetActivationType(const std::string &type) {
PADDLE_MOBILE_THROW_EXCEPTION("Not support activation type."); PADDLE_MOBILE_THROW_EXCEPTION("Not support activation type.");
} }
inline ActivationType GetActivationType(const int type) {
if (type == 0) {
return ActivationType::IDENTITY;
} else if (type == 1) {
return ActivationType::SIGMOID;
} else if (type == 2) {
return ActivationType::TANH;
} else if (type == 3) {
return ActivationType::RELU;
}
PADDLE_MOBILE_THROW_EXCEPTION("Not support activation type.");
}
#if defined(__ARM_NEON__) || defined(__ARM_NEON) #if defined(__ARM_NEON__) || defined(__ARM_NEON)
template <ActivationType Act = IDENTITY> template <ActivationType Act = IDENTITY>
inline float32x4_t vActiveq_f32(const float32x4_t &x) { inline float32x4_t vActiveq_f32(const float32x4_t &x) {
......
...@@ -270,7 +270,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input, ...@@ -270,7 +270,6 @@ void DepthwiseConv3x3s1p1(const framework::Tensor *input,
if (if_bias) { if (if_bias) {
bias_data = bias->data<float>(); bias_data = bias->data<float>();
} }
float32x4_t zero = vdupq_n_f32(0.0); float32x4_t zero = vdupq_n_f32(0.0);
for (int b = 0; b < batch_size; ++b) { for (int b = 0; b < batch_size; ++b) {
......
...@@ -1188,6 +1188,10 @@ void Gemm::WriteWithBnAddRelu(int mc, int nc, float *c, float *C, int ldc, ...@@ -1188,6 +1188,10 @@ void Gemm::WriteWithBnAddRelu(int mc, int nc, float *c, float *C, int ldc,
} }
} }
void Gemm::VectorKernel(int m, int n, int k, float alpha, const float *A,
int lda, const float *B, int ldb, float beta, float *C,
int ldc, bool relu) {}
#else #else
void Gemm::AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) { void Gemm::AddDot4x4(int k, const float *a, const float *b, float *c, int ldc) {
......
...@@ -74,6 +74,13 @@ class OpParam { ...@@ -74,6 +74,13 @@ class OpParam {
static T *InputH0From(const VariableNameMap &inputs, const Scope &scope) { static T *InputH0From(const VariableNameMap &inputs, const Scope &scope) {
return GetVarValue<T>("H0", inputs, scope); return GetVarValue<T>("H0", inputs, scope);
} }
template <typename T>
static T *InputHiddenPrevFrom(const VariableNameMap &inputs,
const Scope &scope) {
return GetVarValue<T>("HiddenPrev", inputs, scope);
}
template <typename T> template <typename T>
static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) { static T *InputAlphaFrom(const VariableNameMap &inputs, const Scope &scope) {
return GetVarValue<T>("Alpha", inputs, scope); return GetVarValue<T>("Alpha", inputs, scope);
...@@ -214,6 +221,11 @@ class OpParam { ...@@ -214,6 +221,11 @@ class OpParam {
return GetVarValue<T>("BatchGate", outputs, scope); return GetVarValue<T>("BatchGate", outputs, scope);
} }
template <typename T>
static T *OutputGateFrom(const VariableNameMap &outputs, const Scope &scope) {
return GetVarValue<T>("Gate", outputs, scope);
}
template <typename T> template <typename T>
static T *OutputViterbiPathFrom(const VariableNameMap &outputs, static T *OutputViterbiPathFrom(const VariableNameMap &outputs,
const Scope &scope) { const Scope &scope) {
...@@ -225,6 +237,12 @@ class OpParam { ...@@ -225,6 +237,12 @@ class OpParam {
return GetVarValue<T>("BatchResetHiddenPrev", outputs, scope); return GetVarValue<T>("BatchResetHiddenPrev", outputs, scope);
} }
template <typename T>
static T *OutputResetHiddenPrevFrom(const VariableNameMap &outputs,
const Scope &scope) {
return GetVarValue<T>("ResetHiddenPrev", outputs, scope);
}
template <typename T> template <typename T>
static T *OutputBatchHiddenFrom(const VariableNameMap &outputs, static T *OutputBatchHiddenFrom(const VariableNameMap &outputs,
const Scope &scope) { const Scope &scope) {
...@@ -2444,6 +2462,51 @@ class GruParam : public OpParam { ...@@ -2444,6 +2462,51 @@ class GruParam : public OpParam {
}; };
#endif #endif
#ifdef GRU_UNIT_OP
template <typename Dtype>
class GruUnitParam : public OpParam {
typedef typename DtypeTensorTrait<Dtype>::gtype GType;
public:
GruUnitParam(const VariableNameMap &inputs, const VariableNameMap &outputs,
const AttributeMap &attrs, const Scope &scope) {
input_input_ = InputFrom<GType>(inputs, scope);
input_hidden_prev_ = InputHiddenPrevFrom<GType>(inputs, scope);
input_bias_ = InputBiasFrom<GType>(inputs, scope);
input_weight_ = InputWeightFrom<GType>(inputs, scope);
output_gate_ = OutputGateFrom<GType>(outputs, scope);
output_reset_hidden_prev_ =
OutputResetHiddenPrevFrom<GType>(outputs, scope);
output_hidden_ = OutputHiddenFrom<GType>(outputs, scope);
activation_ = GetAttr<int>("activation", attrs);
gate_activation_ = GetAttr<int>("gate_activation", attrs);
}
const GType *InputInput() const { return input_input_; }
const GType *InputWeight() const { return input_weight_; }
const GType *InputHiddenPrev() const { return input_hidden_prev_; }
const GType *InputBias() const { return input_bias_; }
const int &Activation() const { return activation_; }
const int &GateActivation() const { return gate_activation_; }
GType *OutGate() const { return output_gate_; }
GType *OutResetHiddenPrev() const { return output_reset_hidden_prev_; }
GType *OutHidden() const { return output_hidden_; }
private:
GType *input_input_;
GType *input_hidden_prev_;
GType *input_bias_;
GType *input_weight_;
GType *output_gate_;
GType *output_reset_hidden_prev_;
GType *output_hidden_;
int activation_;
int gate_activation_;
};
#endif
#ifdef FLATTEN_OP #ifdef FLATTEN_OP
template <typename Dtype> template <typename Dtype>
class FlattenParam : public OpParam { class FlattenParam : public OpParam {
......
...@@ -23,10 +23,11 @@ int main() { ...@@ -23,10 +23,11 @@ int main() {
// ../../../test/models/googlenet // ../../../test/models/googlenet
// ../../../test/models/mobilenet // ../../../test/models/mobilenet
// auto program = loader.Load(g_mobilenet_ssd, true); std::string g_super = "../models/superresoltion";
// auto program = loader.Load(g_super, true);
// auto program = loader.Load(std::string(g_ocr) + "/model", auto program = loader.Load(std::string(g_super) + "/model",
// std::string(g_ocr) + "/params", false); std::string(g_super) + "/params", false);
// program.originProgram->Description("program desc: "); // program.originProgram->Description("program desc: ");
return 0; return 0;
......
...@@ -18,7 +18,10 @@ limitations under the License. */ ...@@ -18,7 +18,10 @@ limitations under the License. */
#include "../test_include.h" #include "../test_include.h"
int main() { int main() {
paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile; paddle_mobile::PaddleMobileConfigInternal config;
config.load_when_predict = true;
paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
// paddle_mobile.SetThreadNum(4); // paddle_mobile.SetThreadNum(4);
auto time1 = paddle_mobile::time(); auto time1 = paddle_mobile::time();
#ifdef PADDLE_MOBILE_CL #ifdef PADDLE_MOBILE_CL
...@@ -27,38 +30,121 @@ int main() { ...@@ -27,38 +30,121 @@ int main() {
auto isok = paddle_mobile.Load(std::string(g_super) + "/model", auto isok = paddle_mobile.Load(std::string(g_super) + "/model",
std::string(g_super) + "/params", true, false, std::string(g_super) + "/params", true, false,
1, true); 1, false);
// auto isok = paddle_mobile.Load(std::string(g_mobilenet_mul), true); // auto isok = paddle_mobile.Load(std::string(g_mobilenet_mul), true);
if (isok) { if (isok) {
auto time2 = paddle_mobile::time(); auto time2 = paddle_mobile::time();
std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms" std::cout << "load cost :" << paddle_mobile::time_diff(time1, time2) << "ms"
<< std::endl; << std::endl;
// 300*300
// std::vector<float> input;
// std::vector<int64_t> dims{1, 1, 300, 300};
// GetInput<float>(g_test_image_1x3x224x224, &input, dims);
//
// std::vector<float> vec_result;
std::vector<float> input; auto time3 = paddle_mobile::time();
std::vector<int64_t> dims{1, 1, 300, 300}; int max = 1;
GetInput<float>(g_yolo_img, &input, dims);
std::vector<float> vec_result; // for (int i = 0; i < max; ++i) {
// auto time5 = paddle_mobile::time();
// vec_result = paddle_mobile.Predict(input, dims);
// auto time6 = paddle_mobile::time();
// std::cout << "300 predict cost :第" << i << ": "
// << paddle_mobile::time_diff(time5, time6) << "ms" <<
// std::endl;
// }
// auto time4 = paddle_mobile::time();
//
// std::cout << "300 predict cost :"
// << paddle_mobile::time_diff(time3, time4) / max << "ms"
// << std::endl;
// auto biggest =
// std::max_element(std::begin(vec_result), std::end(vec_result));
// std::cout << "300 Max element is " << *biggest << " at position "
// << std::distance(std::begin(vec_result), biggest) <<
// std::endl;
//
// // 500*500
// std::vector<float> vec_result2;
//
// std::vector<float> input2;
// std::vector<int64_t> dims2{1, 1, 500, 500};
// GetInput<float>(g_test_image_1x3x224x224, &input2, dims2);
//
// time3 = paddle_mobile::time();
// for (int i = 0; i < max; ++i) {
// auto time5 = paddle_mobile::time();
// vec_result2 = paddle_mobile.Predict(input2, dims2);
// auto time6 = paddle_mobile::time();
// std::cout << "500 predict cost :第" << i << ": "
// << paddle_mobile::time_diff(time5, time6) << "ms" <<
// std::endl;
// }
//
// time4 = paddle_mobile::time();
// std::cout << "500 predict cost :"
// << paddle_mobile::time_diff(time3, time4) / max << "ms"
// << std::endl;
// biggest = std::max_element(std::begin(vec_result2),
// std::end(vec_result2)); std::cout << "500 Max element is " << *biggest
// << " at position "
// << std::distance(std::begin(vec_result2), biggest) <<
// std::endl;
//
// // 1000*1000
//
// std::vector<float> vec_result3;
// std::vector<float> input3;
// std::vector<int64_t> dims3{1, 1, 1000, 1000};
// GetInput<float>(g_test_image_1x3x224x224, &input3, dims3);
//
// time3 = paddle_mobile::time();
//
// for (int i = 0; i < max; ++i) {
// auto time5 = paddle_mobile::time();
// vec_result3 = paddle_mobile.Predict(input3, dims3);
// auto time6 = paddle_mobile::time();
// std::cout << "1000*1000 predict cost :第" << i << ": "
// << paddle_mobile::time_diff(time5, time6) << "ms" <<
// std::endl;
// }
// time4 = paddle_mobile::time();
// std::cout << "1000*1000 predict cost :"
// << paddle_mobile::time_diff(time3, time4) / max << "ms"
// << std::endl;
// biggest = std::max_element(std::begin(vec_result3),
// std::end(vec_result3)); std::cout << "1000*1000 Max element is " <<
// *biggest << " at position "
// << std::distance(std::begin(vec_result3), biggest) <<
// std::endl;
auto time3 = paddle_mobile::time(); // 224*224
int max = 10; std::vector<float> vec_result4;
std::vector<float> input4;
std::vector<int64_t> dims4{1, 1, 300, 300};
GetInput<float>(g_test_image_1x3x224x224, &input4, dims4);
time3 = paddle_mobile::time();
for (int i = 0; i < max; ++i) { for (int i = 0; i < max; ++i) {
vec_result = paddle_mobile.Predict(input, dims); auto time5 = paddle_mobile::time();
vec_result4 = paddle_mobile.Predict(input4, dims4);
auto time6 = paddle_mobile::time();
std::cout << "224*224 predict cost :第" << i << ": "
<< paddle_mobile::time_diff(time5, time6) << "ms" << std::endl;
} }
auto time4 = paddle_mobile::time();
std::cout << "predict cost :" auto time4 = paddle_mobile::time();
std::cout << "224*224 predict cost :"
<< paddle_mobile::time_diff(time3, time4) / max << "ms" << paddle_mobile::time_diff(time3, time4) / max << "ms"
<< std::endl; << std::endl;
std::vector<float>::iterator biggest = // biggest = std::max_element(std::begin(vec_result4),
std::max_element(std::begin(vec_result), std::end(vec_result)); // std::end(vec_result4)); std::cout << "224*224 Max element is " <<
std::cout << " Max element is " << *biggest << " at position " // *biggest << " at position "
<< std::distance(std::begin(vec_result), biggest) << std::endl; // << std::distance(std::begin(vec_result4), biggest) <<
// std::endl;
} }
std::cout << "如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana "
"是否存在?"
<< std::endl;
return 0; return 0;
} }
...@@ -63,6 +63,7 @@ static const char *g_imgfssd_ar = "../images/test_image_ssd_ar"; ...@@ -63,6 +63,7 @@ static const char *g_imgfssd_ar = "../images/test_image_ssd_ar";
static const char *g_imgfssd_ar1 = "../images/003_0001.txt"; static const char *g_imgfssd_ar1 = "../images/003_0001.txt";
static const char *g_img = "../images/img.bin"; static const char *g_img = "../images/img.bin";
static const char *g_yolo_img = "../images/in_put_1_3_416_416_2"; static const char *g_yolo_img = "../images/in_put_1_3_416_416_2";
static const char *g_super_img = "../images/mingren_input_data";
static const char *g_mobilenet_img = "../images/image"; static const char *g_mobilenet_img = "../images/image";
using paddle_mobile::framework::DDim; using paddle_mobile::framework::DDim;
......
#!/usr/bin/env bash
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
function print_usage() {
echo "\n${RED}Usage${NONE}:
${BOLD}${SCRIPT_NAME}${NONE} [Option] [Network]"
echo "\n${RED}Option${NONE}: required, specify the target platform
${BLUE}android_armv7${NONE}: run build for android armv7 platform
${BLUE}android_armv8${NONE}: run build for android armv8 platform
${BLUE}ios${NONE}: run build for apple ios platform
${BLUE}linux_armv7${NONE}: run build for linux armv7 platform
${BLUE}linux_armv8${NONE}: run build for linux armv8 platform
"
echo "\n${RED}Network${NONE}: optional, for deep compressing the framework size
${BLUE}googlenet${NONE}: build only googlenet support
${BLUE}mobilenet${NONE}: build only mobilenet support
${BLUE}yolo${NONE}: build only yolo support
${BLUE}squeezenet${NONE}: build only squeezenet support
${BLUE}resnet${NONE}: build only resnet support
${BLUE}mobilenetssd${NONE}: build only mobilenetssd support
${BLUE}nlp${NONE}: build only nlp model support
${BLUE}mobilenetfssd${NONE}: build only mobilenetfssd support
${BLUE}genet${NONE}: build only genet support
${BLUE}super${NONE}: build only super support
"
}
function init() {
RED='\033[0;31m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NONE='\033[0m'
PADDLE_MOBILE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../" && pwd )"
if [ -z "${SCRIPT_NAME}" ]; then
SCRIPT_NAME=$0
fi
}
function check_ndk() {
if [ -z "${NDK_ROOT}" ]; then
echo "Should set NDK_ROOT as your android ndk path, such as\n"
echo " export NDK_ROOT=~/android-ndk-r14b\n"
exit -1
fi
}
function build_android_armv7_cpu_only() {
rm -rf ../build/armeabi-v7a
cmake .. \
-B"../build/armeabi-v7a" \
-DANDROID_ABI="armeabi-v7a with NEON" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake" \
-DANDROID_PLATFORM="android-22" \
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
cd ../build/armeabi-v7a && make -j 8
cd -
}
function build_android_armv7_gpu() {
rm -rf ../build/armeabi-v7a
cmake .. \
-B"../build/armeabi-v7a" \
-DANDROID_ABI="armeabi-v7a with NEON" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake" \
-DANDROID_PLATFORM="android-22" \
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF
cd ../build/armeabi-v7a && make -j 8
cd -
}
function build_android_armv8_cpu_only() {
rm -rf ../build/arm64-v8a
cmake .. \
-B"../build/arm64-v8a" \
-DANDROID_ABI="arm64-v8a" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake" \
-DANDROID_PLATFORM="android-22" \
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
cd ../build/arm64-v8a && make -j 1
cd -
}
function build_android_armv8_gpu() {
rm -rf ../build/arm64-v8a
cmake .. \
-B"../build/arm64-v8a" \
-DANDROID_ABI="arm64-v8a" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/android-cmake/android.toolchain.cmake" \
-DANDROID_PLATFORM="android-22" \
-DANDROID_STL=c++_static \
-DANDROID=true \
-DWITH_LOGGING=OFF \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF
cd ../build/arm64-v8a && make -j 8
cd -
}
function build_ios_armv8_cpu_only() {
rm -rf ../build/ios
cmake .. \
-B"../build/ios" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake" \
-DIOS_PLATFORM=OS \
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
cd ../build/ios && make -j 8
cd -
}
function build_ios_armv8_gpu() {
rm -rf ../build/ios
cmake .. \
-B"../build/ios" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/ios-cmake/ios.toolchain.cmake" \
-DIOS_PLATFORM=OS \
-DIOS_ARCH="${IOS_ARCH}" \
-DIS_IOS=true \
-DGPU_MALI=OFF \
-DGPU_CL=ON \
-DFPGA=OFF
cd ../build/ios && make -j 8
cd -
}
function build_linux_armv7_cpu_only() {
rm -rf ../build/armv7_linux
cmake .. \
-B"../build/armv7_linux" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-DGPU_MALI=OFF \
-DGPU_CL=OFF \
-DFPGA=OFF
cd ../build/armv7_linux && make -j 8
cd -
}
function build_linux_armv7_gpu() {
rm -rf ../build/armv7_linux
cmake .. \
-B"../build/armv7_linux" \
-DCMAKE_BUILD_TYPE="MinSizeRel" \
-DCMAKE_TOOLCHAIN_FILE="./tools/toolchains/arm-linux-gnueabihf.cmake" \
-DGPU_MALI=ON \
-DGPU_CL=ON \
-DFPGA=OFF
cd ../build/armv7_linux && make -j 8
cd -
}
function build_android_armv7() {
check_ndk
build_android_armv7_cpu_only
# build_android_armv7_gpu
}
function build_android_armv8() {
check_ndk
build_android_armv8_cpu_only
# build_android_armv8_gpu
}
function build_ios() {
build_ios_armv8_cpu_only
# build_ios_armv8_gpu
}
function build_linux_armv7() {
check_ndk
build_linux_armv7_cpu_only
# build_linux_armv7_gpu
}
function main() {
local CMD=$1
init
case $CMD in
android_armv7)
build_android_armv7
;;
android_armv8)
build_android_armv8
;;
ios)
build_ios
;;
linux_armv7)
build_linux_armv7
;;
*)
print_usage
exit 0
;;
esac
}
main $@
...@@ -256,6 +256,7 @@ if(NOT FOUND_MATCH) ...@@ -256,6 +256,7 @@ if(NOT FOUND_MATCH)
set(IM2SEQUENCE_OP ON) set(IM2SEQUENCE_OP ON)
set(LOOKUP_OP ON) set(LOOKUP_OP ON)
set(GRU_OP ON) set(GRU_OP ON)
set(GRU_UNIT_OP ON)
set(CRF_OP ON) set(CRF_OP ON)
set(BILINEAR_INTERP_OP ON) set(BILINEAR_INTERP_OP ON)
set(SPLIT_OP ON) set(SPLIT_OP ON)
...@@ -450,6 +451,10 @@ if (GRU_OP) ...@@ -450,6 +451,10 @@ if (GRU_OP)
add_definitions(-DGRU_OP) add_definitions(-DGRU_OP)
endif() endif()
if (GRU_UNIT_OP)
add_definitions(-DGRU_UNIT_OP)
endif()
if (CRF_OP) if (CRF_OP)
add_definitions(-DCRF_OP) add_definitions(-DCRF_OP)
endif() endif()
......
# coding=utf-8 # coding=utf-8
import os import os
path = "yolo_v2_tofile_source/" # 文件夹目录 path = "mobilenet/" # 文件夹目录
to_file_path = "yolo_v2_tofile_combined/params" to_file_path = "mobilenet_combine/params"
files = os.listdir(path) # 得到文件夹下的所有文件名称 files = os.listdir(path) # 得到文件夹下的所有文件名称
files.sort(cmp=None, key=str.lower) files.sort(cmp=None, key=str.lower)
to_file = open(to_file_path, "wb") to_file = open(to_file_path, "wb")
for file in files: # 遍历文件夹 for file in files: # 遍历文件夹
if not os.path.isdir(file): # 判断是否是文件夹,不是文件夹才打开 if not os.path.isdir(file) and file != ".DS_Store": # 判断是否是文件夹,不是文件夹才打开
f = open(path + "/" + file) # 打开文件 f = open(path + "/" + file) # 打开文件
name = f.name name = f.name
print 'name: ' + name print 'name: ' + name
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册