提交 5cc7ac26 编写于 作者: luxuhui

fix mace run tools for darwin

N/A
Signed-off-by: Luxuhui <luxuhui@xiaomi.com>
上级 b71da971
...@@ -59,6 +59,14 @@ jobs: ...@@ -59,6 +59,14 @@ jobs:
env: TYPE=Ops-Test env: TYPE=Ops-Test
os: osx os: osx
osx_image: xcode7.2 osx_image: xcode7.2
- stage: Unit Test
script:
- echo "Ops Test On Darwin"
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1;
- bazel build "//mace/ops:ops_test" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
env: TYPE=Ops-Test
os: osx
osx_image: xcode7.2
- stage: Unit Test - stage: Unit Test
script: script:
- echo "Ops Test Without NEON" - echo "Ops Test Without NEON"
...@@ -80,6 +88,13 @@ jobs: ...@@ -80,6 +88,13 @@ jobs:
env: TYPE=Ops-Benchmark env: TYPE=Ops-Benchmark
os: osx os: osx
osx_image: xcode7.2 osx_image: xcode7.2
- stage: Unit Test
script:
- python tools/bazel_adb_run.py --target="//mace/ops:ops_benchmark" --run_target=False --enable_openmp=false --target_abis=host || exit 1;
- bazel build "//mace/ops:ops_benchmark" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
env: TYPE=Ops-Benchmark
os: osx
osx_image: xcode7.2
- stage: Unit Test - stage: Unit Test
script: script:
- DYNAMIC_LIB_PATH="bazel-bin/mace/libmace/libmace.so" - DYNAMIC_LIB_PATH="bazel-bin/mace/libmace/libmace.so"
...@@ -114,7 +129,7 @@ jobs: ...@@ -114,7 +129,7 @@ jobs:
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- echo 'Extra Test' - echo 'Extra Test'
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --run_target=False --target_abis=armeabi-v7a || exit 1
env: TYPE=Extra-Test-ARMEABI-v7a env: TYPE=Extra-Test-ARMEABI-v7a
os: linux os: linux
dist: xenial dist: xenial
...@@ -130,3 +145,26 @@ jobs: ...@@ -130,3 +145,26 @@ jobs:
os: linux os: linux
dist: xenial dist: xenial
sudo: required sudo: required
- stage: Extra Test
script:
- bazel build "//mace/libmace:libmace_static" --config=darwin --config=optimization_darwin --define openmp=false --define quantize=true --config symbol_hidden || exit 1;
- bazel build "//mace/libmace:libmace_dynamic" --config=darwin --config=optimization_darwin --define openmp=false --define quantize=true --config symbol_hidden || exit 1;
env: TYPE=Build-Library
os: osx
osx_image: xcode7.2
- stage: Extra Test
script:
- bazel build "//mace/libmace:libmace_static" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
- bazel build "//mace/libmace:libmace_dynamic" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
env: TYPE=Build-Library
os: osx
osx_image: xcode7.2
- stage: Extra Test
script:
- echo 'API Test'
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1
- python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1
env: TYPE=Extra-Test-darwin_x86_64
os: osx
osx_image: xcode7.2
...@@ -88,10 +88,10 @@ http_archive( ...@@ -88,10 +88,10 @@ http_archive(
http_archive( http_archive(
name = "tflite", name = "tflite",
sha256 = "c886d46ad8c91fcafed2d910ad9e7bc5aeb29856c387bdf9b6b4903cc16e6e60", sha256 = "1bb4571ee5cbde427ecfed076b39edaad96ace897ab86bb2495bdb93c706b203",
strip_prefix = "tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3", strip_prefix = "tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3",
urls = [ urls = [
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/tflite/tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3.zip", "http://cnbj1.fds.api.xiaomi.com/mace/third-party/tflite/tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3_custom.zip",
], ],
) )
......
...@@ -12,8 +12,8 @@ load( ...@@ -12,8 +12,8 @@ load(
"if_android", "if_android",
"if_android_armv7", "if_android_armv7",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_hta_enabled",
"if_hexagon_or_hta_enabled", "if_hexagon_or_hta_enabled",
"if_hta_enabled",
"if_neon_enabled", "if_neon_enabled",
"if_not_hexagon_enabled", "if_not_hexagon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
......
...@@ -17,9 +17,30 @@ ...@@ -17,9 +17,30 @@
// Do not include cl2.hpp directly, include this header instead. // Do not include cl2.hpp directly, include this header instead.
#include "mace/port/port-arch.h"
#define CL_HPP_MINIMUM_OPENCL_VERSION 110 #define CL_HPP_MINIMUM_OPENCL_VERSION 110
#ifdef MACE_OS_MAC
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_TARGET_OPENCL_VERSION 120
#else
#define CL_HPP_TARGET_OPENCL_VERSION 200 #define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_TARGET_OPENCL_VERSION 200
#endif // MACE_OS_MAC
#ifdef MACE_OS_MAC
// disable deprecated warning in macOS 10.14
#define CL_SILENCE_DEPRECATION
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif // MACE_OS_MAC
#include "include/CL/cl2.hpp" #include "include/CL/cl2.hpp"
#ifdef MACE_OS_MAC
#pragma GCC diagnostic pop
#endif
#endif // MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_ #endif // MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
...@@ -154,16 +154,19 @@ const std::string OpenCLErrorToString(cl_int error) { ...@@ -154,16 +154,19 @@ const std::string OpenCLErrorToString(cl_int error) {
return "CL_INVALID_LINKER_OPTIONS"; return "CL_INVALID_LINKER_OPTIONS";
case CL_INVALID_DEVICE_PARTITION_COUNT: case CL_INVALID_DEVICE_PARTITION_COUNT:
return "CL_INVALID_DEVICE_PARTITION_COUNT"; return "CL_INVALID_DEVICE_PARTITION_COUNT";
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
case CL_INVALID_PIPE_SIZE: case CL_INVALID_PIPE_SIZE:
return "CL_INVALID_PIPE_SIZE"; return "CL_INVALID_PIPE_SIZE";
case CL_INVALID_DEVICE_QUEUE: case CL_INVALID_DEVICE_QUEUE:
return "CL_INVALID_DEVICE_QUEUE"; return "CL_INVALID_DEVICE_QUEUE";
#endif
default: default:
return MakeString("UNKNOWN: ", error); return MakeString("UNKNOWN: ", error);
} }
} }
namespace { namespace {
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
void OpenCLPrintfCallback(const char *buffer, void OpenCLPrintfCallback(const char *buffer,
size_t length, size_t length,
size_t final, size_t final,
...@@ -172,6 +175,7 @@ void OpenCLPrintfCallback(const char *buffer, ...@@ -172,6 +175,7 @@ void OpenCLPrintfCallback(const char *buffer,
MACE_UNUSED(user_data); MACE_UNUSED(user_data);
fwrite(buffer, 1, length, stdout); fwrite(buffer, 1, length, stdout);
} }
#endif
void GetAdrenoContextProperties(std::vector<cl_context_properties> *properties, void GetAdrenoContextProperties(std::vector<cl_context_properties> *properties,
GPUPerfHint gpu_perf_hint, GPUPerfHint gpu_perf_hint,
...@@ -340,6 +344,7 @@ OpenCLRuntime::OpenCLRuntime( ...@@ -340,6 +344,7 @@ OpenCLRuntime::OpenCLRuntime(
new cl::Context({*device_}, context_properties.data(), new cl::Context({*device_}, context_properties.data(),
nullptr, nullptr, &err)); nullptr, nullptr, &err));
} else { } else {
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) { if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) {
std::vector<cl_context_properties> context_properties = { std::vector<cl_context_properties> context_properties = {
CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(), CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(),
...@@ -353,6 +358,10 @@ OpenCLRuntime::OpenCLRuntime( ...@@ -353,6 +358,10 @@ OpenCLRuntime::OpenCLRuntime(
context_ = std::shared_ptr<cl::Context>( context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
} }
#else
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
#endif
} }
if (err != CL_SUCCESS) { if (err != CL_SUCCESS) {
LOG(ERROR) << "error: " << OpenCLErrorToString(err); LOG(ERROR) << "error: " << OpenCLErrorToString(err);
......
...@@ -17,14 +17,25 @@ ...@@ -17,14 +17,25 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "mace/port/port-arch.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
/** /**
* Wrapper of OpenCL 2.0, based on file opencl20/CL/cl.h * Wrapper of OpenCL 2.0, based on file opencl20/CL/cl.h
*/ */
#ifdef MACE_OS_MAC
typedef cl_queue_properties_APPLE cl_queue_properties;
#endif
#if CL_HPP_TARGET_OPENCL_VERSION < 200
#define CL_API_SUFFIX__VERSION_2_0
#endif
namespace mace { namespace mace {
namespace runtime { namespace runtime {
class OpenCLLibrary final { class OpenCLLibrary final {
private: private:
OpenCLLibrary(); OpenCLLibrary();
......
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "mace/utils/memory.h"
namespace mace { namespace mace {
ScratchImageManager::ScratchImageManager() = default; ScratchImageManager::ScratchImageManager() = default;
...@@ -43,8 +45,7 @@ Image *ScratchImageManager::Spawn( ...@@ -43,8 +45,7 @@ Image *ScratchImageManager::Spawn(
// if not found // if not found
if (found_image_idx == -1) { if (found_image_idx == -1) {
reference_count_.push_back(0); reference_count_.push_back(0);
images_[image_count] = images_[image_count] = make_unique<Image>(allocator);
std::move(std::unique_ptr<Image>(new Image(allocator)));
if (images_.at(image_count)->Allocate(shape, dt) != if (images_.at(image_count)->Allocate(shape, dt) !=
MaceStatus::MACE_SUCCESS) { MaceStatus::MACE_SUCCESS) {
return nullptr; return nullptr;
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
load( load(
"//mace:mace.bzl", "//mace:mace.bzl",
"if_android", "if_android",
"if_darwin",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_hta_enabled", "if_hta_enabled",
"if_opencl_enabled", "if_opencl_enabled",
...@@ -18,10 +19,12 @@ cc_binary( ...@@ -18,10 +19,12 @@ cc_binary(
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]), ]),
linkopts = [ linkopts = [
"-fuse-ld=gold",
"-lm", "-lm",
"-ldl", "-ldl",
] + if_openmp_enabled([ ] + if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp", "-fopenmp",
]) + if_android([ ]) + if_android([
"-ldl", "-ldl",
...@@ -33,10 +36,11 @@ cc_binary( ...@@ -33,10 +36,11 @@ cc_binary(
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace", "//mace/codegen:generated_libmace",
"//mace/utils:utils_hdrs",
] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary", "//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter", "//mace/codegen:generated_opencl_parameter",
"//mace/utils:utils_hdrs", ]) + if_hexagon_enabled([
] + if_hexagon_enabled([
"//third_party/nnlib:libhexagon", "//third_party/nnlib:libhexagon",
]) + if_hta_enabled([ ]) + if_hta_enabled([
"//third_party/hta", "//third_party/hta",
...@@ -54,10 +58,12 @@ cc_binary( ...@@ -54,10 +58,12 @@ cc_binary(
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]), ]),
linkopts = [ linkopts = [
"-fuse-ld=gold",
"-lm", "-lm",
"-ldl", "-ldl",
] + if_android([ ] + if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_android([
"-ldl", "-ldl",
"-pie", "-pie",
"-llog", "-llog",
...@@ -67,8 +73,9 @@ cc_binary( ...@@ -67,8 +73,9 @@ cc_binary(
"//external:gflags_nothreads", "//external:gflags_nothreads",
"//mace/codegen:generated_libmace", "//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_mace_engine_factory",
"//mace/utils:utils_hdrs",
] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary", "//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter", "//mace/codegen:generated_opencl_parameter",
"//mace/utils:utils_hdrs", ]),
],
) )
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#include <dirent.h> #include <dirent.h>
#include <errno.h> #include <errno.h>
#include <fcntl.h> #include <fcntl.h>
#include <malloc.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <unistd.h>
......
...@@ -10,14 +10,14 @@ licenses(["notice"]) # Apache 2.0 ...@@ -10,14 +10,14 @@ licenses(["notice"]) # Apache 2.0
load( load(
"//mace:mace.bzl", "//mace:mace.bzl",
"if_android", "if_android",
"if_linux",
"if_darwin",
"if_neon_enabled",
"if_openmp_enabled",
"if_android_armv7", "if_android_armv7",
"if_darwin",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_hta_enabled", "if_hta_enabled",
"if_linux",
"if_neon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_openmp_enabled",
"if_quantize_enabled", "if_quantize_enabled",
) )
...@@ -53,11 +53,14 @@ cc_library( ...@@ -53,11 +53,14 @@ cc_library(
cc_binary( cc_binary(
name = "libmace.so", name = "libmace.so",
linkopts = [ linkopts = if_darwin(
"-Wl,-soname,libmace.so", ["-Wl,-install_name,libmace.so"],
"-Wl,--version-script", [
"$(location //mace/libmace:mace_version_script.lds)", "-Wl,-soname,libmace.so",
] + if_openmp_enabled([ "-Wl,--version-script",
"$(location //mace/libmace:mace_version_script.lds)",
],
) + if_openmp_enabled([
"-fopenmp", "-fopenmp",
]), ]),
linkshared = 1, linkshared = 1,
...@@ -96,6 +99,7 @@ genrule( ...@@ -96,6 +99,7 @@ genrule(
"//mace/public", "//mace/public",
"//mace/utils", "//mace/utils",
"//mace/proto:mace_cc", "//mace/proto:mace_cc",
"//mace/port/linux_base:port_linux_base",
"@com_google_protobuf//:protobuf_lite", "@com_google_protobuf//:protobuf_lite",
] + if_android([ ] + if_android([
"//mace/port/android:port_android", "//mace/port/android:port_android",
...@@ -103,6 +107,7 @@ genrule( ...@@ -103,6 +107,7 @@ genrule(
"//mace/port/linux:port_linux", "//mace/port/linux:port_linux",
]) + if_darwin([ ]) + if_darwin([
"//mace/port/darwin:port_darwin", "//mace/port/darwin:port_darwin",
"//mace/port/darwin:darwin_ar_merge",
]) + if_opencl_enabled([ ]) + if_opencl_enabled([
"//mace/ops:opencl_kernels", "//mace/ops:opencl_kernels",
"//mace/codegen:generated_opencl", "//mace/codegen:generated_opencl",
...@@ -130,10 +135,12 @@ genrule( ...@@ -130,10 +135,12 @@ genrule(
"$(locations //mace/port:port_base) " + "$(locations //mace/port:port_base) " +
"$(locations //mace/port/posix:port_posix) " + "$(locations //mace/port/posix:port_posix) " +
if_android( if_android(
"$(locations //mace/port/linux_base:port_linux_base) " +
"$(locations //mace/port/android:port_android) ", "$(locations //mace/port/android:port_android) ",
default_value = "", default_value = "",
) + ) +
if_linux( if_linux(
"$(locations //mace/port/linux_base:port_linux_base) " +
"$(locations //mace/port/linux:port_linux) ", "$(locations //mace/port/linux:port_linux) ",
default_value = "", default_value = "",
) + ) +
...@@ -151,7 +158,10 @@ genrule( ...@@ -151,7 +158,10 @@ genrule(
) + ) +
"$@ " + "$@ " +
"$$tmp_mri_file);" + "$$tmp_mri_file);" +
"$(AR) -M <$$tmp_mri_file;" + if_darwin(
"bash $(locations //mace/port/darwin:darwin_ar_merge) $(AR) < $$tmp_mri_file;",
default_value = "$(AR) -M <$$tmp_mri_file;",
) +
"rm -rf $$tmp_mri_file;", "rm -rf $$tmp_mri_file;",
# "$(STRIP) -x $@;", # FIXME this will crash # "$(STRIP) -x $@;", # FIXME this will crash
tools = ["//mace/python/tools:archive_static_lib"], tools = ["//mace/python/tools:archive_static_lib"],
......
...@@ -9,11 +9,11 @@ licenses(["notice"]) # Apache 2.0 ...@@ -9,11 +9,11 @@ licenses(["notice"]) # Apache 2.0
load( load(
"//mace:mace.bzl", "//mace:mace.bzl",
"if_android", "if_android",
"if_neon_enabled",
"if_openmp_enabled",
"if_android_armv7", "if_android_armv7",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_neon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_openmp_enabled",
"if_quantize_enabled", "if_quantize_enabled",
) )
...@@ -55,7 +55,7 @@ cc_library( ...@@ -55,7 +55,7 @@ cc_library(
cc_library( cc_library(
name = "testing", name = "testing",
hdrs = [ hdrs = [
"testing/test_utils.h", "testing/test_utils.h",
], ],
copts = [ copts = [
"-Werror", "-Werror",
...@@ -450,7 +450,9 @@ cc_test( ...@@ -450,7 +450,9 @@ cc_test(
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
linkopts = ["-fopenmp"], linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":ops", ":ops",
...@@ -485,12 +487,15 @@ cc_test( ...@@ -485,12 +487,15 @@ cc_test(
]) + if_hexagon_enabled([ ]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON", "-DMACE_ENABLE_HEXAGON",
]), ]),
linkopts = ["-fopenmp"], linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":ops", ":ops",
"//mace/benchmark:statistics", "//mace/benchmark:statistics",
"//mace/core:test_benchmark_main", "//mace/core:test_benchmark_main",
"//third_party/eigen3", "//third_party/eigen3",
"@gemmlowp",
], ],
) )
...@@ -67,10 +67,15 @@ void ReluBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -67,10 +67,15 @@ void ReluBenchmark(int iters, int batch, int channels, int height, int width) {
} \ } \
MACE_BENCHMARK(MACE_BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RELU(N, C, H, W) \ #define MACE_BM_RELU(N, C, H, W) \
MACE_BM_RELU_MACRO(N, C, H, W, float, CPU); \ MACE_BM_RELU_MACRO(N, C, H, W, float, CPU); \
MACE_BM_RELU_MACRO(N, C, H, W, float, GPU); \ MACE_BM_RELU_MACRO(N, C, H, W, float, GPU); \
MACE_BM_RELU_MACRO(N, C, H, W, half, GPU); MACE_BM_RELU_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_RELU(N, C, H, W) \
MACE_BM_RELU_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_RELU(1, 1, 512, 512); MACE_BM_RELU(1, 1, 512, 512);
MACE_BM_RELU(1, 3, 128, 128); MACE_BM_RELU(1, 3, 128, 128);
...@@ -123,10 +128,15 @@ void ReluxBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -123,10 +128,15 @@ void ReluxBenchmark(int iters, int batch, int channels, int height, int width) {
} \ } \
MACE_BENCHMARK(MACE_BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RELUX(N, C, H, W) \ #define MACE_BM_RELUX(N, C, H, W) \
MACE_BM_RELUX_MACRO(N, C, H, W, float, CPU); \ MACE_BM_RELUX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_RELUX_MACRO(N, C, H, W, float, GPU); \ MACE_BM_RELUX_MACRO(N, C, H, W, float, GPU); \
MACE_BM_RELUX_MACRO(N, C, H, W, half, GPU); MACE_BM_RELUX_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_RELUX(N, C, H, W) \
MACE_BM_RELUX_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_RELUX(1, 1, 512, 512); MACE_BM_RELUX(1, 1, 512, 512);
MACE_BM_RELUX(1, 3, 128, 128); MACE_BM_RELUX(1, 3, 128, 128);
...@@ -182,10 +192,15 @@ void PreluBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -182,10 +192,15 @@ void PreluBenchmark(int iters, int batch, int channels, int height, int width) {
} \ } \
MACE_BENCHMARK(MACE_BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_PRELU(N, C, H, W) \ #define MACE_BM_PRELU(N, C, H, W) \
MACE_BM_PRELU_MACRO(N, C, H, W, float, CPU); \ MACE_BM_PRELU_MACRO(N, C, H, W, float, CPU); \
MACE_BM_PRELU_MACRO(N, C, H, W, float, GPU); \ MACE_BM_PRELU_MACRO(N, C, H, W, float, GPU); \
MACE_BM_PRELU_MACRO(N, C, H, W, half, GPU); MACE_BM_PRELU_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_PRELU(N, C, H, W) \
MACE_BM_PRELU_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_PRELU(1, 1, 512, 512); MACE_BM_PRELU(1, 1, 512, 512);
MACE_BM_PRELU(1, 3, 128, 128); MACE_BM_PRELU(1, 3, 128, 128);
...@@ -237,10 +252,15 @@ void TanhBenchmark(int iters, int batch, int channels, int height, int width) { ...@@ -237,10 +252,15 @@ void TanhBenchmark(int iters, int batch, int channels, int height, int width) {
} \ } \
MACE_BENCHMARK(MACE_BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_TANH(N, C, H, W) \ #define MACE_BM_TANH(N, C, H, W) \
MACE_BM_TANH_MACRO(N, C, H, W, float, CPU); \ MACE_BM_TANH_MACRO(N, C, H, W, float, CPU); \
MACE_BM_TANH_MACRO(N, C, H, W, float, GPU); \ MACE_BM_TANH_MACRO(N, C, H, W, float, GPU); \
MACE_BM_TANH_MACRO(N, C, H, W, half, GPU); MACE_BM_TANH_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_TANH(N, C, H, W) \
MACE_BM_TANH_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_TANH(1, 1, 512, 512); MACE_BM_TANH(1, 1, 512, 512);
MACE_BM_TANH(1, 3, 128, 128); MACE_BM_TANH(1, 3, 128, 128);
...@@ -293,10 +313,15 @@ void SigmoidBenchmark( ...@@ -293,10 +313,15 @@ void SigmoidBenchmark(
} \ } \
MACE_BENCHMARK(MACE_BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SIGMOID(N, C, H, W) \ #define MACE_BM_SIGMOID(N, C, H, W) \
MACE_BM_SIGMOID_MACRO(N, C, H, W, float, CPU); \ MACE_BM_SIGMOID_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SIGMOID_MACRO(N, C, H, W, float, GPU); \ MACE_BM_SIGMOID_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SIGMOID_MACRO(N, C, H, W, half, GPU); MACE_BM_SIGMOID_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_SIGMOID(N, C, H, W) \
MACE_BM_SIGMOID_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_SIGMOID(1, 1, 512, 512); MACE_BM_SIGMOID(1, 1, 512, 512);
MACE_BM_SIGMOID(1, 3, 128, 128); MACE_BM_SIGMOID(1, 3, 128, 128);
......
...@@ -65,10 +65,15 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { ...@@ -65,10 +65,15 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) MACE_BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_ADDN(INPUTS, N, H, W, C) \ #define MACE_BM_ADDN(INPUTS, N, H, W, C) \
MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \ MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \
MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, GPU); \ MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, GPU); \
MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, half, GPU); MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, half, GPU);
#else
#define MACE_BM_ADDN(INPUTS, N, H, W, C) \
MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU);
#endif
MACE_BM_ADDN(2, 1, 256, 256, 32); MACE_BM_ADDN(2, 1, 256, 256, 32);
MACE_BM_ADDN(2, 1, 128, 128, 32); MACE_BM_ADDN(2, 1, 128, 128, 32);
......
...@@ -80,10 +80,15 @@ void BatchNorm( ...@@ -80,10 +80,15 @@ void BatchNorm(
} \ } \
MACE_BENCHMARK(MACE_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_BATCH_NORM(N, C, H, W) \ #define MACE_BM_BATCH_NORM(N, C, H, W) \
MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \ MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \
MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, GPU); \ MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, GPU); \
MACE_BM_BATCH_NORM_MACRO(N, C, H, W, half, GPU); MACE_BM_BATCH_NORM_MACRO(N, C, H, W, half, GPU);
#else
#define MACE_BM_BATCH_NORM(N, C, H, W) \
MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU);
#endif
MACE_BM_BATCH_NORM(1, 1, 512, 512); MACE_BM_BATCH_NORM(1, 1, 512, 512);
MACE_BM_BATCH_NORM(1, 3, 128, 128); MACE_BM_BATCH_NORM(1, 3, 128, 128);
......
...@@ -64,9 +64,14 @@ void BMBatchToSpace( ...@@ -64,9 +64,14 @@ void BMBatchToSpace(
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE) MACE_BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_BATCH_TO_SPACE(N, H, W, C, ARG) \ #define MACE_BM_BATCH_TO_SPACE(N, H, W, C, ARG) \
MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, GPU); \ MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, GPU); \
MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, CPU); MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, CPU);
#else
#define MACE_BM_BATCH_TO_SPACE(N, H, W, C, ARG) \
MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, CPU);
#endif
MACE_BM_BATCH_TO_SPACE(128, 8, 8, 128, 2); MACE_BM_BATCH_TO_SPACE(128, 8, 8, 128, 2);
MACE_BM_BATCH_TO_SPACE(4, 128, 128, 32, 2); MACE_BM_BATCH_TO_SPACE(4, 128, 128, 32, 2);
......
...@@ -100,7 +100,7 @@ class BiasAddOp<DeviceType::GPU, T> : public Operation { ...@@ -100,7 +100,7 @@ class BiasAddOp<DeviceType::GPU, T> : public Operation {
explicit BiasAddOp(OpConstructContext *context) explicit BiasAddOp(OpConstructContext *context)
: Operation(context), : Operation(context),
has_data_format_(Operation::GetOptionalArg<int>("has_data_format", 1)) { has_data_format_(Operation::GetOptionalArg<int>("has_data_format", 1)) {
MemoryType mem_type; MemoryType mem_type = MemoryType::CPU_BUFFER;
if (context->device()->gpu_runtime()->UseImageMemory()) { if (context->device()->gpu_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE; mem_type = MemoryType::GPU_IMAGE;
kernel_ = make_unique<opencl::image::BiasAddKernel<T>>(); kernel_ = make_unique<opencl::image::BiasAddKernel<T>>();
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -70,10 +69,15 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) { ...@@ -70,10 +69,15 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) {
} \ } \
MACE_BENCHMARK(MACE_BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_BIAS_ADD(N, C, H, W) \ #define MACE_BM_BIAS_ADD(N, C, H, W) \
MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU); \ MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU); \
MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, GPU); \ MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, GPU); \
MACE_BM_BIAS_ADD_MACRO(N, C, H, W, half, GPU); MACE_BM_BIAS_ADD_MACRO(N, C, H, W, half, GPU);
#else
#define MACE_BM_BIAS_ADD(N, C, H, W) \
MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU);
#endif
MACE_BM_BIAS_ADD(1, 1, 512, 512); MACE_BM_BIAS_ADD(1, 1, 512, 512);
MACE_BM_BIAS_ADD(1, 3, 128, 128); MACE_BM_BIAS_ADD(1, 3, 128, 128);
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/opencl/buffer_transformer.h" #include "mace/ops/opencl/buffer_transformer.h"
...@@ -96,3 +98,5 @@ MACE_BM_B2I(256, 32, 3, 3); ...@@ -96,3 +98,5 @@ MACE_BM_B2I(256, 32, 3, 3);
} // namespace test } // namespace test
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_ENABLE_OPENCL
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
#include "mace/ops/opencl/buffer_transformer.h" #include "mace/ops/opencl/buffer_transformer.h"
...@@ -242,3 +244,5 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) { ...@@ -242,3 +244,5 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) {
} // namespace test } // namespace test
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_ENABLE_OPENCL
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include <cstring> #include <cstring>
#include "gtest/gtest.h" #include "gtest/gtest.h"
...@@ -111,3 +113,6 @@ TEST_F(BufferTransformTest, Argument) { ...@@ -111,3 +113,6 @@ TEST_F(BufferTransformTest, Argument) {
} // namespace test } // namespace test
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_ENABLE_OPENCL
...@@ -67,10 +67,15 @@ void ChannelShuffle( ...@@ -67,10 +67,15 @@ void ChannelShuffle(
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE) MACE_BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_CHANNEL_SHUFFLE(N, C, H, W, G) \ #define MACE_BM_CHANNEL_SHUFFLE(N, C, H, W, G) \
MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \ MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \
MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, GPU); \ MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, GPU); \
MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, GPU); MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, GPU);
#else
#define MACE_BM_CHANNEL_SHUFFLE(N, C, H, W, G) \
MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU);
#endif
MACE_BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8); MACE_BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8);
MACE_BM_CHANNEL_SHUFFLE(1, 64, 128, 128, 8); MACE_BM_CHANNEL_SHUFFLE(1, 64, 128, 128, 8);
......
...@@ -63,9 +63,14 @@ void ConcatHelper(int iters, int concat_dim, int dim0, int dim1) { ...@@ -63,9 +63,14 @@ void ConcatHelper(int iters, int concat_dim, int dim0, int dim1) {
} \ } \
MACE_BENCHMARK(MACE_BM_CONCAT_CPU_##AXIS##_##DIM0##_##DIM1##_##TYPE) MACE_BENCHMARK(MACE_BM_CONCAT_CPU_##AXIS##_##DIM0##_##DIM1##_##TYPE)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_CONCAT_CPU(AXIS, DIM0, DIM1) \ #define MACE_BM_CONCAT_CPU(AXIS, DIM0, DIM1) \
MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, float); \ MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, float); \
MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, uint8_t); \ MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, uint8_t)
#else
#define MACE_BM_CONCAT_CPU(AXIS, DIM0, DIM1) \
MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, float)
#endif
MACE_BM_CONCAT_CPU(0, 100, 1000); MACE_BM_CONCAT_CPU(0, 100, 1000);
MACE_BM_CONCAT_CPU(0, 100, 100000); MACE_BM_CONCAT_CPU(0, 100, 100000);
...@@ -73,6 +78,7 @@ MACE_BM_CONCAT_CPU(1, 100, 1000); ...@@ -73,6 +78,7 @@ MACE_BM_CONCAT_CPU(1, 100, 1000);
MACE_BM_CONCAT_CPU(1, 100, 100000); MACE_BM_CONCAT_CPU(1, 100, 100000);
MACE_BM_CONCAT_CPU(1, 1225, 128); MACE_BM_CONCAT_CPU(1, 1225, 128);
#ifdef MACE_ENABLE_OPENCL
namespace { namespace {
template <typename T> template <typename T>
void OpenCLConcatHelper(int iters, void OpenCLConcatHelper(int iters,
...@@ -129,6 +135,8 @@ MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half); ...@@ -129,6 +135,8 @@ MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half);
MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, half); MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, half);
MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, half); MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, half);
#endif // MACE_ENABLE_OPENCL
} // namespace test } // namespace test
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
...@@ -79,6 +79,7 @@ void Conv2d(int iters, ...@@ -79,6 +79,7 @@ void Conv2d(int iters,
} }
} }
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
void Conv2d<CPU, uint8_t>(int iters, void Conv2d<CPU, uint8_t>(int iters,
int batch, int batch,
...@@ -132,6 +133,7 @@ void Conv2d<CPU, uint8_t>(int iters, ...@@ -132,6 +133,7 @@ void Conv2d<CPU, uint8_t>(int iters,
net.Sync(); net.Sync();
} }
} }
#endif
} // namespace } // namespace
...@@ -167,12 +169,25 @@ void Conv2d<CPU, uint8_t>(int iters, ...@@ -167,12 +169,25 @@ void Conv2d<CPU, uint8_t>(int iters,
MACE_BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##\ MACE_BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##\
DILATION##_##P##_##OC##_##TYPE##_##DEVICE) DILATION##_##P##_##OC##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \ #define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \ MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \ MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU); \ MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU); MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU)
#else
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU)
#endif
// Filter sizes and data alignments // Filter sizes and data alignments
......
...@@ -25,7 +25,9 @@ namespace test { ...@@ -25,7 +25,9 @@ namespace test {
class Conv2dOpTest : public OpsTestBase { class Conv2dOpTest : public OpsTestBase {
protected: protected:
virtual void SetUp() { virtual void SetUp() {
#ifdef MACE_ENABLE_OPENCL
OpTestContext::Get()->SetOCLImageTestFlag(); OpTestContext::Get()->SetOCLImageTestFlag();
#endif
} }
}; };
......
...@@ -41,10 +41,12 @@ void CropHelper(int iters, ...@@ -41,10 +41,12 @@ void CropHelper(int iters,
auto input_shape1 = TransposeShape<index_t, index_t>(shape1, {0, 3, 1, 2}); auto input_shape1 = TransposeShape<index_t, index_t>(shape1, {0, 3, 1, 2});
net.AddRandomInput<D, float>("Input0", input_shape0); net.AddRandomInput<D, float>("Input0", input_shape0);
net.AddRandomInput<D, float>("Input1", input_shape1); net.AddRandomInput<D, float>("Input1", input_shape1);
#ifdef MACE_ENABLE_OPENCL
} else if (D == DeviceType::GPU) { } else if (D == DeviceType::GPU) {
// Add input data // Add input data
net.AddRandomInput<D, T>("Input0", shape0); net.AddRandomInput<D, T>("Input0", shape0);
net.AddRandomInput<D, T>("Input1", shape1); net.AddRandomInput<D, T>("Input1", shape1);
#endif // MACE_ENABLE_OPENCL
} else { } else {
MACE_NOT_IMPLEMENTED; MACE_NOT_IMPLEMENTED;
} }
...@@ -85,17 +87,21 @@ void CropHelper(int iters, ...@@ -85,17 +87,21 @@ void CropHelper(int iters,
MACE_BENCHMARK(MACE_BM_CROP_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET\ MACE_BENCHMARK(MACE_BM_CROP_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET\
##_##DEVICE##_##TYPE) ##_##DEVICE##_##TYPE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \ #define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float); \ MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float); \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, float); \ MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, float); \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, half); MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, half)
#else
#define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float)
#endif // MACE_ENABLE_OPENCL
MACE_BM_CROP(4, 32, 32, 32, 2, 4); MACE_BM_CROP(4, 32, 32, 32, 2, 4);
MACE_BM_CROP(8, 32, 32, 64, 1, 0); MACE_BM_CROP(8, 32, 32, 64, 1, 0);
MACE_BM_CROP(8, 32, 32, 128, 0, 0); MACE_BM_CROP(8, 32, 32, 128, 0, 0);
MACE_BM_CROP(8, 32, 32, 256, 2, 4); MACE_BM_CROP(8, 32, 32, 256, 2, 4);
} // namespace test } // namespace test
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
...@@ -89,8 +89,6 @@ static void Deconv2d(int iters, ...@@ -89,8 +89,6 @@ static void Deconv2d(int iters,
##OW##_##P##_##OC##_##TYPE##_##DEVICE( \ ##OW##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \ int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t oh = OH; \
int64_t ow = OW; \
const int64_t macs = \ const int64_t macs = \
static_cast<int64_t>(iters) * mace::benchmark::StatMACs( \ static_cast<int64_t>(iters) * mace::benchmark::StatMACs( \
"Deconv2D", {OC, C, KH, KW}, {N, OH, OW, OC}); \ "Deconv2D", {OC, C, KH, KW}, {N, OH, OW, OC}); \
...@@ -104,10 +102,15 @@ static void Deconv2d(int iters, ...@@ -104,10 +102,15 @@ static void Deconv2d(int iters,
##OW##_##P##_##OC##_##TYPE##_##DEVICE) ##OW##_##P##_##OC##_##TYPE##_##DEVICE)
// TODO(liutuo): add cpu benchmark when optimized. // TODO(liutuo): add cpu benchmark when optimized.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_DECONV_2D(N, C, H, W, KH, KW, S, OH, OW, P, OC) \ #define MACE_BM_DECONV_2D(N, C, H, W, KH, KW, S, OH, OW, P, OC) \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, CPU); \ MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, CPU); \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, GPU); \ MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, GPU); \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, half, GPU); MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, half, GPU)
#else
#define MACE_BM_DECONV_2D(N, C, H, W, KH, KW, S, OH, OW, P, OC) \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, CPU)
#endif
MACE_BM_DECONV_2D(1, 32, 60, 60, 1, 1, 1, 60, 60, VALID, 128); MACE_BM_DECONV_2D(1, 32, 60, 60, 1, 1, 1, 60, 60, VALID, 128);
......
...@@ -68,10 +68,15 @@ void DepthToSpace( ...@@ -68,10 +68,15 @@ void DepthToSpace(
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_DEPTH_TO_SPACE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE) MACE_BM_DEPTH_TO_SPACE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_DEPTH_TO_SPACE(N, C, H, W, G) \ #define MACE_BM_DEPTH_TO_SPACE(N, C, H, W, G) \
MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU); \ MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU); \
MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, GPU); \ MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, GPU); \
MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, half, GPU); MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, half, GPU);
#else
#define MACE_BM_DEPTH_TO_SPACE(N, C, H, W, G) \
MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU)
#endif
MACE_BM_DEPTH_TO_SPACE(1, 64, 64, 64, 4); MACE_BM_DEPTH_TO_SPACE(1, 64, 64, 64, 4);
MACE_BM_DEPTH_TO_SPACE(1, 64, 128, 128, 4); MACE_BM_DEPTH_TO_SPACE(1, 64, 128, 128, 4);
......
...@@ -128,11 +128,25 @@ void DepthwiseConv2d(int iters, ...@@ -128,11 +128,25 @@ void DepthwiseConv2d(int iters,
MACE_BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE\ MACE_BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE\
##_##P##_##M##_##TYPE##_##DEVICE) ##_##P##_##M##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \ #define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \ MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \ MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU); \ MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU); MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU)
#else
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU)
#endif
MACE_BM_DEPTHWISE_CONV_2D(1, 32, 112, 112, 3, 3, 1, SAME, 1); MACE_BM_DEPTHWISE_CONV_2D(1, 32, 112, 112, 3, 3, 1, SAME, 1);
MACE_BM_DEPTHWISE_CONV_2D(1, 32, 56, 56, 3, 3, 2, VALID, 1); MACE_BM_DEPTHWISE_CONV_2D(1, 32, 56, 56, 3, 3, 2, VALID, 1);
......
...@@ -93,10 +93,15 @@ static void DepthwiseDeconv2d(int iters, ...@@ -93,10 +93,15 @@ static void DepthwiseDeconv2d(int iters,
MACE_BM_DEPTHWISE_DECONV2D_##N##_##C##_##H##_##W##_##KH##_##KW##_##S##_##P\ MACE_BM_DEPTHWISE_DECONV2D_##N##_##C##_##H##_##W##_##KH##_##KW##_##S##_##P\
##_##TYPE##_##DEVICE) ##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \ #define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \
MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU); \ MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU); \
MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, GPU); \ MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, GPU); \
MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, half, GPU); MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, half, GPU);
#else
#define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \
MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU)
#endif
MACE_BM_DEPTHWISE_DECONV2D(1, 128, 15, 15, 1, 1, 1, 0); MACE_BM_DEPTHWISE_DECONV2D(1, 128, 15, 15, 1, 1, 1, 0);
MACE_BM_DEPTHWISE_DECONV2D(1, 32, 60, 60, 1, 1, 1, 0); MACE_BM_DEPTHWISE_DECONV2D(1, 32, 60, 60, 1, 1, 1, 0);
......
...@@ -80,10 +80,15 @@ void EltwiseBenchmark( ...@@ -80,10 +80,15 @@ void EltwiseBenchmark(
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) MACE_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_ELTWISE(ELT_TYPE, N, H, W, C) \ #define MACE_BM_ELTWISE(ELT_TYPE, N, H, W, C) \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU); \ MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU); \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, GPU); \ MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, GPU); \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, GPU); MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, GPU)
#else
#define MACE_BM_ELTWISE(ELT_TYPE, N, H, W, C) \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU)
#endif
MACE_BM_ELTWISE(2, 1, 128, 128, 32); MACE_BM_ELTWISE(2, 1, 128, 128, 32);
MACE_BM_ELTWISE(2, 1, 240, 240, 256); MACE_BM_ELTWISE(2, 1, 240, 240, 256);
...@@ -93,8 +98,10 @@ MACE_BM_ELTWISE(0, 1, 240, 240, 256); ...@@ -93,8 +98,10 @@ MACE_BM_ELTWISE(0, 1, 240, 240, 256);
MACE_BM_ELTWISE(5, 1, 128, 128, 32); MACE_BM_ELTWISE(5, 1, 128, 128, 32);
MACE_BM_ELTWISE(5, 1, 240, 240, 256); MACE_BM_ELTWISE(5, 1, 240, 240, 256);
#ifdef MACE_ENABLE_QUANTIZE
MACE_BM_ELTWISE_MACRO(0, 1, 128, 128, 32, uint8_t, CPU); MACE_BM_ELTWISE_MACRO(0, 1, 128, 128, 32, uint8_t, CPU);
MACE_BM_ELTWISE_MACRO(1, 1, 128, 128, 32, uint8_t, CPU); MACE_BM_ELTWISE_MACRO(1, 1, 128, 128, 32, uint8_t, CPU);
#endif
} // namespace test } // namespace test
} // namespace ops } // namespace ops
......
...@@ -185,7 +185,7 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase { ...@@ -185,7 +185,7 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
public: public:
explicit FullyConnectedOp(OpConstructContext *context) explicit FullyConnectedOp(OpConstructContext *context)
: FullyConnectedOpBase(context) { : FullyConnectedOpBase(context) {
MemoryType mem_type; MemoryType mem_type = MemoryType::CPU_BUFFER;
if (context->device()->gpu_runtime()->UseImageMemory()) { if (context->device()->gpu_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE; mem_type = MemoryType::GPU_IMAGE;
kernel_ = make_unique<opencl::image::FullyConnectedKernel<T>>(); kernel_ = make_unique<opencl::image::FullyConnectedKernel<T>>();
......
...@@ -41,13 +41,15 @@ void FCBenchmark( ...@@ -41,13 +41,15 @@ void FCBenchmark(
{out_channel, channel, height, width}, true); {out_channel, channel, height, width}, true);
net.AddRandomInput<D, float>("Bias", {out_channel}, true); net.AddRandomInput<D, float>("Bias", {out_channel}, true);
OpenCLBufferType weight_type = OpenCLBufferType::WEIGHT_WIDTH;
OpDefBuilder("FullyConnected", "FullyConnectedTest") OpDefBuilder("FullyConnected", "FullyConnectedTest")
.Input("Input") .Input("Input")
.Input("Weight") .Input("Weight")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.AddIntArg("weight_type", static_cast<int>(weight_type)) #ifdef MACE_ENABLE_OPENCL
.AddIntArg("weight_type",
static_cast<int>(OpenCLBufferType::WEIGHT_WIDTH))
#endif
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef()); .Finalize(net.NewOperatorDef());
...@@ -64,6 +66,7 @@ void FCBenchmark( ...@@ -64,6 +66,7 @@ void FCBenchmark(
net.Sync(); net.Sync();
} }
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
void FCBenchmark<CPU, uint8_t>( void FCBenchmark<CPU, uint8_t>(
int iters, int batch, int height, int width, int channel, int out_channel) { int iters, int batch, int height, int width, int channel, int out_channel) {
...@@ -100,6 +103,8 @@ void FCBenchmark<CPU, uint8_t>( ...@@ -100,6 +103,8 @@ void FCBenchmark<CPU, uint8_t>(
net.Run(); net.Run();
} }
} }
#endif // MACE_ENABLE_QUANTIZE
} // namespace } // namespace
#define MACE_BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \ #define MACE_BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \
...@@ -116,11 +121,25 @@ void FCBenchmark<CPU, uint8_t>( ...@@ -116,11 +121,25 @@ void FCBenchmark<CPU, uint8_t>(
} \ } \
MACE_BENCHMARK(MACE_BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_FC(N, H, W, C, OC) \ #define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \ MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, GPU); \ MACE_BM_FC_MACRO(N, H, W, C, OC, float, GPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, half, GPU); \ MACE_BM_FC_MACRO(N, H, W, C, OC, half, GPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU); MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, GPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU)
#else
#define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU)
#endif
MACE_BM_FC(1, 16, 16, 32, 32); MACE_BM_FC(1, 16, 16, 32, 32);
MACE_BM_FC(1, 8, 8, 32, 1000); MACE_BM_FC(1, 8, 8, 32, 1000);
......
...@@ -90,10 +90,15 @@ void LSTMCell(int iters, int batch, int input_size, int hidden_units) { ...@@ -90,10 +90,15 @@ void LSTMCell(int iters, int batch, int input_size, int hidden_units) {
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_LSTMCELL_##N##_##INPUT_SIZE##_##HIDDEN_UNITS##_##TYPE##_##DEVICE) MACE_BM_LSTMCELL_##N##_##INPUT_SIZE##_##HIDDEN_UNITS##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_LSTMCELL(N, INPUT_SIZE, HIDDEN_UNITS) \ #define MACE_BM_LSTMCELL(N, INPUT_SIZE, HIDDEN_UNITS) \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, CPU); \ MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, CPU); \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, GPU); \ MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, GPU); \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, half, GPU); MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, half, GPU)
#else
#define MACE_BM_LSTMCELL(N, INPUT_SIZE, HIDDEN_UNITS) \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, CPU)
#endif
MACE_BM_LSTMCELL(1, 64, 256); MACE_BM_LSTMCELL(1, 64, 256);
MACE_BM_LSTMCELL(30, 64, 256); MACE_BM_LSTMCELL(30, 64, 256);
......
...@@ -106,6 +106,7 @@ void MatmulBenchmark_Eigen(int iters, int m, int k, int n) { ...@@ -106,6 +106,7 @@ void MatmulBenchmark_Eigen(int iters, int m, int k, int n) {
} }
} }
#ifdef MACE_ENABLE_QUANTIZE
void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) { void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) {
mace::testing::StopTiming(); mace::testing::StopTiming();
...@@ -181,6 +182,7 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) { ...@@ -181,6 +182,7 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) {
-128, output_pipeline); -128, output_pipeline);
} }
} }
#endif
} // namespace } // namespace
...@@ -195,10 +197,16 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) { ...@@ -195,10 +197,16 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) {
} \ } \
MACE_BENCHMARK(MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC) MACE_BENCHMARK(MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL(M, K, N) \ #define MACE_BM_MATMUL(M, K, N) \
MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \ MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \ MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t); MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t);
#else
#define MACE_BM_MATMUL(M, K, N) \
MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float)
#endif
// Embedding size 384 // Embedding size 384
MACE_BM_MATMUL(7, 384, 384); MACE_BM_MATMUL(7, 384, 384);
...@@ -247,7 +255,7 @@ MACE_BM_MATMUL(512, 512, 196); ...@@ -247,7 +255,7 @@ MACE_BM_MATMUL(512, 512, 196);
MACE_BM_MATMUL(1024, 1024, 49); MACE_BM_MATMUL(1024, 1024, 49);
namespace { namespace {
template <DeviceType D, typename T> template<DeviceType D, typename T>
void MatMulBenchmark( void MatMulBenchmark(
int iters, int batch, int height, int channels, int out_width) { int iters, int batch, int height, int channels, int out_width) {
mace::testing::StopTiming(); mace::testing::StopTiming();
...@@ -289,7 +297,7 @@ void MatMulBenchmark( ...@@ -289,7 +297,7 @@ void MatMulBenchmark(
net.Sync(); net.Sync();
} }
template <DeviceType D, typename T> template<DeviceType D, typename T>
void MatMulTransposeBenchmark( void MatMulTransposeBenchmark(
int iters, int batch, int height, int channels, int out_width) { int iters, int batch, int height, int channels, int out_width) {
mace::testing::StopTiming(); mace::testing::StopTiming();
...@@ -349,9 +357,14 @@ void MatMulTransposeBenchmark( ...@@ -349,9 +357,14 @@ void MatMulTransposeBenchmark(
} \ } \
MACE_BENCHMARK(MACE_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL_OP(N, H, C, W) \ #define MACE_BM_MATMUL_OP(N, H, C, W) \
MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU); \ MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU); \
MACE_BM_MATMUL_MACRO(N, H, C, W, uint8_t, CPU); MACE_BM_MATMUL_MACRO(N, H, C, W, uint8_t, CPU)
#else
#define MACE_BM_MATMUL_OP(N, H, C, W) \
MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU)
#endif
#define MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, TYPE, DEVICE) \ #define MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, TYPE, DEVICE) \
static void MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE( \ static void MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE( \
...@@ -365,9 +378,14 @@ void MatMulTransposeBenchmark( ...@@ -365,9 +378,14 @@ void MatMulTransposeBenchmark(
} \ } \
MACE_BENCHMARK(MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \ #define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \ MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU); MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU)
#else
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU)
#endif
MACE_BM_MATMUL_OP(1, 30000, 256, 1); MACE_BM_MATMUL_OP(1, 30000, 256, 1);
MACE_BM_MATMUL_OP(1, 128, 256, 128); MACE_BM_MATMUL_OP(1, 128, 256, 128);
......
...@@ -20,7 +20,6 @@ namespace mace { ...@@ -20,7 +20,6 @@ namespace mace {
namespace ops { namespace ops {
namespace test { namespace test {
OpDefBuilder::OpDefBuilder(const char *type, const std::string &name) { OpDefBuilder::OpDefBuilder(const char *type, const std::string &name) {
op_def_.set_type(type); op_def_.set_type(type);
op_def_.set_name(name); op_def_.set_name(name);
...@@ -102,11 +101,13 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const { ...@@ -102,11 +101,13 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const {
} }
namespace { namespace {
#ifdef MACE_ENABLE_OPENCL
std::string GetStoragePathFromEnv() { std::string GetStoragePathFromEnv() {
char *storage_path_str = getenv("MACE_INTERNAL_STORAGE_PATH"); char *storage_path_str = getenv("MACE_INTERNAL_STORAGE_PATH");
if (storage_path_str == nullptr) return ""; if (storage_path_str == nullptr) return "";
return storage_path_str; return storage_path_str;
} }
#endif
} // namespace } // namespace
OpTestContext *OpTestContext::Get(int num_threads, OpTestContext *OpTestContext::Get(int num_threads,
...@@ -120,27 +121,35 @@ OpTestContext *OpTestContext::Get(int num_threads, ...@@ -120,27 +121,35 @@ OpTestContext *OpTestContext::Get(int num_threads,
OpTestContext::OpTestContext(int num_threads, OpTestContext::OpTestContext(int num_threads,
CPUAffinityPolicy cpu_affinity_policy, CPUAffinityPolicy cpu_affinity_policy,
#ifdef MACE_ENABLE_OPENCL
bool use_gemmlowp) bool use_gemmlowp)
: gpu_context_(std::make_shared<GPUContext>(GetStoragePathFromEnv())), : gpu_context_(std::make_shared<GPUContext>(GetStoragePathFromEnv())),
opencl_mem_types_({MemoryType::GPU_IMAGE}) { opencl_mem_types_({MemoryType::GPU_IMAGE}) {
#else
bool use_gemmlowp) {
#endif
device_map_[DeviceType::CPU] = make_unique<CPUDevice>( device_map_[DeviceType::CPU] = make_unique<CPUDevice>(
num_threads, cpu_affinity_policy, use_gemmlowp); num_threads, cpu_affinity_policy, use_gemmlowp);
#ifdef MACE_ENABLE_OPENCL
device_map_[DeviceType::GPU] = make_unique<GPUDevice>( device_map_[DeviceType::GPU] = make_unique<GPUDevice>(
gpu_context_->opencl_tuner(), gpu_context_->opencl_tuner(),
gpu_context_->opencl_cache_storage(), gpu_context_->opencl_cache_storage(),
GPUPriorityHint::PRIORITY_NORMAL, GPUPriorityHint::PRIORITY_NORMAL,
GPUPerfHint::PERF_HIGH); GPUPerfHint::PERF_HIGH);
} #endif // MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> OpTestContext::gpu_context() const {
return gpu_context_;
} }
Device *OpTestContext::GetDevice(DeviceType device_type) { Device *OpTestContext::GetDevice(DeviceType device_type) {
return device_map_[device_type].get(); return device_map_[device_type].get();
} }
#ifdef MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> OpTestContext::gpu_context() const {
return gpu_context_;
}
std::vector<MemoryType> OpTestContext::opencl_mem_types() { std::vector<MemoryType> OpTestContext::opencl_mem_types() {
return opencl_mem_types_; return opencl_mem_types_;
} }
...@@ -156,6 +165,7 @@ void OpTestContext::SetOCLImageTestFlag() { ...@@ -156,6 +165,7 @@ void OpTestContext::SetOCLImageTestFlag() {
void OpTestContext::SetOCLImageAndBufferTestFlag() { void OpTestContext::SetOCLImageAndBufferTestFlag() {
opencl_mem_types_ = {MemoryType::GPU_IMAGE, MemoryType::GPU_BUFFER}; opencl_mem_types_ = {MemoryType::GPU_IMAGE, MemoryType::GPU_BUFFER};
} }
#endif // MACE_ENABLE_OPENCL
bool OpsTestNet::Setup(mace::DeviceType device) { bool OpsTestNet::Setup(mace::DeviceType device) {
NetDef net_def; NetDef net_def;
...@@ -227,6 +237,7 @@ MaceStatus OpsTestNet::Run() { ...@@ -227,6 +237,7 @@ MaceStatus OpsTestNet::Run() {
MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
if (device == DeviceType::GPU) { if (device == DeviceType::GPU) {
#ifdef MACE_ENABLE_OPENCL
auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types(); auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types();
for (auto type : opencl_mem_types) { for (auto type : opencl_mem_types) {
OpTestContext::Get()->GetDevice(device) OpTestContext::Get()->GetDevice(device)
...@@ -235,6 +246,9 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { ...@@ -235,6 +246,9 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
MACE_RETURN_IF_ERROR(Run()); MACE_RETURN_IF_ERROR(Run());
} }
return MaceStatus::MACE_SUCCESS; return MaceStatus::MACE_SUCCESS;
#else
return MaceStatus::MACE_UNSUPPORTED;
#endif
} else { } else {
Setup(device); Setup(device);
return Run(); return Run();
......
...@@ -28,8 +28,6 @@ ...@@ -28,8 +28,6 @@
#include "gtest/gtest.h" #include "gtest/gtest.h"
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/device_context.h" #include "mace/core/device_context.h"
#include "mace/core/runtime/opencl/gpu_device.h"
#include "mace/core/runtime/opencl/opencl_util.h"
#include "mace/core/tensor.h" #include "mace/core/tensor.h"
#include "mace/core/workspace.h" #include "mace/core/workspace.h"
#include "mace/ops/ops_registry.h" #include "mace/ops/ops_registry.h"
...@@ -39,6 +37,11 @@ ...@@ -39,6 +37,11 @@
#include "mace/utils/quantize.h" #include "mace/utils/quantize.h"
#include "mace/ops/testing/test_utils.h" #include "mace/ops/testing/test_utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/gpu_device.h"
#include "mace/core/runtime/opencl/opencl_util.h"
#endif
namespace mace { namespace mace {
namespace ops { namespace ops {
namespace test { namespace test {
...@@ -78,21 +81,28 @@ class OpTestContext { ...@@ -78,21 +81,28 @@ class OpTestContext {
int num_threads = -1, int num_threads = -1,
CPUAffinityPolicy cpu_affinity_policy = AFFINITY_BIG_ONLY, CPUAffinityPolicy cpu_affinity_policy = AFFINITY_BIG_ONLY,
bool use_gemmlowp = true); bool use_gemmlowp = true);
std::shared_ptr<GPUContext> gpu_context() const;
Device *GetDevice(DeviceType device_type); Device *GetDevice(DeviceType device_type);
#ifdef MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> gpu_context() const;
std::vector<MemoryType> opencl_mem_types(); std::vector<MemoryType> opencl_mem_types();
void SetOCLBufferTestFlag(); void SetOCLBufferTestFlag();
void SetOCLImageTestFlag(); void SetOCLImageTestFlag();
void SetOCLImageAndBufferTestFlag(); void SetOCLImageAndBufferTestFlag();
#endif
private: private:
OpTestContext(int num_threads, OpTestContext(int num_threads,
CPUAffinityPolicy cpu_affinity_policy, CPUAffinityPolicy cpu_affinity_policy,
bool use_gemmlowp); bool use_gemmlowp);
MACE_DISABLE_COPY_AND_ASSIGN(OpTestContext); MACE_DISABLE_COPY_AND_ASSIGN(OpTestContext);
std::map<DeviceType, std::unique_ptr<Device>> device_map_;
#ifdef MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> gpu_context_; std::shared_ptr<GPUContext> gpu_context_;
std::vector<MemoryType> opencl_mem_types_; std::vector<MemoryType> opencl_mem_types_;
std::map<DeviceType, std::unique_ptr<Device>> device_map_; #endif
}; };
class OpsTestNet { class OpsTestNet {
...@@ -420,7 +430,9 @@ class OpsTestBase : public ::testing::Test { ...@@ -420,7 +430,9 @@ class OpsTestBase : public ::testing::Test {
} }
virtual void TearDown() { virtual void TearDown() {
#ifdef MACE_ENABLE_OPENCL
OpTestContext::Get()->SetOCLImageTestFlag(); OpTestContext::Get()->SetOCLImageTestFlag();
#endif
} }
}; };
......
...@@ -71,10 +71,15 @@ void Pad(int iters, int batch, int height, ...@@ -71,10 +71,15 @@ void Pad(int iters, int batch, int height,
MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE \ MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE \
##_##DEVICE) ##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE) \ #define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE) \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU); \ MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU); \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, GPU); \ MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, GPU); \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU); MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU)
#else
#define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE) \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU)
#endif
#define MACE_BM_PAD(N, H, W, C, PAD) \ #define MACE_BM_PAD(N, H, W, C, PAD) \
MACE_BM_PAD_MODE(N, H, W, C, PAD, CONSTANT); \ MACE_BM_PAD_MODE(N, H, W, C, PAD, CONSTANT); \
......
...@@ -89,11 +89,25 @@ void Pooling(int iters, ...@@ -89,11 +89,25 @@ void Pooling(int iters,
MACE_BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_\ MACE_BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_\
##TYPE##_##DEVICE) ##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \ #define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \ MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, GPU); \ MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, GPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, half, GPU); \ MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, half, GPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU); MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, GPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU)
#else
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU)
#endif
MACE_BM_POOLING(1, 3, 129, 129, 2, 2, SAME, MAX); MACE_BM_POOLING(1, 3, 129, 129, 2, 2, SAME, MAX);
......
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -113,3 +115,5 @@ MACE_BM_DEQUANTIZE(1470000); ...@@ -113,3 +115,5 @@ MACE_BM_DEQUANTIZE(1470000);
} // namespace test } // namespace test
} // namespace ops } // namespace ops
} // namespace mace } // namespace mace
#endif // MACE_ENABLE_QUANTIZE
...@@ -67,10 +67,15 @@ void Reduce(int iters, int batch, int channels, ...@@ -67,10 +67,15 @@ void Reduce(int iters, int batch, int channels,
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_REDUCE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BM_REDUCE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_REDUCE(N, C, H, W) \ #define MACE_BM_REDUCE(N, C, H, W) \
MACE_BM_REDUCE_MACRO(N, C, H, W, float, GPU); \ MACE_BM_REDUCE_MACRO(N, C, H, W, float, GPU); \
MACE_BM_REDUCE_MACRO(N, C, H, W, half, GPU); \ MACE_BM_REDUCE_MACRO(N, C, H, W, half, GPU); \
MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU); MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU)
#else
#define MACE_BM_REDUCE(N, C, H, W) \
MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_REDUCE(1, 1, 512, 512); MACE_BM_REDUCE(1, 1, 512, 512);
......
...@@ -82,10 +82,15 @@ void ResizeBicubicBenchmark(int iters, ...@@ -82,10 +82,15 @@ void ResizeBicubicBenchmark(int iters,
MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\ MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE) ##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RESIZE_BICUBIC(N, C, H0, W0, H1, W1) \ #define MACE_BM_RESIZE_BICUBIC(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, half, GPU); MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#else
#define MACE_BM_RESIZE_BICUBIC(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, CPU)
#endif
MACE_BM_RESIZE_BICUBIC(1, 128, 120, 120, 480, 480); MACE_BM_RESIZE_BICUBIC(1, 128, 120, 120, 480, 480);
MACE_BM_RESIZE_BICUBIC(1, 256, 7, 7, 15, 15); MACE_BM_RESIZE_BICUBIC(1, 256, 7, 7, 15, 15);
......
...@@ -88,11 +88,25 @@ void ResizeBilinearBenchmark(int iters, ...@@ -88,11 +88,25 @@ void ResizeBilinearBenchmark(int iters,
MACE_BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\ MACE_BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE) ##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \ #define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU); \ MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU); MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU)
#else
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU)
#endif
MACE_BM_RESIZE_BILINEAR(1, 128, 120, 120, 480, 480); MACE_BM_RESIZE_BILINEAR(1, 128, 120, 120, 480, 480);
MACE_BM_RESIZE_BILINEAR(1, 256, 7, 7, 15, 15); MACE_BM_RESIZE_BILINEAR(1, 256, 7, 7, 15, 15);
......
...@@ -87,10 +87,15 @@ void ResizeNearestNeighborBenchmark(int iters, ...@@ -87,10 +87,15 @@ void ResizeNearestNeighborBenchmark(int iters,
MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\ MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\
##TYPE##_##DEVICE) ##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \ #define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU); MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#else
#define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU)
#endif
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 120, 120, 480, 480); MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 120, 120, 480, 480);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 7, 7, 15, 15); MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 7, 7, 15, 15);
......
...@@ -51,8 +51,6 @@ void Reverse(int iters, int batch, int channels, int height, int width) { ...@@ -51,8 +51,6 @@ void Reverse(int iters, int batch, int channels, int height, int width) {
#define MACE_BM_REVERSE_MACRO(N, C, H, W, TYPE, DEVICE) \ #define MACE_BM_REVERSE_MACRO(N, C, H, W, TYPE, DEVICE) \
static void MACE_BM_REVERSE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ static void MACE_BM_REVERSE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \ int iters) { \
const int64_t macs = \
static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Reverse<DEVICE, TYPE>(iters, N, C, H, W); \ Reverse<DEVICE, TYPE>(iters, N, C, H, W); \
......
...@@ -57,6 +57,7 @@ void SoftmaxBenchmark( ...@@ -57,6 +57,7 @@ void SoftmaxBenchmark(
net.Sync(); net.Sync();
} }
#ifdef MACE_ENABLE_QUANTIZE
template <> template <>
void SoftmaxBenchmark<CPU, uint8_t>( void SoftmaxBenchmark<CPU, uint8_t>(
int iters, int batch, int channels, int height, int width) { int iters, int batch, int channels, int height, int width) {
...@@ -80,6 +81,9 @@ void SoftmaxBenchmark<CPU, uint8_t>( ...@@ -80,6 +81,9 @@ void SoftmaxBenchmark<CPU, uint8_t>(
output->SetScale(0); output->SetScale(0);
output->SetZeroPoint(1); output->SetZeroPoint(1);
Tensor *input = net.GetTensor("Input");
input->SetScale(0.1);
// Warm-up // Warm-up
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
net.Run(); net.Run();
...@@ -92,6 +96,8 @@ void SoftmaxBenchmark<CPU, uint8_t>( ...@@ -92,6 +96,8 @@ void SoftmaxBenchmark<CPU, uint8_t>(
} }
net.Sync(); net.Sync();
} }
#endif // MACE_ENABLE_QUANTIZE
} // namespace } // namespace
#define MACE_BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \ #define MACE_BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \
...@@ -103,11 +109,25 @@ void SoftmaxBenchmark<CPU, uint8_t>( ...@@ -103,11 +109,25 @@ void SoftmaxBenchmark<CPU, uint8_t>(
} \ } \
MACE_BENCHMARK(MACE_BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BENCHMARK(MACE_BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SOFTMAX(N, C, H, W) \ #define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \ MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, uint8_t, CPU); \ MACE_BM_SOFTMAX_MACRO(N, C, H, W, uint8_t, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, GPU); \ MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU); MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, uint8_t, CPU)
#else
#define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_SOFTMAX(1, 2, 512, 512); MACE_BM_SOFTMAX(1, 2, 512, 512);
MACE_BM_SOFTMAX(1, 3, 512, 512); MACE_BM_SOFTMAX(1, 3, 512, 512);
......
...@@ -70,10 +70,23 @@ void BMSpaceToBatch( ...@@ -70,10 +70,23 @@ void BMSpaceToBatch(
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE) MACE_BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \ #define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \ MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \ MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU); MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU)
#else
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU)
#endif
MACE_BM_SPACE_TO_BATCH(128, 16, 16, 128, 2); MACE_BM_SPACE_TO_BATCH(128, 16, 16, 128, 2);
MACE_BM_SPACE_TO_BATCH(1, 256, 256, 32, 2); MACE_BM_SPACE_TO_BATCH(1, 256, 256, 32, 2);
......
...@@ -68,10 +68,15 @@ void SpaceToDepth( ...@@ -68,10 +68,15 @@ void SpaceToDepth(
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_SPACE_TO_DEPTH_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE) MACE_BM_SPACE_TO_DEPTH_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SPACE_TO_DEPTH(N, C, H, W, G) \ #define MACE_BM_SPACE_TO_DEPTH(N, C, H, W, G) \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU); \ MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU); \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, GPU); \ MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, GPU); \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, half, GPU); MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, half, GPU)
#else
#define MACE_BM_SPACE_TO_DEPTH(N, C, H, W, G) \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU)
#endif
MACE_BM_SPACE_TO_DEPTH(1, 64, 64, 64, 4); MACE_BM_SPACE_TO_DEPTH(1, 64, 64, 64, 4);
MACE_BM_SPACE_TO_DEPTH(1, 64, 128, 128, 4); MACE_BM_SPACE_TO_DEPTH(1, 64, 128, 128, 4);
......
...@@ -73,10 +73,15 @@ void BMSplitHelper(int iters, ...@@ -73,10 +73,15 @@ void BMSplitHelper(int iters,
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_SPLIT_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE) MACE_BM_SPLIT_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SPLIT(N, H, W, C, NO) \ #define MACE_BM_SPLIT(N, H, W, C, NO) \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, CPU); \ MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, CPU); \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, GPU); \ MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, GPU); \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, half, GPU); MACE_BM_SPLIT_MACRO(N, H, W, C, NO, half, GPU)
#else
#define MACE_BM_SPLIT(N, H, W, C, NO) \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, CPU)
#endif
MACE_BM_SPLIT(1, 32, 32, 32, 2); MACE_BM_SPLIT(1, 32, 32, 32, 2);
MACE_BM_SPLIT(1, 32, 32, 128, 2); MACE_BM_SPLIT(1, 32, 32, 128, 2);
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
// limitations under the License. // limitations under the License.
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
...@@ -69,10 +68,15 @@ void SqrDiffMean(int iters, int batch, int channels, ...@@ -69,10 +68,15 @@ void SqrDiffMean(int iters, int batch, int channels,
MACE_BENCHMARK( \ MACE_BENCHMARK( \
MACE_BM_SQRDIFF_MEAN_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) MACE_BM_SQRDIFF_MEAN_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SQRDIFF_MEAN(N, C, H, W) \ #define MACE_BM_SQRDIFF_MEAN(N, C, H, W) \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, GPU); \ MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, half, GPU); \ MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, half, GPU); \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU); MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU)
#else
#define MACE_BM_SQRDIFF_MEAN(N, C, H, W) \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_SQRDIFF_MEAN(1, 1, 512, 512); MACE_BM_SQRDIFF_MEAN(1, 1, 512, 512);
......
...@@ -48,7 +48,7 @@ cc_test( ...@@ -48,7 +48,7 @@ cc_test(
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":port", ":port",
"@gtest//:gtest", "@gtest",
"@gtest//:gtest_main", "@gtest//:gtest_main",
], ],
) )
...@@ -14,9 +14,8 @@ cc_library( ...@@ -14,9 +14,8 @@ cc_library(
hdrs = if_android(glob([ hdrs = if_android(glob([
"*.h", "*.h",
])), ])),
deps = [ deps = if_android([
"//mace/port:port_base", "//mace/port/linux_base:port_linux_base",
"//mace/port/posix:port_posix", ]),
],
alwayslink = 1, alwayslink = 1,
) )
...@@ -43,51 +43,12 @@ ...@@ -43,51 +43,12 @@
namespace mace { namespace mace {
namespace port { namespace port {
// Returns the current timestamp (microseconds, judging by the callee names).
// When compiled for Hexagon (__hexagon__ defined) the value comes from
// HAP_perf_get_time_us(); on every other target it is delegated to the
// shared POSIX implementation.
int64_t AndroidEnv::NowMicros() {
#ifdef __hexagon__
return HAP_perf_get_time_us();
#else
return mace::port::posix::NowMicros();
#endif
}
// Returns a non-owning pointer to the POSIX file system member held by this
// environment object.
FileSystem *AndroidEnv::GetFileSystem() {
return &posix_file_system_;
}
LogWriter *AndroidEnv::GetLogWriter() { LogWriter *AndroidEnv::GetLogWriter() {
return &log_writer_; return &log_writer_;
} }
namespace { namespace {
// Counts the CPUs listed in /proc/cpuinfo.
//
// Every line that begins with the (lower-case) key "processor" is counted as
// one CPU. Returns -1 when the file cannot be opened; read problems after a
// successful open are only logged and the count gathered so far is returned.
int GetCPUCount() {
int cpu_count = 0;
std::string cpu_sys_conf = "/proc/cpuinfo";
std::ifstream f(cpu_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpu_sys_conf;
return -1;
}
std::string line;
const std::string processor_key = "processor";
while (std::getline(f, line)) {
// Prefix match: the size guard keeps short lines from being compared.
if (line.size() >= processor_key.size()
&& line.compare(0, processor_key.size(), processor_key) == 0) {
++cpu_count;
}
}
// badbit signals an I/O error while reading; it is reported but not fatal.
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpu_sys_conf;
}
// The loop is expected to stop because end-of-file was reached.
if (!f.eof()) {
LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
}
f.close();
VLOG(1) << "CPU cores: " << cpu_count;
return cpu_count;
}
struct BacktraceState { struct BacktraceState {
void** current; void** current;
void** end; void** end;
...@@ -115,38 +76,6 @@ size_t BackTrace(void** buffer, size_t max) { ...@@ -115,38 +76,6 @@ size_t BackTrace(void** buffer, size_t max) {
} // namespace } // namespace
// Appends the maximum frequency of each CPU to *max_freqs.
//
// For cpu ids 0 .. GetCPUCount()-1 the first line of
// /sys/devices/system/cpu/cpu<id>/cpufreq/cpuinfo_max_freq is parsed with
// strtof() and pushed back, in cpu id order.
//
// Returns MACE_RUNTIME_ERROR when the CPU count is unavailable or one of the
// sysfs files cannot be opened; entries appended before the failure stay in
// *max_freqs. Returns MACE_SUCCESS otherwise.
// NOTE(review): a file that opens but yields no line adds no entry, so the
// vector index may stop matching the cpu id - confirm callers tolerate this.
MaceStatus AndroidEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
MACE_CHECK_NOTNULL(max_freqs);
int cpu_count = GetCPUCount();
if (cpu_count < 0) {
return MaceStatus::MACE_RUNTIME_ERROR;
}
for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
std::string cpuinfo_max_freq_sys_conf = MakeString(
"/sys/devices/system/cpu/cpu",
cpu_id,
"/cpufreq/cpuinfo_max_freq");
std::ifstream f(cpuinfo_max_freq_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpuinfo_max_freq_sys_conf;
return MaceStatus::MACE_RUNTIME_ERROR;
}
std::string line;
if (std::getline(f, line)) {
// strtof() yields 0 when the line holds no parsable number.
float freq = strtof(line.c_str(), nullptr);
max_freqs->push_back(freq);
}
// An I/O error is logged only; the loop continues with the next CPU.
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpuinfo_max_freq_sys_conf;
}
f.close();
}
VLOG(1) << "CPU freq: " << MakeString(*max_freqs);
return MaceStatus::MACE_SUCCESS;
}
MaceStatus AndroidEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) { MaceStatus AndroidEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) {
// compute mask // compute mask
cpu_set_t mask; cpu_set_t mask;
......
...@@ -20,18 +20,16 @@ ...@@ -20,18 +20,16 @@
#include <vector> #include <vector>
#include "mace/port/android/logger.h" #include "mace/port/android/logger.h"
#include "mace/port/posix/file_system.h"
#include "mace/port/env.h" #include "mace/port/env.h"
#include "mace/port/linux_base/env.h"
#include "mace/port/posix/file_system.h"
namespace mace { namespace mace {
namespace port { namespace port {
class AndroidEnv : public Env { class AndroidEnv : public LinuxBaseEnv {
public: public:
int64_t NowMicros() override;
MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids) override; MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids) override;
FileSystem *GetFileSystem() override;
LogWriter *GetLogWriter() override; LogWriter *GetLogWriter() override;
std::vector<std::string> GetBackTraceUnsafe(int max_steps) override; std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;
std::unique_ptr<MallocLogger> NewMallocLogger( std::unique_ptr<MallocLogger> NewMallocLogger(
...@@ -39,7 +37,6 @@ class AndroidEnv : public Env { ...@@ -39,7 +37,6 @@ class AndroidEnv : public Env {
const std::string &name) override; const std::string &name) override;
private: private:
PosixFileSystem posix_file_system_;
AndroidLogWriter log_writer_; AndroidLogWriter log_writer_;
}; };
......
...@@ -20,3 +20,8 @@ cc_library( ...@@ -20,3 +20,8 @@ cc_library(
], ],
alwayslink = 1, alwayslink = 1,
) )
# Exposes the ar_merge_on_darwin.sh helper script as a Bazel target so that
# other rules can depend on it.
sh_library(
name = "darwin_ar_merge",
srcs = ["ar_merge_on_darwin.sh"],
)
#!/usr/bin/env bash
# Copyright 2018 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Emulates a GNU ar MRI script (CREATE / ADDLIB / SAVE / END) using plain
# `ar -x` and `ar -rcsu` invocations.
#
# Usage: ar_merge_on_darwin.sh <ar_command> < mri_script
#   <ar_command>  path to the ar binary; a relative path is resolved against
#                 the directory the script is started from.
#   stdin         one "<COMMAND> [<path>]" entry per line; the command is
#                 matched case-insensitively.
#
# Exits non-zero when an archive cannot be extracted or the merged archive
# cannot be written. Unknown input lines are reported and skipped.

ar_command=$1
if [[ -z ${ar_command} ]]; then
  echo "error: usage: $0 <ar_command> < mri_script" >&2
  exit 1
fi

output_file_path=""
object_files=()
start_dir=$(pwd)

# Archives are unpacked into a scratch directory which is removed on exit so
# that repeated runs do not leave tmpd.* directories behind.
workspace=$(mktemp -d ./tmpd.XXXXXX) || exit 1
workspace=${start_dir}/${workspace#./}
trap 'rm -rf "${workspace}"' EXIT

# Extraction runs from inside the scratch directory, so the ar binary must be
# addressed independently of the current directory.
case ${ar_command} in
  /*) extract_ar=${ar_command} ;;
  *) extract_ar=${start_dir}/${ar_command} ;;
esac

# `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
while read -r script_line || [[ -n ${script_line} ]]; do
  command=""
  lib_path=""
  # Split on whitespace with the shell itself; nothing read from stdin is
  # ever evaluated as code.
  read -r command lib_path _rest <<< "${script_line}"
  # tr keeps this working on shells without ${var^^} (bash < 4).
  upper_command=$(echo "${command}" | tr 'a-z' 'A-Z')

  if [[ ${upper_command} == "CREATE" ]]; then
    output_file_path=${lib_path}
  elif [[ ${upper_command} == "ADDLIB" ]]; then
    lib_name=$(basename "${lib_path}")
    # One sub-directory per archive keeps equally named objects that come
    # from different libraries apart.
    lib_dir=${workspace}/${lib_name}
    mkdir -p "${lib_dir}"
    if ! cp "${lib_path}" "${lib_dir}" ||
        ! (cd "${lib_dir}" && "${extract_ar}" -x "${lib_name}"); then
      echo "error: failed to extract ${lib_path}" >&2
      exit 1
    fi
    for object_file in "${lib_dir}"/*.o; do
      # An unmatched glob stays literal; skip it so SAVE is never handed a
      # path that does not exist.
      [[ -e ${object_file} ]] && object_files+=("${object_file}")
    done
  elif [[ ${upper_command} == "SAVE" ]]; then
    if [[ -z ${output_file_path} ]]; then
      echo "error: SAVE seen before CREATE" >&2
      exit 1
    fi
    if ! "${ar_command}" -rcsu "${output_file_path}" "${object_files[@]}"; then
      echo "error: failed to write ${output_file_path}" >&2
      exit 1
    fi
  elif [[ ${upper_command} == "END" ]]; then
    echo "========== ar_merge_on_darwin end =========="
  else
    echo "error: Get an invalid input line: ${script_line}"
  fi
done
...@@ -15,7 +15,10 @@ ...@@ -15,7 +15,10 @@
#include "mace/port/darwin/env.h" #include "mace/port/darwin/env.h"
#include <execinfo.h> #include <execinfo.h>
#include <stdint.h>
#include <sys/sysctl.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/types.h>
#include <cstddef> #include <cstddef>
#include <string> #include <string>
...@@ -24,14 +27,37 @@ ...@@ -24,14 +27,37 @@
#include "mace/port/posix/backtrace.h" #include "mace/port/posix/backtrace.h"
#include "mace/port/posix/file_system.h" #include "mace/port/posix/file_system.h"
#include "mace/port/posix/time.h" #include "mace/port/posix/time.h"
#include "mace/utils/logging.h"
namespace mace { namespace mace {
namespace port { namespace port {
namespace {
// Name of the sysctl property queried by DarwinEnv::GetCPUMaxFreq().
const char kCpuFrequencyMax[] = "hw.cpufrequency_max";
}  // namespace
int64_t DarwinEnv::NowMicros() { int64_t DarwinEnv::NowMicros() {
return mace::port::posix::NowMicros(); return mace::port::posix::NowMicros();
} }
// TODO(luxuhui): this func is not accurate, darwin does not support
// acquiring CPU frequencies, we need to reconsider the CPU scheduling
// strategy.
//
// Reads the "hw.cpufrequency_max" sysctl property and appends it to
// *max_freqs as a single entry (not one entry per core).
//
// Returns MACE_RUNTIME_ERROR when the property cannot be read, in which case
// *max_freqs is left untouched; MACE_SUCCESS otherwise.
// NOTE(review): the unit of this value may differ from what other ports
// report - confirm callers only compare entries relative to each other.
MaceStatus DarwinEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
  MACE_CHECK_NOTNULL(max_freqs);

  uint64_t freq = 0;
  size_t size = sizeof(freq);
  // No new value is being set, hence the null "newp" and zero "newlen".
  const int ret = sysctlbyname(kCpuFrequencyMax, &freq, &size, nullptr, 0);
  if (ret < 0) {
    LOG(ERROR) << "failed to get property: " << kCpuFrequencyMax;
    return MaceStatus::MACE_RUNTIME_ERROR;
  }

  // The interface stores floats; make the narrowing conversion explicit.
  max_freqs->push_back(static_cast<float>(freq));
  return MaceStatus::MACE_SUCCESS;
}
FileSystem *DarwinEnv::GetFileSystem() { FileSystem *DarwinEnv::GetFileSystem() {
return &posix_file_system_; return &posix_file_system_;
} }
......
...@@ -28,6 +28,7 @@ namespace port { ...@@ -28,6 +28,7 @@ namespace port {
class DarwinEnv : public Env { class DarwinEnv : public Env {
public: public:
int64_t NowMicros() override; int64_t NowMicros() override;
MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
FileSystem *GetFileSystem() override; FileSystem *GetFileSystem() override;
LogWriter *GetLogWriter() override; LogWriter *GetLogWriter() override;
std::vector<std::string> GetBackTraceUnsafe(int max_steps) override; std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;
......
...@@ -14,9 +14,8 @@ cc_library( ...@@ -14,9 +14,8 @@ cc_library(
hdrs = if_linux(glob([ hdrs = if_linux(glob([
"*.h", "*.h",
])), ])),
deps = [ deps = if_linux([
"//mace/port:port_base", "//mace/port/linux_base:port_linux_base",
"//mace/port/posix:port_posix", ]),
],
alwayslink = 1, alwayslink = 1,
) )
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "mace/port/env.h"
#include "mace/port/posix/backtrace.h" #include "mace/port/posix/backtrace.h"
#include "mace/port/posix/file_system.h" #include "mace/port/posix/file_system.h"
#include "mace/port/posix/time.h" #include "mace/port/posix/time.h"
...@@ -28,14 +29,6 @@ ...@@ -28,14 +29,6 @@
namespace mace { namespace mace {
namespace port { namespace port {
int64_t LinuxEnv::NowMicros() {
return mace::port::posix::NowMicros();
}
FileSystem *LinuxEnv::GetFileSystem() {
return &posix_file_system_;
}
LogWriter *LinuxEnv::GetLogWriter() { LogWriter *LinuxEnv::GetLogWriter() {
return &log_writer_; return &log_writer_;
} }
......
...@@ -18,22 +18,18 @@ ...@@ -18,22 +18,18 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "mace/port/env.h" #include "mace/port/linux_base/env.h"
#include "mace/port/logger.h" #include "mace/port/logger.h"
#include "mace/port/posix/file_system.h"
namespace mace { namespace mace {
namespace port { namespace port {
class LinuxEnv : public Env { class LinuxEnv : public LinuxBaseEnv {
public: public:
int64_t NowMicros() override;
FileSystem *GetFileSystem() override;
LogWriter *GetLogWriter() override; LogWriter *GetLogWriter() override;
std::vector<std::string> GetBackTraceUnsafe(int max_steps) override; std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;
private: private:
PosixFileSystem posix_file_system_;
LogWriter log_writer_; LogWriter log_writer_;
}; };
......
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0
# Env implementation shared by the ports that depend on this package: every
# source and header in this directory, built on top of the generic port base
# and the POSIX helpers.
cc_library(
    name = "port_linux_base",
    srcs = glob(["*.cc"]),
    hdrs = glob(["*.h"]),
    deps = [
        "//mace/port:port_base",
        "//mace/port/posix:port_posix",
    ],
    alwayslink = 1,
)
// Copyright 2019 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/port/linux_base/env.h"

#include <sys/time.h>

#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <string>
#include <vector>

#include "mace/port/posix/file_system.h"
#include "mace/port/posix/time.h"
#include "mace/utils/logging.h"
namespace mace {
namespace port {
namespace {
int GetCPUCount() {
int cpu_count = 0;
std::string cpu_sys_conf = "/proc/cpuinfo";
std::ifstream f(cpu_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpu_sys_conf;
return -1;
}
std::string line;
const std::string processor_key = "processor";
while (std::getline(f, line)) {
if (line.size() >= processor_key.size()
&& line.compare(0, processor_key.size(), processor_key) == 0) {
++cpu_count;
}
}
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpu_sys_conf;
}
if (!f.eof()) {
LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
}
f.close();
VLOG(1) << "CPU cores: " << cpu_count;
return cpu_count;
}
} // namespace
// Returns the current timestamp by delegating to the shared POSIX
// implementation (microseconds, judging by the callee name).
int64_t LinuxBaseEnv::NowMicros() {
return mace::port::posix::NowMicros();
}
// Returns a non-owning pointer to the POSIX file system member held by this
// environment object.
FileSystem *LinuxBaseEnv::GetFileSystem() {
return &posix_file_system_;
}
// Appends the maximum frequency of each CPU to *max_freqs.
//
// For cpu ids 0 .. GetCPUCount()-1 the first line of
// /sys/devices/system/cpu/cpu<id>/cpufreq/cpuinfo_max_freq is parsed as a
// float and pushed back, in cpu id order.
//
// Returns MACE_RUNTIME_ERROR when the CPU count is unavailable or one of the
// sysfs files cannot be opened; entries appended before the failure stay in
// *max_freqs. Returns MACE_SUCCESS otherwise.
// NOTE(review): a file that opens but yields no line adds no entry, so the
// vector index may stop matching the cpu id - confirm callers tolerate this.
MaceStatus LinuxBaseEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
  MACE_CHECK_NOTNULL(max_freqs);

  const int cpu_count = GetCPUCount();
  if (cpu_count < 0) {
    return MaceStatus::MACE_RUNTIME_ERROR;
  }

  for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
    const std::string cpuinfo_max_freq_sys_conf = MakeString(
        "/sys/devices/system/cpu/cpu",
        cpu_id,
        "/cpufreq/cpuinfo_max_freq");
    // The stream is closed by its destructor at the end of each iteration.
    std::ifstream f(cpuinfo_max_freq_sys_conf);
    if (!f.is_open()) {
      LOG(ERROR) << "failed to open " << cpuinfo_max_freq_sys_conf;
      return MaceStatus::MACE_RUNTIME_ERROR;
    }

    std::string line;
    if (std::getline(f, line)) {
      // std::strtof() yields 0 when the line holds no parsable number.
      max_freqs->push_back(std::strtof(line.c_str(), nullptr));
    }
    // An I/O error is logged only; the loop continues with the next CPU.
    if (f.bad()) {
      LOG(ERROR) << "failed to read " << cpuinfo_max_freq_sys_conf;
    }
  }

  VLOG(1) << "CPU freq: " << MakeString(*max_freqs);
  return MaceStatus::MACE_SUCCESS;
}
} // namespace port
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_PORT_LINUX_BASE_ENV_H_
#define MACE_PORT_LINUX_BASE_ENV_H_
#include <vector>
#include "mace/port/env.h"
#include "mace/port/posix/file_system.h"
namespace mace {
namespace port {
// Env implementation holding the pieces that LinuxEnv and AndroidEnv have in
// common: the clock, the CPU max-frequency query and the file system
// accessor. Concrete ports derive from it and supply the remaining Env
// methods.
class LinuxBaseEnv : public Env {
public:
// Current timestamp, forwarded to the POSIX port implementation.
int64_t NowMicros() override;
// Appends the per-CPU maximum frequency values to *max_freqs.
MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
// Non-owning pointer to posix_file_system_.
FileSystem *GetFileSystem() override;
protected:
// File system object handed out by GetFileSystem(); protected so that
// derived environments can reach it as well.
PosixFileSystem posix_file_system_;
};
} // namespace port
} // namespace mace
#endif // MACE_PORT_LINUX_BASE_ENV_H_
...@@ -7,12 +7,12 @@ licenses(["notice"]) # Apache 2.0 ...@@ -7,12 +7,12 @@ licenses(["notice"]) # Apache 2.0
load( load(
"//mace:mace.bzl", "//mace:mace.bzl",
"if_android", "if_android",
"if_neon_enabled",
"if_openmp_enabled",
"if_android_armv7", "if_android_armv7",
"if_hexagon_enabled", "if_hexagon_enabled",
"if_hta_enabled", "if_hta_enabled",
"if_neon_enabled",
"if_opencl_enabled", "if_opencl_enabled",
"if_openmp_enabled",
"if_quantize_enabled", "if_quantize_enabled",
) )
...@@ -21,7 +21,11 @@ cc_library( ...@@ -21,7 +21,11 @@ cc_library(
hdrs = [ hdrs = [
"mace_api_test.h", "mace_api_test.h",
], ],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
) )
cc_test( cc_test(
...@@ -33,7 +37,7 @@ cc_test( ...@@ -33,7 +37,7 @@ cc_test(
"-Wextra", "-Wextra",
"-Wno-missing-field-initializers", "-Wno-missing-field-initializers",
] + if_openmp_enabled([ ] + if_openmp_enabled([
"-fopenmp" "-fopenmp",
]) + if_neon_enabled([ ]) + if_neon_enabled([
"-DMACE_ENABLE_NEON", "-DMACE_ENABLE_NEON",
]) + if_android_armv7([ ]) + if_android_armv7([
...@@ -49,12 +53,14 @@ cc_test( ...@@ -49,12 +53,14 @@ cc_test(
]) + if_hta_enabled([ ]) + if_hta_enabled([
"-DMACE_ENABLE_HTA", "-DMACE_ENABLE_HTA",
]), ]),
linkopts = ["-fopenmp"], linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":mace_api_test_header", ":mace_api_test_header",
"//mace/libmace",
"//mace/ops:test", "//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main", "@gtest//:gtest_main",
], ],
) )
...@@ -68,7 +74,7 @@ cc_test( ...@@ -68,7 +74,7 @@ cc_test(
"-Wextra", "-Wextra",
"-Wno-missing-field-initializers", "-Wno-missing-field-initializers",
] + if_openmp_enabled([ ] + if_openmp_enabled([
"-fopenmp" "-fopenmp",
]) + if_neon_enabled([ ]) + if_neon_enabled([
"-DMACE_ENABLE_NEON", "-DMACE_ENABLE_NEON",
]) + if_android_armv7([ ]) + if_android_armv7([
...@@ -84,12 +90,14 @@ cc_test( ...@@ -84,12 +90,14 @@ cc_test(
]) + if_hta_enabled([ ]) + if_hta_enabled([
"-DMACE_ENABLE_HTA", "-DMACE_ENABLE_HTA",
]), ]),
linkopts = ["-fopenmp"], linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
":mace_api_test_header", ":mace_api_test_header",
"//mace/libmace",
"//mace/ops:test", "//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main", "@gtest//:gtest_main",
], ],
) )
...@@ -103,7 +111,7 @@ cc_test( ...@@ -103,7 +111,7 @@ cc_test(
"-Wextra", "-Wextra",
"-Wno-missing-field-initializers", "-Wno-missing-field-initializers",
] + if_openmp_enabled([ ] + if_openmp_enabled([
"-fopenmp" "-fopenmp",
]) + if_neon_enabled([ ]) + if_neon_enabled([
"-DMACE_ENABLE_NEON", "-DMACE_ENABLE_NEON",
]) + if_android_armv7([ ]) + if_android_armv7([
...@@ -119,11 +127,13 @@ cc_test( ...@@ -119,11 +127,13 @@ cc_test(
]) + if_hta_enabled([ ]) + if_hta_enabled([
"-DMACE_ENABLE_HTA", "-DMACE_ENABLE_HTA",
]), ]),
linkopts = ["-fopenmp"], linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
"//mace/libmace",
"//mace/ops:test", "//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main", "@gtest//:gtest_main",
], ],
) )
...@@ -137,7 +147,7 @@ cc_test( ...@@ -137,7 +147,7 @@ cc_test(
"-Wextra", "-Wextra",
"-Wno-missing-field-initializers", "-Wno-missing-field-initializers",
] + if_openmp_enabled([ ] + if_openmp_enabled([
"-fopenmp" "-fopenmp",
]) + if_neon_enabled([ ]) + if_neon_enabled([
"-DMACE_ENABLE_NEON", "-DMACE_ENABLE_NEON",
]) + if_android_armv7([ ]) + if_android_armv7([
...@@ -153,11 +163,13 @@ cc_test( ...@@ -153,11 +163,13 @@ cc_test(
]) + if_hta_enabled([ ]) + if_hta_enabled([
"-DMACE_ENABLE_HTA", "-DMACE_ENABLE_HTA",
]), ]),
linkopts = ["-fopenmp"], linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
"//mace/libmace",
"//mace/ops:test", "//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main", "@gtest//:gtest_main",
], ],
) )
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/ops_test_util.h" #include "mace/ops/ops_test_util.h"
namespace mace { namespace mace {
...@@ -40,3 +42,5 @@ TEST(MaceAPIExceptionTest, WrongInputTest) { ...@@ -40,3 +42,5 @@ TEST(MaceAPIExceptionTest, WrongInputTest) {
} // namespace test } // namespace test
} // namespace mace } // namespace mace
#endif // MACE_ENABLE_OPENCL
...@@ -12,6 +12,8 @@ ...@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include <thread> // NOLINT(build/c++11) #include <thread> // NOLINT(build/c++11)
#include "mace/test/mace_api_test.h" #include "mace/test/mace_api_test.h"
...@@ -102,3 +104,5 @@ TEST_F(MaceMTAPITest, MultipleThread) { ...@@ -102,3 +104,5 @@ TEST_F(MaceMTAPITest, MultipleThread) {
} // namespace test } // namespace test
} // namespace mace } // namespace mace
#endif // MACE_ENABLE_OPENCL
# Examples # Examples
load("//mace:mace.bzl", "if_openmp_enabled", "if_android", "if_opencl_enabled") load(
"//mace:mace.bzl",
"if_android",
"if_darwin",
"if_opencl_enabled",
"if_openmp_enabled",
)
cc_binary( cc_binary(
name = "mace_run_static", name = "mace_run_static",
...@@ -10,9 +16,10 @@ cc_binary( ...@@ -10,9 +16,10 @@ cc_binary(
] + if_opencl_enabled([ ] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]), ]),
linkopts = [ linkopts = if_darwin(
"-fuse-ld=gold", [],
] + if_openmp_enabled([ default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp", "-fopenmp",
]), ]),
linkstatic = 1, linkstatic = 1,
...@@ -33,9 +40,10 @@ cc_binary( ...@@ -33,9 +40,10 @@ cc_binary(
] + if_opencl_enabled([ ] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL", "-DMACE_ENABLE_OPENCL",
]), ]),
linkopts = [ linkopts = if_darwin(
"-fuse-ld=gold", [],
] + if_openmp_enabled([ default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp", "-fopenmp",
]), ]),
linkstatic = 0, linkstatic = 0,
......
...@@ -24,7 +24,6 @@ ...@@ -24,7 +24,6 @@
* --model_data_file=model_data.data \ * --model_data_file=model_data.data \
* --device=GPU * --device=GPU
*/ */
#include <malloc.h>
#include <stdint.h> #include <stdint.h>
#include <cstdio> #include <cstdio>
#include <cstdlib> #include <cstdlib>
...@@ -96,56 +95,6 @@ DataFormat ParseDataFormat(const std::string &data_format_str) { ...@@ -96,56 +95,6 @@ DataFormat ParseDataFormat(const std::string &data_format_str) {
} }
} }
// Logs every allocator statistic that differs between `prev` and a fresh
// mallinfo() snapshot, then returns that fresh snapshot.
//
// For each field whose value changed, one INFO line is written containing
// the current value and the signed difference (current - previous). Fields
// that did not change produce no output.
//
// @param prev  The snapshot to compare against.
// @return      The snapshot taken at the start of this call, so the caller
//              can pass it back in as `prev` on the next check.
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
  // Take the current snapshot once; every comparison below uses it.
  struct mallinfo curr = mallinfo();
  // Both operands are cast to int64_t before subtracting in each block below.
  // NOTE(review): presumably this keeps the diff signed and wide enough
  // whatever the platform's field type is -- confirm against <malloc.h>.
  if (prev.arena != curr.arena) {
    LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena
    << ", diff: " << ((int64_t) curr.arena - (int64_t) prev.arena);
  }
  if (prev.ordblks != curr.ordblks) {
    LOG(INFO) << "Number of free chunks: " << curr.ordblks
    << ", diff: "
    << ((int64_t) curr.ordblks - (int64_t) prev.ordblks);
  }
  if (prev.smblks != curr.smblks) {
    LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks
    << ", diff: " << ((int64_t) curr.smblks - (int64_t) prev.smblks);
  }
  if (prev.hblks != curr.hblks) {
    LOG(INFO) << "Number of mmapped regions: " << curr.hblks
    << ", diff: " << ((int64_t) curr.hblks - (int64_t) prev.hblks);
  }
  if (prev.hblkhd != curr.hblkhd) {
    LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd
    << ", diff: " << ((int64_t) curr.hblkhd - (int64_t) prev.hblkhd);
  }
  if (prev.usmblks != curr.usmblks) {
    LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks
    << ", diff: "
    << ((int64_t) curr.usmblks - (int64_t) prev.usmblks);
  }
  if (prev.fsmblks != curr.fsmblks) {
    LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks
    << ", diff: "
    << ((int64_t) curr.fsmblks - (int64_t) prev.fsmblks);
  }
  if (prev.uordblks != curr.uordblks) {
    LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks
    << ", diff: "
    << ((int64_t) curr.uordblks - (int64_t) prev.uordblks);
  }
  if (prev.fordblks != curr.fordblks) {
    LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: "
    << ((int64_t) curr.fordblks - (int64_t) prev.fordblks);
  }
  if (prev.keepcost != curr.keepcost) {
    LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost
    << ", diff: "
    << ((int64_t) curr.keepcost - (int64_t) prev.keepcost);
  }
  // Hand the new snapshot back to the caller.
  return curr;
}
DEFINE_string(model_name, DEFINE_string(model_name,
"", "",
"model name in yaml"); "model name in yaml");
...@@ -395,8 +344,14 @@ bool RunModel(const std::string &model_name, ...@@ -395,8 +344,14 @@ bool RunModel(const std::string &model_name,
if (FLAGS_round > 0) { if (FLAGS_round > 0) {
LOG(INFO) << "Run model"; LOG(INFO) << "Run model";
int64_t total_run_duration = 0; int64_t total_run_duration = 0;
struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) { for (int i = 0; i < FLAGS_round; ++i) {
std::unique_ptr<port::Logger> info_log;
std::unique_ptr<port::MallocLogger> malloc_logger;
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
info_log = LOG_PTR(INFO);
malloc_logger = port::Env::Default()->NewMallocLogger(
info_log.get(), MakeString(i));
}
MaceStatus run_status; MaceStatus run_status;
while (true) { while (true) {
int64_t t0 = NowMicros(); int64_t t0 = NowMicros();
...@@ -436,10 +391,6 @@ bool RunModel(const std::string &model_name, ...@@ -436,10 +391,6 @@ bool RunModel(const std::string &model_name,
break; break;
} }
} }
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev);
}
} }
model_run_millis = total_run_duration / 1000.0 / FLAGS_round; model_run_millis = total_run_duration / 1000.0 / FLAGS_round;
LOG(INFO) << "Average latency: " << model_run_millis << " ms"; LOG(INFO) << "Average latency: " << model_run_millis << " ms";
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#define MACE_UTILS_LOGGING_H_ #define MACE_UTILS_LOGGING_H_
#include <limits> #include <limits>
#include <memory>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -24,6 +25,7 @@ ...@@ -24,6 +25,7 @@
#include "mace/port/env.h" #include "mace/port/env.h"
#include "mace/port/logger.h" #include "mace/port/logger.h"
#include "mace/utils/macros.h" #include "mace/utils/macros.h"
#include "mace/utils/memory.h"
#include "mace/utils/string_util.h" #include "mace/utils/string_util.h"
...@@ -33,6 +35,9 @@ namespace logging_internal { ...@@ -33,6 +35,9 @@ namespace logging_internal {
#define LOG(severity) \ #define LOG(severity) \
::mace::port::Logger(__FILE__, __LINE__, mace::severity) ::mace::port::Logger(__FILE__, __LINE__, mace::severity)
#define LOG_PTR(severity) \
make_unique<mace::port::Logger>(__FILE__, __LINE__, mace::severity)
#define VLOG_IS_ON(vll) (mace::ShouldGenerateVLogMessage(vll)) #define VLOG_IS_ON(vll) (mace::ShouldGenerateVLogMessage(vll))
#define VLOG(vll) if (VLOG_IS_ON(vll)) LOG(INFO) #define VLOG(vll) if (VLOG_IS_ON(vll)) LOG(INFO)
......
...@@ -24,25 +24,17 @@ build:linux --define linux=true ...@@ -24,25 +24,17 @@ build:linux --define linux=true
# MacOS host build, --config darwin # MacOS host build, --config darwin
build:darwin --define darwin=true build:darwin --define darwin=true
build:darwin --cpu=darwin_x86_64
# iOS and other darwin platforms, --config ios build:darwin --copt -Wno-unused-lambda-capture
build:ios --define darwin=true build:darwin --copt -Wno-missing-braces
build:ios --distinct_host_configuration=true
build:ios --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:ios --cpu=arm64
# Linux host build, --config linux
build:linux --define linux=true
# MacOS host build, --config darwin
build:darwin --define darwin=true
# iOS and other darwin platforms, --config ios # iOS and other darwin platforms, --config ios
build:ios --define darwin=true build:ios --define darwin=true
build:ios --distinct_host_configuration=true build:ios --distinct_host_configuration=true
build:ios --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:ios --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:ios --cpu=ios_arm64 build:ios --cpu=ios_arm64
build:ios --copt -Wno-unused-lambda-capture
build:ios --copt -Wno-missing-braces
# Usage example: bazel build --config arm_linux_gnueabihf # Usage example: bazel build --config arm_linux_gnueabihf
# Used to fix library not find linking issue, see also: # Used to fix library not find linking issue, see also:
...@@ -81,6 +73,12 @@ build:optimization --copt=-ffunction-sections ...@@ -81,6 +73,12 @@ build:optimization --copt=-ffunction-sections
build:optimization --copt=-fdata-sections build:optimization --copt=-fdata-sections
build:optimization --linkopt=-Wl,--gc-sections build:optimization --linkopt=-Wl,--gc-sections
# Usage example: bazel build --config optimization_darwin
build:optimization_darwin --copt=-O3
build:optimization_darwin --copt=-ffunction-sections
build:optimization_darwin --copt=-fdata-sections
build:optimization_darwin --linkopt=-Wl,-dead_strip
# Usage example: bazel build --config symbol_hidden # Usage example: bazel build --config symbol_hidden
build:symbol_hidden --copt=-fvisibility=hidden build:symbol_hidden --copt=-fvisibility=hidden
......
...@@ -20,7 +20,6 @@ ...@@ -20,7 +20,6 @@
# --stdout_processor=stdout_processor # --stdout_processor=stdout_processor
import argparse import argparse
import re
import sys import sys
import sh_commands import sh_commands
...@@ -105,6 +104,11 @@ def parse_args(): ...@@ -105,6 +104,11 @@ def parse_args():
type=str2bool, type=str2bool,
default=True, default=True,
help="Whether to use neon optimization") help="Whether to use neon optimization")
parser.add_argument(
"--enable_openmp",
type=str2bool,
default=True,
help="Disable openmp for multiple thread.")
parser.add_argument( parser.add_argument(
'--address_sanitizer', '--address_sanitizer',
action="store_true", action="store_true",
...@@ -140,7 +144,8 @@ def main(unused_args): ...@@ -140,7 +144,8 @@ def main(unused_args):
toolchain=toolchain, toolchain=toolchain,
enable_neon=FLAGS.enable_neon, enable_neon=FLAGS.enable_neon,
address_sanitizer=FLAGS.address_sanitizer, address_sanitizer=FLAGS.address_sanitizer,
debug_mode=FLAGS.debug_mode) debug_mode=FLAGS.debug_mode,
enable_openmp=FLAGS.enable_openmp)
if FLAGS.run_target: if FLAGS.run_target:
target_devices = DeviceManager.list_devices(FLAGS.device_yml) target_devices = DeviceManager.list_devices(FLAGS.device_yml)
if FLAGS.target_socs != TargetSOCTag.all and\ if FLAGS.target_socs != TargetSOCTag.all and\
......
...@@ -59,6 +59,9 @@ class DeviceWrapper: ...@@ -59,6 +59,9 @@ class DeviceWrapper:
raise e raise e
self.data_dir = DEVICE_DATA_DIR self.data_dir = DEVICE_DATA_DIR
self.interior_dir = self.data_dir + '/interior' self.interior_dir = self.data_dir + '/interior'
elif self.system == SystemType.host:
self.data_dir = DEVICE_DATA_DIR
self.interior_dir = self.data_dir + '/interior'
################## ##################
# internal use # # internal use #
......
...@@ -275,10 +275,11 @@ def bazel_build(target, ...@@ -275,10 +275,11 @@ def bazel_build(target,
extra_args=""): extra_args=""):
six.print_("* Build %s with ABI %s" % (target, abi)) six.print_("* Build %s with ABI %s" % (target, abi))
if abi == "host": if abi == "host":
toolchain = platform.system().lower()
bazel_args = ( bazel_args = (
"build", "build",
"--config", "--config",
platform.system().lower(), toolchain,
"--define", "--define",
"openmp=%s" % str(enable_openmp).lower(), "openmp=%s" % str(enable_openmp).lower(),
"--define", "--define",
...@@ -310,7 +311,10 @@ def bazel_build(target, ...@@ -310,7 +311,10 @@ def bazel_build(target,
if debug_mode: if debug_mode:
bazel_args += ("--config", "debug") bazel_args += ("--config", "debug")
if not address_sanitizer and not debug_mode: if not address_sanitizer and not debug_mode:
bazel_args += ("--config", "optimization") if toolchain == "darwin" or toolchain == "ios":
bazel_args += ("--config", "optimization_darwin")
else:
bazel_args += ("--config", "optimization")
if symbol_hidden: if symbol_hidden:
bazel_args += ("--config", "symbol_hidden") bazel_args += ("--config", "symbol_hidden")
if extra_args: if extra_args:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册