diff --git a/.travis.yml b/.travis.yml index b0ba55e06293a857defd7943f58ba3e5ed339c46..bc365d65cfbc958ae7bdd6ec189ac0d49c8949ed 100644 --- a/.travis.yml +++ b/.travis.yml @@ -59,6 +59,14 @@ jobs: env: TYPE=Ops-Test os: osx osx_image: xcode7.2 + - stage: Unit Test + script: + - echo "Ops Test On Darwin" + - python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1; + - bazel build "//mace/ops:ops_test" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1; + env: TYPE=Ops-Test + os: osx + osx_image: xcode7.2 - stage: Unit Test script: - echo "Ops Test Without NEON" @@ -80,6 +88,13 @@ jobs: env: TYPE=Ops-Benchmark os: osx osx_image: xcode7.2 + - stage: Unit Test + script: + - python tools/bazel_adb_run.py --target="//mace/ops:ops_benchmark" --run_target=False --enable_openmp=false --target_abis=host || exit 1; + - bazel build "//mace/ops:ops_benchmark" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1; + env: TYPE=Ops-Benchmark + os: osx + osx_image: xcode7.2 - stage: Unit Test script: - DYNAMIC_LIB_PATH="bazel-bin/mace/libmace/libmace.so" @@ -114,7 +129,7 @@ jobs: - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1 - echo 'Extra Test' - - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=armeabi-v7a || exit 1 + - python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --run_target=False --target_abis=armeabi-v7a || exit 1 env: TYPE=Extra-Test-ARMEABI-v7a os: linux dist: xenial @@ -130,3 +145,26 @@ jobs: os: linux dist: xenial sudo: required + - stage: Extra Test + script: + - bazel build "//mace/libmace:libmace_static" --config=darwin --config=optimization_darwin --define openmp=false --define quantize=true --config symbol_hidden || exit 1; + - bazel build "//mace/libmace:libmace_dynamic" --config=darwin --config=optimization_darwin --define openmp=false --define quantize=true --config symbol_hidden || exit 1; + env: TYPE=Build-Library + os: osx + osx_image: xcode7.2 + - stage: Extra Test + script: + - bazel build "//mace/libmace:libmace_static" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1; + - bazel build "//mace/libmace:libmace_dynamic" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1; + env: TYPE=Build-Library + os: osx + osx_image: xcode7.2 + - stage: Extra Test + script: + - echo 'API Test' + - python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1 + - python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1 + - python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1 + env: TYPE=Extra-Test-darwin_x86_64 + os: osx + osx_image: xcode7.2 diff --git a/WORKSPACE b/WORKSPACE index c8e20cee713794667b5ce305ac914e4fb4822d9b..524126a41b27444477f67688afc3acf140bad417 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -88,10 +88,10 @@ http_archive( http_archive( name = "tflite", - sha256 = "c886d46ad8c91fcafed2d910ad9e7bc5aeb29856c387bdf9b6b4903cc16e6e60", + sha256 = "1bb4571ee5cbde427ecfed076b39edaad96ace897ab86bb2495bdb93c706b203", strip_prefix = "tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3", urls = [ - "https://cnbj1.fds.api.xiaomi.com/mace/third-party/tflite/tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3.zip", + "http://cnbj1.fds.api.xiaomi.com/mace/third-party/tflite/tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3_custom.zip", ], ) diff --git a/mace/core/BUILD.bazel b/mace/core/BUILD.bazel index 91df4f0f1d0d0a66b2903575a4373b26897628cb..628a156d03041157d6639912c31505a6feda57e8 100644 --- a/mace/core/BUILD.bazel +++ b/mace/core/BUILD.bazel @@ -12,8 +12,8 @@ load( "if_android", "if_android_armv7", "if_hexagon_enabled", - "if_hta_enabled", "if_hexagon_or_hta_enabled", + "if_hta_enabled", "if_neon_enabled", "if_not_hexagon_enabled", "if_opencl_enabled", diff --git a/mace/core/runtime/opencl/cl2_header.h b/mace/core/runtime/opencl/cl2_header.h index 7b643ffa0f276402b343660379c000e61d4bb968..c97b600931b404abddee9d0b70c08418c995b76a 100644 --- a/mace/core/runtime/opencl/cl2_header.h +++ b/mace/core/runtime/opencl/cl2_header.h @@ -17,9 +17,30 @@ // Do not include cl2.hpp directly, include this header instead. +#include "mace/port/port-arch.h" + #define CL_HPP_MINIMUM_OPENCL_VERSION 110 + +#ifdef MACE_OS_MAC +#define CL_HPP_TARGET_OPENCL_VERSION 120 +#define CL_TARGET_OPENCL_VERSION 120 +#else #define CL_HPP_TARGET_OPENCL_VERSION 200 +#define CL_TARGET_OPENCL_VERSION 200 +#endif // MACE_OS_MAC + +#ifdef MACE_OS_MAC +// disable deprecated warning in macOS 10.14 +#define CL_SILENCE_DEPRECATION +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#pragma GCC diagnostic ignored "-Wignored-attributes" +#endif // MACE_OS_MAC #include "include/CL/cl2.hpp" +#ifdef MACE_OS_MAC +#pragma GCC diagnostic pop +#endif + #endif // MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_ diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 0a5f9460f1026670224dfa28738cca15486a206e..cd4fc9b7d5d30f43bdac59a22946660c87fde9d0 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -154,16 +154,19 @@ const std::string OpenCLErrorToString(cl_int error) { return "CL_INVALID_LINKER_OPTIONS"; case CL_INVALID_DEVICE_PARTITION_COUNT: return "CL_INVALID_DEVICE_PARTITION_COUNT"; +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 case CL_INVALID_PIPE_SIZE: return "CL_INVALID_PIPE_SIZE"; case CL_INVALID_DEVICE_QUEUE: return "CL_INVALID_DEVICE_QUEUE"; +#endif default: return MakeString("UNKNOWN: ", error); } } namespace { +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 void OpenCLPrintfCallback(const char *buffer, size_t length, size_t final, @@ -172,6 +175,7 @@ void OpenCLPrintfCallback(const char *buffer, MACE_UNUSED(user_data); fwrite(buffer, 1, length, stdout); } +#endif void GetAdrenoContextProperties(std::vector *properties, GPUPerfHint gpu_perf_hint, @@ -340,6 +344,7 @@ OpenCLRuntime::OpenCLRuntime( new cl::Context({*device_}, context_properties.data(), nullptr, nullptr, &err)); } else { +#if CL_HPP_TARGET_OPENCL_VERSION >= 200 if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) { std::vector context_properties = { CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(), @@ -353,6 +358,10 @@ OpenCLRuntime::OpenCLRuntime( context_ = std::shared_ptr( new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); } +#else + context_ = std::shared_ptr( + new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); +#endif } if (err != CL_SUCCESS) { LOG(ERROR) << "error: " << OpenCLErrorToString(err); diff --git a/mace/core/runtime/opencl/opencl_wrapper.cc b/mace/core/runtime/opencl/opencl_wrapper.cc index af9a620d6f5cef3417a0b7582611a9958f32bc66..eab1907b92e262eb3034fb907453279a380fb0ad 100644 --- a/mace/core/runtime/opencl/opencl_wrapper.cc +++ b/mace/core/runtime/opencl/opencl_wrapper.cc @@ -17,14 +17,25 @@ #include #include +#include "mace/port/port-arch.h" #include "mace/utils/logging.h" /** * Wrapper of OpenCL 2.0, based on file opencl20/CL/cl.h */ + +#ifdef MACE_OS_MAC +typedef cl_queue_properties_APPLE cl_queue_properties; +#endif + +#if CL_HPP_TARGET_OPENCL_VERSION < 200 +#define CL_API_SUFFIX__VERSION_2_0 +#endif + namespace mace { namespace runtime { + class OpenCLLibrary final { private: OpenCLLibrary(); diff --git a/mace/core/runtime/opencl/scratch_image.cc b/mace/core/runtime/opencl/scratch_image.cc index fc1ef0888aa8ef1581a5b5eda061fa403fc51548..2246dfdc37723ce3a880c901fa35fb9a87923fbd 100644 --- a/mace/core/runtime/opencl/scratch_image.cc +++ b/mace/core/runtime/opencl/scratch_image.cc @@ -17,6 +17,8 @@ #include #include +#include "mace/utils/memory.h" + namespace mace { ScratchImageManager::ScratchImageManager() = default; @@ -43,8 +45,7 @@ Image *ScratchImageManager::Spawn( // if not found if (found_image_idx == -1) { reference_count_.push_back(0); - images_[image_count] = - std::move(std::unique_ptr(new Image(allocator))); + images_[image_count] = make_unique(allocator); if (images_.at(image_count)->Allocate(shape, dt) != MaceStatus::MACE_SUCCESS) { return nullptr; diff --git a/mace/examples/cli/BUILD.bazel b/mace/examples/cli/BUILD.bazel index 70db67223b60ea701bfe9746561531afc89f27e9..efd4454dafa4fa6d790908b6234822532b0c4098 100644 --- a/mace/examples/cli/BUILD.bazel +++ b/mace/examples/cli/BUILD.bazel @@ -2,6 +2,7 @@ load( "//mace:mace.bzl", "if_android", + "if_darwin", "if_hexagon_enabled", "if_hta_enabled", "if_opencl_enabled", @@ -18,10 +19,12 @@ cc_binary( "-DMACE_ENABLE_OPENCL", ]), linkopts = [ - "-fuse-ld=gold", "-lm", "-ldl", - ] + if_openmp_enabled([ + ] + if_darwin( + [], + default_value = ["-fuse-ld=gold"], + ) + if_openmp_enabled([ "-fopenmp", ]) + if_android([ "-ldl", @@ -33,10 +36,11 @@ cc_binary( "//external:gflags_nothreads", "//mace/codegen:generated_mace_engine_factory", "//mace/codegen:generated_libmace", + "//mace/utils:utils_hdrs", + ] + if_opencl_enabled([ "//mace/codegen:generated_opencl_binary", "//mace/codegen:generated_opencl_parameter", - "//mace/utils:utils_hdrs", - ] + if_hexagon_enabled([ + ]) + if_hexagon_enabled([ "//third_party/nnlib:libhexagon", ]) + if_hta_enabled([ "//third_party/hta", @@ -54,10 +58,12 @@ cc_binary( "-DMACE_ENABLE_OPENCL", ]), linkopts = [ - "-fuse-ld=gold", "-lm", "-ldl", - ] + if_android([ + ] + if_darwin( + [], + default_value = ["-fuse-ld=gold"], + ) + if_android([ "-ldl", "-pie", "-llog", @@ -67,8 +73,9 @@ cc_binary( "//external:gflags_nothreads", "//mace/codegen:generated_libmace", "//mace/codegen:generated_mace_engine_factory", + "//mace/utils:utils_hdrs", + ] + if_opencl_enabled([ "//mace/codegen:generated_opencl_binary", "//mace/codegen:generated_opencl_parameter", - "//mace/utils:utils_hdrs", - ], + ]), ) diff --git a/mace/examples/cli/example.cc b/mace/examples/cli/example.cc index 89fa3e162f572c561cd24cbaef2751218803b413..cf55bf3080e44dd064c24cde9bee95ae8945edc8 100644 --- a/mace/examples/cli/example.cc +++ b/mace/examples/cli/example.cc @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/mace/libmace/BUILD.bazel b/mace/libmace/BUILD.bazel index 36eff0c80a76c3adb0b9e8738281974bf1aa2280..c1588792dc967ee55b24e373854ce633f65e3484 100644 --- a/mace/libmace/BUILD.bazel +++ b/mace/libmace/BUILD.bazel @@ -10,14 +10,14 @@ licenses(["notice"]) # Apache 2.0 load( "//mace:mace.bzl", "if_android", - "if_linux", - "if_darwin", - "if_neon_enabled", - "if_openmp_enabled", "if_android_armv7", + "if_darwin", "if_hexagon_enabled", "if_hta_enabled", + "if_linux", + "if_neon_enabled", "if_opencl_enabled", + "if_openmp_enabled", "if_quantize_enabled", ) @@ -53,11 +53,14 @@ cc_library( cc_binary( name = "libmace.so", - linkopts = [ - "-Wl,-soname,libmace.so", - "-Wl,--version-script", - "$(location //mace/libmace:mace_version_script.lds)", - ] + if_openmp_enabled([ + linkopts = if_darwin( + ["-Wl,-install_name,libmace.so"], + [ + "-Wl,-soname,libmace.so", + "-Wl,--version-script", + "$(location //mace/libmace:mace_version_script.lds)", + ], + ) + if_openmp_enabled([ "-fopenmp", ]), linkshared = 1, @@ -96,6 +99,7 @@ genrule( "//mace/public", "//mace/utils", "//mace/proto:mace_cc", + "//mace/port/linux_base:port_linux_base", "@com_google_protobuf//:protobuf_lite", ] + if_android([ "//mace/port/android:port_android", @@ -103,6 +107,7 @@ genrule( "//mace/port/linux:port_linux", ]) + if_darwin([ "//mace/port/darwin:port_darwin", + "//mace/port/darwin:darwin_ar_merge", ]) + if_opencl_enabled([ "//mace/ops:opencl_kernels", "//mace/codegen:generated_opencl", @@ -130,10 +135,12 @@ genrule( "$(locations //mace/port:port_base) " + "$(locations //mace/port/posix:port_posix) " + if_android( + "$(locations //mace/port/linux_base:port_linux_base) " + "$(locations //mace/port/android:port_android) ", default_value = "", ) + if_linux( + "$(locations //mace/port/linux_base:port_linux_base) " + "$(locations //mace/port/linux:port_linux) ", default_value = "", ) + @@ -151,7 +158,10 @@ genrule( ) + "$@ " + "$$tmp_mri_file);" + - "$(AR) -M <$$tmp_mri_file;" + + if_darwin( + "bash $(locations //mace/port/darwin:darwin_ar_merge) $(AR) < $$tmp_mri_file;", + default_value = "$(AR) -M <$$tmp_mri_file;", + ) + "rm -rf $$tmp_mri_file;", # "$(STRIP) -x $@;", # FIXME this will crash tools = ["//mace/python/tools:archive_static_lib"], diff --git a/mace/ops/BUILD.bazel b/mace/ops/BUILD.bazel index bbf5f34822b734eb6555702cc219454bcf4ec051..255250fd945e388981fb46f7fa5443f624059227 100644 --- a/mace/ops/BUILD.bazel +++ b/mace/ops/BUILD.bazel @@ -9,11 +9,11 @@ licenses(["notice"]) # Apache 2.0 load( "//mace:mace.bzl", "if_android", - "if_neon_enabled", - "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled", + "if_neon_enabled", "if_opencl_enabled", + "if_openmp_enabled", "if_quantize_enabled", ) @@ -55,7 +55,7 @@ cc_library( cc_library( name = "testing", hdrs = [ - "testing/test_utils.h", + "testing/test_utils.h", ], copts = [ "-Werror", @@ -450,7 +450,9 @@ cc_test( ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), - linkopts = ["-fopenmp"], + linkopts = if_openmp_enabled([ + "-fopenmp", + ]), linkstatic = 1, deps = [ ":ops", @@ -485,12 +487,15 @@ cc_test( ]) + if_hexagon_enabled([ "-DMACE_ENABLE_HEXAGON", ]), - linkopts = ["-fopenmp"], + linkopts = if_openmp_enabled([ + "-fopenmp", + ]), linkstatic = 1, deps = [ ":ops", "//mace/benchmark:statistics", "//mace/core:test_benchmark_main", "//third_party/eigen3", + "@gemmlowp", ], ) diff --git a/mace/ops/activation_benchmark.cc b/mace/ops/activation_benchmark.cc index 910281a563584dd5da39920b8a36225f822e76ce..9f78ee7ec966bd86577d76d8edf0cba9d6e76443 100644 --- a/mace/ops/activation_benchmark.cc +++ b/mace/ops/activation_benchmark.cc @@ -67,10 +67,15 @@ void ReluBenchmark(int iters, int batch, int channels, int height, int width) { } \ MACE_BENCHMARK(MACE_BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_RELU(N, C, H, W) \ MACE_BM_RELU_MACRO(N, C, H, W, float, CPU); \ MACE_BM_RELU_MACRO(N, C, H, W, float, GPU); \ - MACE_BM_RELU_MACRO(N, C, H, W, half, GPU); + MACE_BM_RELU_MACRO(N, C, H, W, half, GPU) +#else +#define MACE_BM_RELU(N, C, H, W) \ + MACE_BM_RELU_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_RELU(1, 1, 512, 512); MACE_BM_RELU(1, 3, 128, 128); @@ -123,10 +128,15 @@ void ReluxBenchmark(int iters, int batch, int channels, int height, int width) { } \ MACE_BENCHMARK(MACE_BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_RELUX(N, C, H, W) \ MACE_BM_RELUX_MACRO(N, C, H, W, float, CPU); \ MACE_BM_RELUX_MACRO(N, C, H, W, float, GPU); \ - MACE_BM_RELUX_MACRO(N, C, H, W, half, GPU); + MACE_BM_RELUX_MACRO(N, C, H, W, half, GPU) +#else +#define MACE_BM_RELUX(N, C, H, W) \ + MACE_BM_RELUX_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_RELUX(1, 1, 512, 512); MACE_BM_RELUX(1, 3, 128, 128); @@ -182,10 +192,15 @@ void PreluBenchmark(int iters, int batch, int channels, int height, int width) { } \ MACE_BENCHMARK(MACE_BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_PRELU(N, C, H, W) \ MACE_BM_PRELU_MACRO(N, C, H, W, float, CPU); \ MACE_BM_PRELU_MACRO(N, C, H, W, float, GPU); \ - MACE_BM_PRELU_MACRO(N, C, H, W, half, GPU); + MACE_BM_PRELU_MACRO(N, C, H, W, half, GPU) +#else +#define MACE_BM_PRELU(N, C, H, W) \ + MACE_BM_PRELU_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_PRELU(1, 1, 512, 512); MACE_BM_PRELU(1, 3, 128, 128); @@ -237,10 +252,15 @@ void TanhBenchmark(int iters, int batch, int channels, int height, int width) { } \ MACE_BENCHMARK(MACE_BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_TANH(N, C, H, W) \ MACE_BM_TANH_MACRO(N, C, H, W, float, CPU); \ MACE_BM_TANH_MACRO(N, C, H, W, float, GPU); \ - MACE_BM_TANH_MACRO(N, C, H, W, half, GPU); + MACE_BM_TANH_MACRO(N, C, H, W, half, GPU) +#else +#define MACE_BM_TANH(N, C, H, W) \ + MACE_BM_TANH_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_TANH(1, 1, 512, 512); MACE_BM_TANH(1, 3, 128, 128); @@ -293,10 +313,15 @@ void SigmoidBenchmark( } \ MACE_BENCHMARK(MACE_BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_SIGMOID(N, C, H, W) \ MACE_BM_SIGMOID_MACRO(N, C, H, W, float, CPU); \ MACE_BM_SIGMOID_MACRO(N, C, H, W, float, GPU); \ - MACE_BM_SIGMOID_MACRO(N, C, H, W, half, GPU); + MACE_BM_SIGMOID_MACRO(N, C, H, W, half, GPU) +#else +#define MACE_BM_SIGMOID(N, C, H, W) \ + MACE_BM_SIGMOID_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_SIGMOID(1, 1, 512, 512); MACE_BM_SIGMOID(1, 3, 128, 128); diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index 1a97107bfeb916c2af18ba549de0a913ba35fe60..1933597e626dea0c842e1165024ad255547b44fe 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -65,10 +65,15 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { MACE_BENCHMARK( \ MACE_BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_ADDN(INPUTS, N, H, W, C) \ MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \ MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, GPU); \ MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, half, GPU); +#else +#define MACE_BM_ADDN(INPUTS, N, H, W, C) \ + MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); +#endif MACE_BM_ADDN(2, 1, 256, 256, 32); MACE_BM_ADDN(2, 1, 128, 128, 32); diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 74f7a013c14af8294aaabcddf5a7a29d8662edf1..939cb3cb2428287487576a5a5cfa1e2b83c3409f 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -80,10 +80,15 @@ void BatchNorm( } \ MACE_BENCHMARK(MACE_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_BATCH_NORM(N, C, H, W) \ MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \ MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, GPU); \ MACE_BM_BATCH_NORM_MACRO(N, C, H, W, half, GPU); +#else +#define MACE_BM_BATCH_NORM(N, C, H, W) \ + MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); +#endif MACE_BM_BATCH_NORM(1, 1, 512, 512); MACE_BM_BATCH_NORM(1, 3, 128, 128); diff --git a/mace/ops/batch_to_space_benchmark.cc b/mace/ops/batch_to_space_benchmark.cc index 607b50551efb34d4fe0a115e98a61d68c654ff2a..c05f4316b8c18cb4599b05260b63565099a1f77e 100644 --- a/mace/ops/batch_to_space_benchmark.cc +++ b/mace/ops/batch_to_space_benchmark.cc @@ -64,9 +64,14 @@ void BMBatchToSpace( MACE_BENCHMARK( \ MACE_BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_BATCH_TO_SPACE(N, H, W, C, ARG) \ MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, GPU); \ MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, CPU); +#else +#define MACE_BM_BATCH_TO_SPACE(N, H, W, C, ARG) \ + MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, CPU); +#endif MACE_BM_BATCH_TO_SPACE(128, 8, 8, 128, 2); MACE_BM_BATCH_TO_SPACE(4, 128, 128, 32, 2); diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index a8883e1431205f46e5abbb2a78f4b45d8537cec7..6606c2c257b2ead3dd756a8477e39f383a25b37c 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -100,7 +100,7 @@ class BiasAddOp : public Operation { explicit BiasAddOp(OpConstructContext *context) : Operation(context), has_data_format_(Operation::GetOptionalArg("has_data_format", 1)) { - MemoryType mem_type; + MemoryType mem_type = MemoryType::CPU_BUFFER; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_ = make_unique>(); diff --git a/mace/ops/bias_add_benchmark.cc b/mace/ops/bias_add_benchmark.cc index 7de89dd2296829390eb1964911af5378c6edf9cc..34f6a713b3429fbf9da955b20df917f4a0b8bc32 100644 --- a/mace/ops/bias_add_benchmark.cc +++ b/mace/ops/bias_add_benchmark.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -70,10 +69,15 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) { } \ MACE_BENCHMARK(MACE_BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_BIAS_ADD(N, C, H, W) \ MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU); \ MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, GPU); \ MACE_BM_BIAS_ADD_MACRO(N, C, H, W, half, GPU); +#else +#define MACE_BM_BIAS_ADD(N, C, H, W) \ + MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU); +#endif MACE_BM_BIAS_ADD(1, 1, 512, 512); MACE_BM_BIAS_ADD(1, 3, 128, 128); diff --git a/mace/ops/buffer_to_image_benchmark.cc b/mace/ops/buffer_to_image_benchmark.cc index 8249c344bb4c7fed189aeae4afee3f42fce6c70c..92733d61b0f028074604b7840202507768b70e38 100644 --- a/mace/ops/buffer_to_image_benchmark.cc +++ b/mace/ops/buffer_to_image_benchmark.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef MACE_ENABLE_OPENCL + #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/opencl/buffer_transformer.h" @@ -96,3 +98,5 @@ MACE_BM_B2I(256, 32, 3, 3); } // namespace test } // namespace ops } // namespace mace + +#endif // MACE_ENABLE_OPENCL diff --git a/mace/ops/buffer_to_image_test.cc b/mace/ops/buffer_to_image_test.cc index 7a9d05138d463c6b178c81d32e70ae5eff87f464..a819b6a703859b2c111f23b3971eddd36a670be4 100644 --- a/mace/ops/buffer_to_image_test.cc +++ b/mace/ops/buffer_to_image_test.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef MACE_ENABLE_OPENCL + #include "gtest/gtest.h" #include "mace/ops/ops_test_util.h" #include "mace/ops/opencl/buffer_transformer.h" @@ -242,3 +244,5 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) { } // namespace test } // namespace ops } // namespace mace + +#endif // MACE_ENABLE_OPENCL diff --git a/mace/ops/buffer_transform_test.cc b/mace/ops/buffer_transform_test.cc index c0cd8038fe4dbc25c6928da1d7de41188f724146..b3f68a31ae854726e56b93f626c3bcb4ba24dac3 100644 --- a/mace/ops/buffer_transform_test.cc +++ b/mace/ops/buffer_transform_test.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef MACE_ENABLE_OPENCL + #include #include "gtest/gtest.h" @@ -111,3 +113,6 @@ TEST_F(BufferTransformTest, Argument) { } // namespace test } // namespace ops } // namespace mace + + +#endif // MACE_ENABLE_OPENCL diff --git a/mace/ops/channel_shuffle_benchmark.cc b/mace/ops/channel_shuffle_benchmark.cc index 40fe2b03617af74a64c6f469239fa66a97912f46..456ea8c3b6389521a90c7fe20f5ca1e7370eb997 100644 --- a/mace/ops/channel_shuffle_benchmark.cc +++ b/mace/ops/channel_shuffle_benchmark.cc @@ -67,10 +67,15 @@ void ChannelShuffle( MACE_BENCHMARK( \ MACE_BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_CHANNEL_SHUFFLE(N, C, H, W, G) \ MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \ MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, GPU); \ MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, GPU); +#else +#define MACE_BM_CHANNEL_SHUFFLE(N, C, H, W, G) \ + MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); +#endif MACE_BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8); MACE_BM_CHANNEL_SHUFFLE(1, 64, 128, 128, 8); diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index 22eb544f96f15465177170868bdf4e68bcf46ab4..e67cfe991f5db9bf2d7bf0cf7f2cdcd08ba0c86d 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -63,9 +63,14 @@ void ConcatHelper(int iters, int concat_dim, int dim0, int dim1) { } \ MACE_BENCHMARK(MACE_BM_CONCAT_CPU_##AXIS##_##DIM0##_##DIM1##_##TYPE) +#ifdef MACE_ENABLE_QUANTIZE #define MACE_BM_CONCAT_CPU(AXIS, DIM0, DIM1) \ MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, float); \ - MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, uint8_t); \ + MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, uint8_t) +#else +#define MACE_BM_CONCAT_CPU(AXIS, DIM0, DIM1) \ + MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, float) +#endif MACE_BM_CONCAT_CPU(0, 100, 1000); MACE_BM_CONCAT_CPU(0, 100, 100000); @@ -73,6 +78,7 @@ MACE_BM_CONCAT_CPU(1, 100, 1000); MACE_BM_CONCAT_CPU(1, 100, 100000); MACE_BM_CONCAT_CPU(1, 1225, 128); +#ifdef MACE_ENABLE_OPENCL namespace { template void OpenCLConcatHelper(int iters, @@ -129,6 +135,8 @@ MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half); MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, half); MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, half); +#endif // MACE_ENABLE_OPENCL + } // namespace test } // namespace ops } // namespace mace diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index a930ea909d507ca4b48c21ec8420be05fb617092..12bae8912dac006af50ef04245dad61120aa1d3b 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -79,6 +79,7 @@ void Conv2d(int iters, } } +#ifdef MACE_ENABLE_QUANTIZE template <> void Conv2d(int iters, int batch, @@ -132,6 +133,7 @@ void Conv2d(int iters, net.Sync(); } } +#endif } // namespace @@ -167,12 +169,25 @@ void Conv2d(int iters, MACE_BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##\ DILATION##_##P##_##OC##_##TYPE##_##DEVICE) +#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE) #define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \ MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \ MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \ MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU); \ - MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU); - + MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU) +#elif defined(MACE_ENABLE_OPENCL) +#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \ + MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \ + MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \ + MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU) +#elif defined(MACE_ENABLE_QUANTIZE) +#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \ + MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \ + MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU) +#else +#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \ + MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU) +#endif // Filter sizes and data alignments diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 59eb532bcb241fd5a484766077384a1e771ef721..49d11700a19668082a43efe8008f07ae8123acb4 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -25,7 +25,9 @@ namespace test { class Conv2dOpTest : public OpsTestBase { protected: virtual void SetUp() { +#ifdef MACE_ENABLE_OPENCL OpTestContext::Get()->SetOCLImageTestFlag(); +#endif } }; diff --git a/mace/ops/crop_benchmark.cc b/mace/ops/crop_benchmark.cc index 724d8ca2958360e991031b003af59f4a3f27b183..e51a555dcf8bde9e0defcdea0c8793ad3658a3ac 100644 --- a/mace/ops/crop_benchmark.cc +++ b/mace/ops/crop_benchmark.cc @@ -41,10 +41,12 @@ void CropHelper(int iters, auto input_shape1 = TransposeShape(shape1, {0, 3, 1, 2}); net.AddRandomInput("Input0", input_shape0); net.AddRandomInput("Input1", input_shape1); +#ifdef MACE_ENABLE_OPENCL } else if (D == DeviceType::GPU) { // Add input data net.AddRandomInput("Input0", shape0); net.AddRandomInput("Input1", shape1); +#endif // MACE_ENABLE_OPENCL } else { MACE_NOT_IMPLEMENTED; } @@ -85,17 +87,21 @@ void CropHelper(int iters, MACE_BENCHMARK(MACE_BM_CROP_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET\ ##_##DEVICE##_##TYPE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \ MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float); \ MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, float); \ - MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, half); + MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, half) +#else +#define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \ + MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float) +#endif // MACE_ENABLE_OPENCL MACE_BM_CROP(4, 32, 32, 32, 2, 4); MACE_BM_CROP(8, 32, 32, 64, 1, 0); MACE_BM_CROP(8, 32, 32, 128, 0, 0); MACE_BM_CROP(8, 32, 32, 256, 2, 4); - } // namespace test } // namespace ops } // namespace mace diff --git a/mace/ops/deconv_2d_benchmark.cc b/mace/ops/deconv_2d_benchmark.cc index 0144bc595c04ab7decd2bd543846b8b575f4c55c..8a16e6918342e6d514ace4d7dbe11f9a77cfebb7 100644 --- a/mace/ops/deconv_2d_benchmark.cc +++ b/mace/ops/deconv_2d_benchmark.cc @@ -89,8 +89,6 @@ static void Deconv2d(int iters, ##OW##_##P##_##OC##_##TYPE##_##DEVICE( \ int iters) { \ const int64_t tot = static_cast(iters) * N * C * H * W; \ - int64_t oh = OH; \ - int64_t ow = OW; \ const int64_t macs = \ static_cast(iters) * mace::benchmark::StatMACs( \ "Deconv2D", {OC, C, KH, KW}, {N, OH, OW, OC}); \ @@ -104,10 +102,15 @@ static void Deconv2d(int iters, ##OW##_##P##_##OC##_##TYPE##_##DEVICE) // TODO(liutuo): add cpu benchmark when optimized. +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_DECONV_2D(N, C, H, W, KH, KW, S, OH, OW, P, OC) \ MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, CPU); \ MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, GPU); \ - MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, half, GPU); + MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, half, GPU) +#else +#define MACE_BM_DECONV_2D(N, C, H, W, KH, KW, S, OH, OW, P, OC) \ + MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, CPU) +#endif MACE_BM_DECONV_2D(1, 32, 60, 60, 1, 1, 1, 60, 60, VALID, 128); diff --git a/mace/ops/depth_to_space_benchmark.cc b/mace/ops/depth_to_space_benchmark.cc index 445ce08e5590ca437dcdd7c2d001a9d09ada941f..e6f32ff587d61a376c73e6b6a3fe06163a1f9717 100644 --- a/mace/ops/depth_to_space_benchmark.cc +++ b/mace/ops/depth_to_space_benchmark.cc @@ -68,10 +68,15 @@ void DepthToSpace( MACE_BENCHMARK( \ MACE_BM_DEPTH_TO_SPACE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_DEPTH_TO_SPACE(N, C, H, W, G) \ MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU); \ MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, GPU); \ MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, half, GPU); +#else +#define MACE_BM_DEPTH_TO_SPACE(N, C, H, W, G) \ + MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU) +#endif MACE_BM_DEPTH_TO_SPACE(1, 64, 64, 64, 4); MACE_BM_DEPTH_TO_SPACE(1, 64, 128, 128, 4); diff --git a/mace/ops/depthwise_conv2d_benchmark.cc b/mace/ops/depthwise_conv2d_benchmark.cc index f0adb412fe7afcc86b848963566e193553160e9b..d04dfb3a8f50daa0dcb2718909aee7aadbeeeab5 100644 --- a/mace/ops/depthwise_conv2d_benchmark.cc +++ b/mace/ops/depthwise_conv2d_benchmark.cc @@ -128,11 +128,25 @@ void DepthwiseConv2d(int iters, MACE_BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE\ ##_##P##_##M##_##TYPE##_##DEVICE) +#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE) #define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \ MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \ MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \ MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU); \ - MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU); + MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU) +#elif defined(MACE_ENABLE_OPENCL) +#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \ + MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \ + MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \ + MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU) +#elif defined(MACE_ENABLE_QUANTIZE) +#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \ + MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \ + MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU) +#else +#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \ + MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU) +#endif MACE_BM_DEPTHWISE_CONV_2D(1, 32, 112, 112, 3, 3, 1, SAME, 1); MACE_BM_DEPTHWISE_CONV_2D(1, 32, 56, 56, 3, 3, 2, VALID, 1); diff --git a/mace/ops/depthwise_deconv2d_benchmark.cc b/mace/ops/depthwise_deconv2d_benchmark.cc index 166922ec96cb7a3f53b807c97eb94aaa2cd22e1f..969c0ecc9ec30ae718eeae8032a56e02672792f4 100644 --- a/mace/ops/depthwise_deconv2d_benchmark.cc +++ b/mace/ops/depthwise_deconv2d_benchmark.cc @@ -93,10 +93,15 @@ static void DepthwiseDeconv2d(int iters, MACE_BM_DEPTHWISE_DECONV2D_##N##_##C##_##H##_##W##_##KH##_##KW##_##S##_##P\ ##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \ MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU); \ MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, GPU); \ MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, half, GPU); +#else +#define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \ + MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU) +#endif MACE_BM_DEPTHWISE_DECONV2D(1, 128, 15, 15, 1, 1, 1, 0); MACE_BM_DEPTHWISE_DECONV2D(1, 32, 60, 60, 1, 1, 1, 0); diff --git a/mace/ops/eltwise_benchmark.cc b/mace/ops/eltwise_benchmark.cc index a1959e9df5c388dd6a3605538e83558f3d4e563d..5750132ddb2498b8d0aa08d593cc50d96059efb0 100644 --- a/mace/ops/eltwise_benchmark.cc +++ b/mace/ops/eltwise_benchmark.cc @@ -80,10 +80,15 @@ void EltwiseBenchmark( MACE_BENCHMARK( \ MACE_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_ELTWISE(ELT_TYPE, N, H, W, C) \ MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU); \ MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, GPU); \ - MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, GPU); + MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, GPU) +#else +#define MACE_BM_ELTWISE(ELT_TYPE, N, H, W, C) \ + MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU) +#endif MACE_BM_ELTWISE(2, 1, 128, 128, 32); MACE_BM_ELTWISE(2, 1, 240, 240, 256); @@ -93,8 +98,10 @@ MACE_BM_ELTWISE(0, 1, 240, 240, 256); MACE_BM_ELTWISE(5, 1, 128, 128, 32); MACE_BM_ELTWISE(5, 1, 240, 240, 256); +#ifdef MACE_ENABLE_QUANTIZE MACE_BM_ELTWISE_MACRO(0, 1, 128, 128, 32, uint8_t, CPU); MACE_BM_ELTWISE_MACRO(1, 1, 128, 128, 32, uint8_t, CPU); +#endif } // namespace test } // namespace ops diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index 22d45ea7c5de05eff05f2ad1fa30c9bcd92f6b7d..04745a055cfd519e8df365e430d952b206c843e9 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -185,7 +185,7 @@ class FullyConnectedOp : public FullyConnectedOpBase { public: explicit FullyConnectedOp(OpConstructContext *context) : FullyConnectedOpBase(context) { - MemoryType mem_type; + MemoryType mem_type = MemoryType::CPU_BUFFER; if (context->device()->gpu_runtime()->UseImageMemory()) { mem_type = MemoryType::GPU_IMAGE; kernel_ = make_unique>(); diff --git a/mace/ops/fully_connected_benchmark.cc b/mace/ops/fully_connected_benchmark.cc index 7e899c222013f143893818e103937206547b4f00..2b5bb07404f2ccbfae597ba09b1807b1ab5b2e89 100644 --- a/mace/ops/fully_connected_benchmark.cc +++ b/mace/ops/fully_connected_benchmark.cc @@ -41,13 +41,15 @@ void FCBenchmark( {out_channel, channel, height, width}, true); net.AddRandomInput("Bias", {out_channel}, true); - OpenCLBufferType weight_type = OpenCLBufferType::WEIGHT_WIDTH; OpDefBuilder("FullyConnected", "FullyConnectedTest") .Input("Input") .Input("Weight") .Input("Bias") .Output("Output") - .AddIntArg("weight_type", static_cast(weight_type)) +#ifdef MACE_ENABLE_OPENCL + .AddIntArg("weight_type", + static_cast(OpenCLBufferType::WEIGHT_WIDTH)) +#endif .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); @@ -64,6 +66,7 @@ void FCBenchmark( net.Sync(); } +#ifdef MACE_ENABLE_QUANTIZE template <> void FCBenchmark( int iters, int batch, int height, int width, int channel, int out_channel) { @@ -100,6 +103,8 @@ void FCBenchmark( net.Run(); } } +#endif // MACE_ENABLE_QUANTIZE + } // namespace #define MACE_BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \ @@ -116,11 +121,25 @@ void FCBenchmark( } \ MACE_BENCHMARK(MACE_BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE) +#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE) #define MACE_BM_FC(N, H, W, C, OC) \ MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \ MACE_BM_FC_MACRO(N, H, W, C, OC, float, GPU); \ MACE_BM_FC_MACRO(N, H, W, C, OC, half, GPU); \ - MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU); + MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU) +#elif defined(MACE_ENABLE_OPENCL) +#define MACE_BM_FC(N, H, W, C, OC) \ + MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \ + MACE_BM_FC_MACRO(N, H, W, C, OC, float, GPU); \ + MACE_BM_FC_MACRO(N, H, W, C, OC, half, GPU) +#elif defined(MACE_ENABLE_QUANTIZE) +#define MACE_BM_FC(N, H, W, C, OC) \ + MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \ + MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU) +#else +#define MACE_BM_FC(N, H, W, C, OC) \ + MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU) +#endif MACE_BM_FC(1, 16, 16, 32, 32); MACE_BM_FC(1, 8, 8, 32, 1000); diff --git a/mace/ops/lstmcell_benchmark.cc b/mace/ops/lstmcell_benchmark.cc index 697674cb2e853d3e0b24275a62e0d0d8bd2510b7..9f16a51b9c8c1630e3d82033e097cd5c2d1ef6cc 100644 --- a/mace/ops/lstmcell_benchmark.cc +++ b/mace/ops/lstmcell_benchmark.cc @@ -90,10 +90,15 @@ void LSTMCell(int iters, int batch, int input_size, int hidden_units) { MACE_BENCHMARK( \ MACE_BM_LSTMCELL_##N##_##INPUT_SIZE##_##HIDDEN_UNITS##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_LSTMCELL(N, INPUT_SIZE, HIDDEN_UNITS) \ MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, CPU); \ MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, GPU); \ - MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, half, GPU); + MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, half, GPU) +#else +#define MACE_BM_LSTMCELL(N, INPUT_SIZE, HIDDEN_UNITS) \ + MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, CPU) +#endif MACE_BM_LSTMCELL(1, 64, 256); MACE_BM_LSTMCELL(30, 64, 256); diff --git a/mace/ops/matmul_benchmark.cc b/mace/ops/matmul_benchmark.cc index 308113ffcc380d67fd39f89bcb487fce628d77e9..2ab46bac2a09c9e3d95cf9980c006fef284d138a 100644 --- a/mace/ops/matmul_benchmark.cc +++ b/mace/ops/matmul_benchmark.cc @@ -106,6 +106,7 @@ void MatmulBenchmark_Eigen(int iters, int m, int k, int n) { } } +#ifdef MACE_ENABLE_QUANTIZE void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) { mace::testing::StopTiming(); @@ -181,6 +182,7 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) { -128, output_pipeline); } } +#endif } // namespace @@ -195,10 +197,16 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) { } \ MACE_BENCHMARK(MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC) +#ifdef MACE_ENABLE_QUANTIZE #define MACE_BM_MATMUL(M, K, N) \ MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \ MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \ MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t); +#else +#define MACE_BM_MATMUL(M, K, N) \ + MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float) +#endif + // Embedding size 384 MACE_BM_MATMUL(7, 384, 384); @@ -247,7 +255,7 @@ MACE_BM_MATMUL(512, 512, 196); MACE_BM_MATMUL(1024, 1024, 49); namespace { -template +template void MatMulBenchmark( int iters, int batch, int height, int channels, int out_width) { mace::testing::StopTiming(); @@ -289,7 +297,7 @@ void MatMulBenchmark( net.Sync(); } -template +template void MatMulTransposeBenchmark( int iters, int batch, int height, int channels, int out_width) { mace::testing::StopTiming(); @@ -349,9 +357,14 @@ void MatMulTransposeBenchmark( } \ MACE_BENCHMARK(MACE_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_QUANTIZE #define MACE_BM_MATMUL_OP(N, H, C, W) \ MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU); \ - MACE_BM_MATMUL_MACRO(N, H, C, W, uint8_t, CPU); + MACE_BM_MATMUL_MACRO(N, H, C, W, uint8_t, CPU) +#else +#define MACE_BM_MATMUL_OP(N, H, C, W) \ + MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU) +#endif #define MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, TYPE, DEVICE) \ static void MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE( \ @@ -365,9 +378,14 @@ void MatMulTransposeBenchmark( } \ MACE_BENCHMARK(MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_QUANTIZE #define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \ MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \ - MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU); + MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU) +#else +#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \ + MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU) +#endif MACE_BM_MATMUL_OP(1, 30000, 256, 1); MACE_BM_MATMUL_OP(1, 128, 256, 128); diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc index 25de146a59db15f456a0941c14222fc30a5a54e7..c1569204bdc11895ff47392838e9987bdf2ef75b 100644 --- a/mace/ops/ops_test_util.cc +++ b/mace/ops/ops_test_util.cc @@ -20,7 +20,6 @@ namespace mace { namespace ops { namespace test { - OpDefBuilder::OpDefBuilder(const char *type, const std::string &name) { op_def_.set_type(type); op_def_.set_name(name); @@ -102,11 +101,13 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const { } namespace { +#ifdef MACE_ENABLE_OPENCL std::string GetStoragePathFromEnv() { char *storage_path_str = getenv("MACE_INTERNAL_STORAGE_PATH"); if (storage_path_str == nullptr) return ""; return storage_path_str; } +#endif } // namespace OpTestContext *OpTestContext::Get(int num_threads, @@ -120,27 +121,35 @@ OpTestContext *OpTestContext::Get(int num_threads, OpTestContext::OpTestContext(int num_threads, CPUAffinityPolicy cpu_affinity_policy, + +#ifdef MACE_ENABLE_OPENCL bool use_gemmlowp) : gpu_context_(std::make_shared(GetStoragePathFromEnv())), opencl_mem_types_({MemoryType::GPU_IMAGE}) { +#else + bool use_gemmlowp) { +#endif device_map_[DeviceType::CPU] = make_unique( num_threads, cpu_affinity_policy, use_gemmlowp); +#ifdef MACE_ENABLE_OPENCL device_map_[DeviceType::GPU] = make_unique( gpu_context_->opencl_tuner(), gpu_context_->opencl_cache_storage(), GPUPriorityHint::PRIORITY_NORMAL, GPUPerfHint::PERF_HIGH); -} - -std::shared_ptr OpTestContext::gpu_context() const { - return gpu_context_; +#endif // MACE_ENABLE_OPENCL } Device *OpTestContext::GetDevice(DeviceType device_type) { return device_map_[device_type].get(); } +#ifdef MACE_ENABLE_OPENCL +std::shared_ptr OpTestContext::gpu_context() const { + return gpu_context_; +} + std::vector OpTestContext::opencl_mem_types() { return opencl_mem_types_; } @@ -156,6 +165,7 @@ void OpTestContext::SetOCLImageTestFlag() { void OpTestContext::SetOCLImageAndBufferTestFlag() { opencl_mem_types_ = {MemoryType::GPU_IMAGE, MemoryType::GPU_BUFFER}; } +#endif // MACE_ENABLE_OPENCL bool OpsTestNet::Setup(mace::DeviceType device) { NetDef net_def; @@ -227,6 +237,7 @@ MaceStatus OpsTestNet::Run() { MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { if (device == DeviceType::GPU) { +#ifdef MACE_ENABLE_OPENCL auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types(); for (auto type : opencl_mem_types) { OpTestContext::Get()->GetDevice(device) @@ -235,6 +246,9 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) { MACE_RETURN_IF_ERROR(Run()); } return MaceStatus::MACE_SUCCESS; +#else + return MaceStatus::MACE_UNSUPPORTED; +#endif } else { Setup(device); return Run(); diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 8226079711535766f30e06626b80110c4883b82a..871803234236de5c3833468dfa785dd339e3ee16 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -28,8 +28,6 @@ #include "gtest/gtest.h" #include "mace/core/net.h" #include "mace/core/device_context.h" -#include "mace/core/runtime/opencl/gpu_device.h" -#include "mace/core/runtime/opencl/opencl_util.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" #include "mace/ops/ops_registry.h" @@ -39,6 +37,11 @@ #include "mace/utils/quantize.h" #include "mace/ops/testing/test_utils.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/gpu_device.h" +#include "mace/core/runtime/opencl/opencl_util.h" +#endif + namespace mace { namespace ops { namespace test { @@ -78,21 +81,28 @@ class OpTestContext { int num_threads = -1, CPUAffinityPolicy cpu_affinity_policy = AFFINITY_BIG_ONLY, bool use_gemmlowp = true); - std::shared_ptr gpu_context() const; Device *GetDevice(DeviceType device_type); + +#ifdef MACE_ENABLE_OPENCL + std::shared_ptr gpu_context() const; std::vector opencl_mem_types(); void SetOCLBufferTestFlag(); void SetOCLImageTestFlag(); void SetOCLImageAndBufferTestFlag(); +#endif + private: OpTestContext(int num_threads, CPUAffinityPolicy cpu_affinity_policy, bool use_gemmlowp); MACE_DISABLE_COPY_AND_ASSIGN(OpTestContext); + std::map> device_map_; + +#ifdef MACE_ENABLE_OPENCL std::shared_ptr gpu_context_; std::vector opencl_mem_types_; - std::map> device_map_; +#endif }; class OpsTestNet { @@ -420,7 +430,9 @@ class OpsTestBase : public ::testing::Test { } virtual void TearDown() { +#ifdef MACE_ENABLE_OPENCL OpTestContext::Get()->SetOCLImageTestFlag(); +#endif } }; diff --git a/mace/ops/pad_benchmark.cc b/mace/ops/pad_benchmark.cc index b449e02f9166c21620daf289baac89b34c25b37f..0d629c8ece9afa40a5ed68ecae3f5b93a52bf7f0 100644 --- a/mace/ops/pad_benchmark.cc +++ b/mace/ops/pad_benchmark.cc @@ -71,10 +71,15 @@ void Pad(int iters, int batch, int height, MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE \ ##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE) \ MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU); \ MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, GPU); \ - MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU); + MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU) +#else +#define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE) \ + MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU) +#endif #define MACE_BM_PAD(N, H, W, C, PAD) \ MACE_BM_PAD_MODE(N, H, W, C, PAD, CONSTANT); \ diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc index a8b6458c8df4a25cb37cf339248a2e9b9a4ad28a..6dc0aad21086919ec77225cddcc73910a1cf650b 100644 --- a/mace/ops/pooling_benchmark.cc +++ b/mace/ops/pooling_benchmark.cc @@ -89,11 +89,25 @@ void Pooling(int iters, MACE_BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_\ ##TYPE##_##DEVICE) +#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE) #define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \ MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \ MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, GPU); \ MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, half, GPU); \ - MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU); + MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU) +#elif defined(MACE_ENABLE_OPENCL) +#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \ + MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \ + MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, GPU); \ + MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, half, GPU) +#elif defined(MACE_ENABLE_QUANTIZE) +#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \ + MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \ + MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU) +#else +#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \ + MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU) +#endif MACE_BM_POOLING(1, 3, 129, 129, 2, 2, SAME, MAX); diff --git a/mace/ops/quantize_benchmark.cc b/mace/ops/quantize_benchmark.cc index dc877ec8753995ad436226e70fa42299efd1d0cc..c42cea360f5b75f5e436fed592b362a526c0be84 100644 --- a/mace/ops/quantize_benchmark.cc +++ b/mace/ops/quantize_benchmark.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef MACE_ENABLE_QUANTIZE + #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -113,3 +115,5 @@ MACE_BM_DEQUANTIZE(1470000); } // namespace test } // namespace ops } // namespace mace + +#endif // MACE_ENABLE_QUANTIZE diff --git a/mace/ops/reduce_benchmark.cc b/mace/ops/reduce_benchmark.cc index 1d5fbe33ccb10dc7ffbef9b00353ed93889691fd..6555515254302b4fcbdc3b6f2ecd5d50e1522075 100644 --- a/mace/ops/reduce_benchmark.cc +++ b/mace/ops/reduce_benchmark.cc @@ -67,10 +67,15 @@ void Reduce(int iters, int batch, int channels, MACE_BENCHMARK( \ MACE_BM_REDUCE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_REDUCE(N, C, H, W) \ MACE_BM_REDUCE_MACRO(N, C, H, W, float, GPU); \ MACE_BM_REDUCE_MACRO(N, C, H, W, half, GPU); \ - MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU); + MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU) +#else +#define MACE_BM_REDUCE(N, C, H, W) \ + MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_REDUCE(1, 1, 512, 512); diff --git a/mace/ops/resize_bicubic_benchmark.cc b/mace/ops/resize_bicubic_benchmark.cc index f8f9eb74cbaea11cc8888cedf45ee166853d2579..5dc377fd79a62d77d8f4dfe218409989db8328f6 100644 --- a/mace/ops/resize_bicubic_benchmark.cc +++ b/mace/ops/resize_bicubic_benchmark.cc @@ -82,10 +82,15 @@ void ResizeBicubicBenchmark(int iters, MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\ ##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_RESIZE_BICUBIC(N, C, H0, W0, H1, W1) \ MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ - MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, half, GPU); + MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, half, GPU) +#else +#define MACE_BM_RESIZE_BICUBIC(N, C, H0, W0, H1, W1) \ + MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, CPU) +#endif MACE_BM_RESIZE_BICUBIC(1, 128, 120, 120, 480, 480); MACE_BM_RESIZE_BICUBIC(1, 256, 7, 7, 15, 15); diff --git a/mace/ops/resize_bilinear_benchmark.cc b/mace/ops/resize_bilinear_benchmark.cc index 205fb926f587f29734e5eb107b772a0e43baf597..64a19f54591c99838ef3fa70f677ba0235e31848 100644 --- a/mace/ops/resize_bilinear_benchmark.cc +++ b/mace/ops/resize_bilinear_benchmark.cc @@ -88,11 +88,25 @@ void ResizeBilinearBenchmark(int iters, MACE_BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\ ##DEVICE) +#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE) #define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \ MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU); \ MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ - MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU); + MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU) +#elif defined(MACE_ENABLE_OPENCL) +#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \ + MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ + MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ + MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU) +#elif defined(MACE_ENABLE_QUANTIZE) +#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \ + MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ + MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU) +#else +#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \ + MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU) +#endif MACE_BM_RESIZE_BILINEAR(1, 128, 120, 120, 480, 480); MACE_BM_RESIZE_BILINEAR(1, 256, 7, 7, 15, 15); diff --git a/mace/ops/resize_nearest_neighbor_benchmark.cc b/mace/ops/resize_nearest_neighbor_benchmark.cc index d279cfe543f97b1b7448612d6d3ae023a3823e15..6fbaff5291c45a094dcf6b873aaaef96d7850541 100644 --- a/mace/ops/resize_nearest_neighbor_benchmark.cc +++ b/mace/ops/resize_nearest_neighbor_benchmark.cc @@ -87,10 +87,15 @@ void ResizeNearestNeighborBenchmark(int iters, MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\ ##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \ MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \ MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \ - MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU); + MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU) +#else +#define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \ + MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU) +#endif MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 120, 120, 480, 480); MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 7, 7, 15, 15); diff --git a/mace/ops/reverse_benchmark.cc b/mace/ops/reverse_benchmark.cc index 1e7ad93bf14e539e03f0ad294739ddd5d9679a14..e9d225d3a413f891d8e446523bfd4c60628c8ebb 100644 --- a/mace/ops/reverse_benchmark.cc +++ b/mace/ops/reverse_benchmark.cc @@ -51,8 +51,6 @@ void Reverse(int iters, int batch, int channels, int height, int width) { #define MACE_BM_REVERSE_MACRO(N, C, H, W, TYPE, DEVICE) \ static void MACE_BM_REVERSE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ int iters) { \ - const int64_t macs = \ - static_cast(iters) * N * C * H * W; \ const int64_t tot = static_cast(iters) * N * C * H * W; \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ Reverse(iters, N, C, H, W); \ diff --git a/mace/ops/softmax_benchmark.cc b/mace/ops/softmax_benchmark.cc index ec6b92185d9739da3f8fc7234ee2cd0c20c919af..ec70e94b1cbc2e31ba8f3fe9d19ac7f8e677929f 100644 --- a/mace/ops/softmax_benchmark.cc +++ b/mace/ops/softmax_benchmark.cc @@ -57,6 +57,7 @@ void SoftmaxBenchmark( net.Sync(); } +#ifdef MACE_ENABLE_QUANTIZE template <> void SoftmaxBenchmark( int iters, int batch, int channels, int height, int width) { @@ -80,6 +81,9 @@ void SoftmaxBenchmark( output->SetScale(0); output->SetZeroPoint(1); + Tensor *input = net.GetTensor("Input"); + input->SetScale(0.1); + // Warm-up for (int i = 0; i < 2; ++i) { net.Run(); @@ -92,6 +96,8 @@ void SoftmaxBenchmark( } net.Sync(); } +#endif // MACE_ENABLE_QUANTIZE + } // namespace #define MACE_BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \ @@ -103,11 +109,25 @@ void SoftmaxBenchmark( } \ MACE_BENCHMARK(MACE_BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE) #define MACE_BM_SOFTMAX(N, C, H, W) \ MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \ MACE_BM_SOFTMAX_MACRO(N, C, H, W, uint8_t, CPU); \ MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, GPU); \ - MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU); + MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU) +#elif defined(MACE_ENABLE_OPENCL) +#define MACE_BM_SOFTMAX(N, C, H, W) \ + MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \ + MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, GPU); \ + MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU) +#elif defined(MACE_ENABLE_QUANTIZE) +#define MACE_BM_SOFTMAX(N, C, H, W) \ + MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \ + MACE_BM_SOFTMAX_MACRO(N, C, H, W, uint8_t, CPU) +#else +#define MACE_BM_SOFTMAX(N, C, H, W) \ + MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_SOFTMAX(1, 2, 512, 512); MACE_BM_SOFTMAX(1, 3, 512, 512); diff --git a/mace/ops/space_to_batch_benchmark.cc b/mace/ops/space_to_batch_benchmark.cc index d88baee098179314bd3b2bef704c205328b7bdca..4a9fa11ed989c45c962cc9346eb2fa28939efe9c 100644 --- a/mace/ops/space_to_batch_benchmark.cc +++ b/mace/ops/space_to_batch_benchmark.cc @@ -70,10 +70,23 @@ void BMSpaceToBatch( MACE_BENCHMARK( \ MACE_BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE) +#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE) #define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \ MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \ MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \ - MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU); + MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU) +#elif defined(MACE_ENABLE_OPENCL) +#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \ + MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \ + MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU) +#elif defined(MACE_ENABLE_QUANTIZE) +#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \ + MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \ + MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU) +#else +#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \ + MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU) +#endif MACE_BM_SPACE_TO_BATCH(128, 16, 16, 128, 2); MACE_BM_SPACE_TO_BATCH(1, 256, 256, 32, 2); diff --git a/mace/ops/space_to_depth_benchmark.cc b/mace/ops/space_to_depth_benchmark.cc index 2ad42c85d00c04e0040deaf76ccb2d39bfabe7e1..56d6a0c1ac8e07998d9bdbe2b4f36598bc332e24 100644 --- a/mace/ops/space_to_depth_benchmark.cc +++ b/mace/ops/space_to_depth_benchmark.cc @@ -68,10 +68,15 @@ void SpaceToDepth( MACE_BENCHMARK( \ MACE_BM_SPACE_TO_DEPTH_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_SPACE_TO_DEPTH(N, C, H, W, G) \ MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU); \ MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, GPU); \ - MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, half, GPU); + MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, half, GPU) +#else +#define MACE_BM_SPACE_TO_DEPTH(N, C, H, W, G) \ + MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU) +#endif MACE_BM_SPACE_TO_DEPTH(1, 64, 64, 64, 4); MACE_BM_SPACE_TO_DEPTH(1, 64, 128, 128, 4); diff --git a/mace/ops/split_benchmark.cc b/mace/ops/split_benchmark.cc index 17584778a8ae93994530bdbad9f8a53d476b1e18..1cce558e80ec5e76a2d84d0049063d5f970f8c81 100644 --- a/mace/ops/split_benchmark.cc +++ b/mace/ops/split_benchmark.cc @@ -73,10 +73,15 @@ void BMSplitHelper(int iters, MACE_BENCHMARK( \ MACE_BM_SPLIT_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_SPLIT(N, H, W, C, NO) \ MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, CPU); \ MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, GPU); \ - MACE_BM_SPLIT_MACRO(N, H, W, C, NO, half, GPU); + MACE_BM_SPLIT_MACRO(N, H, W, C, NO, half, GPU) +#else +#define MACE_BM_SPLIT(N, H, W, C, NO) \ + MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, CPU) +#endif MACE_BM_SPLIT(1, 32, 32, 32, 2); MACE_BM_SPLIT(1, 32, 32, 128, 2); diff --git a/mace/ops/sqrdiff_mean_benchmark.cc b/mace/ops/sqrdiff_mean_benchmark.cc index 8445837d08ce56c0f2e7adec726159ee7978d586..211d8813f54eb01cae52fd6bfddc9b9372406b11 100644 --- a/mace/ops/sqrdiff_mean_benchmark.cc +++ b/mace/ops/sqrdiff_mean_benchmark.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "mace/core/operator.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" @@ -69,10 +68,15 @@ void SqrDiffMean(int iters, int batch, int channels, MACE_BENCHMARK( \ MACE_BM_SQRDIFF_MEAN_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) +#ifdef MACE_ENABLE_OPENCL #define MACE_BM_SQRDIFF_MEAN(N, C, H, W) \ MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, GPU); \ MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, half, GPU); \ - MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU); + MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU) +#else +#define MACE_BM_SQRDIFF_MEAN(N, C, H, W) \ + MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU) +#endif MACE_BM_SQRDIFF_MEAN(1, 1, 512, 512); diff --git a/mace/port/BUILD.bazel b/mace/port/BUILD.bazel index 722d3e700098eef7e2a3db6a97ddd43540244fe3..2c55ce7c42b1109bae946888ded101d8f18d37b3 100644 --- a/mace/port/BUILD.bazel +++ b/mace/port/BUILD.bazel @@ -48,7 +48,7 @@ cc_test( linkstatic = 1, deps = [ ":port", - "@gtest//:gtest", + "@gtest", "@gtest//:gtest_main", ], ) diff --git a/mace/port/android/BUILD.bazel b/mace/port/android/BUILD.bazel index fd5aacc51f3653a32a6fa4b5f5752772d6dd20bc..b3fff758653575dffc5cb9d403808b576f7e4431 100644 --- a/mace/port/android/BUILD.bazel +++ b/mace/port/android/BUILD.bazel @@ -14,9 +14,8 @@ cc_library( hdrs = if_android(glob([ "*.h", ])), - deps = [ - "//mace/port:port_base", - "//mace/port/posix:port_posix", - ], + deps = if_android([ + "//mace/port/linux_base:port_linux_base", + ]), alwayslink = 1, ) diff --git a/mace/port/android/env.cc b/mace/port/android/env.cc index 2940d344cf3a2d8f3b2fdafe72ef85904e4db442..fa338f078afef4ba6dbf5bb9930e554aab2b8292 100644 --- a/mace/port/android/env.cc +++ b/mace/port/android/env.cc @@ -43,51 +43,12 @@ namespace mace { namespace port { -int64_t AndroidEnv::NowMicros() { -#ifdef __hexagon__ - return HAP_perf_get_time_us(); -#else - return mace::port::posix::NowMicros(); -#endif -} - -FileSystem *AndroidEnv::GetFileSystem() { - return &posix_file_system_; -} - LogWriter *AndroidEnv::GetLogWriter() { return &log_writer_; } namespace { -int GetCPUCount() { - int cpu_count = 0; - std::string cpu_sys_conf = "/proc/cpuinfo"; - std::ifstream f(cpu_sys_conf); - if (!f.is_open()) { - LOG(ERROR) << "failed to open " << cpu_sys_conf; - return -1; - } - std::string line; - const std::string processor_key = "processor"; - while (std::getline(f, line)) { - if (line.size() >= processor_key.size() - && line.compare(0, processor_key.size(), processor_key) == 0) { - ++cpu_count; - } - } - if (f.bad()) { - LOG(ERROR) << "failed to read " << cpu_sys_conf; - } - if (!f.eof()) { - LOG(ERROR) << "failed to read end of " << cpu_sys_conf; - } - f.close(); - VLOG(1) << "CPU cores: " << cpu_count; - return cpu_count; -} - struct BacktraceState { void** current; void** end; @@ -115,38 +76,6 @@ size_t BackTrace(void** buffer, size_t max) { } // namespace -MaceStatus AndroidEnv::GetCPUMaxFreq(std::vector *max_freqs) { - MACE_CHECK_NOTNULL(max_freqs); - int cpu_count = GetCPUCount(); - if (cpu_count < 0) { - return MaceStatus::MACE_RUNTIME_ERROR; - } - for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) { - std::string cpuinfo_max_freq_sys_conf = MakeString( - "/sys/devices/system/cpu/cpu", - cpu_id, - "/cpufreq/cpuinfo_max_freq"); - std::ifstream f(cpuinfo_max_freq_sys_conf); - if (!f.is_open()) { - LOG(ERROR) << "failed to open " << cpuinfo_max_freq_sys_conf; - return MaceStatus::MACE_RUNTIME_ERROR; - } - std::string line; - if (std::getline(f, line)) { - float freq = strtof(line.c_str(), nullptr); - max_freqs->push_back(freq); - } - if (f.bad()) { - LOG(ERROR) << "failed to read " << cpuinfo_max_freq_sys_conf; - } - f.close(); - } - - VLOG(1) << "CPU freq: " << MakeString(*max_freqs); - - return MaceStatus::MACE_SUCCESS; -} - MaceStatus AndroidEnv::SchedSetAffinity(const std::vector &cpu_ids) { // compute mask cpu_set_t mask; diff --git a/mace/port/android/env.h b/mace/port/android/env.h index c51c57727d999ee2709fa14302ac51a7dbe021cf..071340367bf39a03b65837eaea68f105852fce2f 100644 --- a/mace/port/android/env.h +++ b/mace/port/android/env.h @@ -20,18 +20,16 @@ #include #include "mace/port/android/logger.h" -#include "mace/port/posix/file_system.h" #include "mace/port/env.h" +#include "mace/port/linux_base/env.h" +#include "mace/port/posix/file_system.h" namespace mace { namespace port { -class AndroidEnv : public Env { +class AndroidEnv : public LinuxBaseEnv { public: - int64_t NowMicros() override; - MaceStatus GetCPUMaxFreq(std::vector *max_freqs) override; MaceStatus SchedSetAffinity(const std::vector &cpu_ids) override; - FileSystem *GetFileSystem() override; LogWriter *GetLogWriter() override; std::vector GetBackTraceUnsafe(int max_steps) override; std::unique_ptr NewMallocLogger( @@ -39,7 +37,6 @@ class AndroidEnv : public Env { const std::string &name) override; private: - PosixFileSystem posix_file_system_; AndroidLogWriter log_writer_; }; diff --git a/mace/port/darwin/BUILD.bazel b/mace/port/darwin/BUILD.bazel index 987dafd16ea22f3f8b5b97052d0672f18c81c98d..e3343dd67ee8366e36de5f59d4f86d2cd6d6dfb5 100644 --- a/mace/port/darwin/BUILD.bazel +++ b/mace/port/darwin/BUILD.bazel @@ -20,3 +20,8 @@ cc_library( ], alwayslink = 1, ) + +sh_library( + name = "darwin_ar_merge", + srcs = ["ar_merge_on_darwin.sh"], +) diff --git a/mace/port/darwin/ar_merge_on_darwin.sh b/mace/port/darwin/ar_merge_on_darwin.sh new file mode 100644 index 0000000000000000000000000000000000000000..5b104634109106454513f9b68f59c88f39126da4 --- /dev/null +++ b/mace/port/darwin/ar_merge_on_darwin.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Copyright 2018 The MACE Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +output_file_path="" +object_files="" +workspace=`mktemp -d ./tmpd.XXXXXX` +ar_command=$1 + +while read script_line; do + command="" + lib_path="" + eval $(echo ${script_line} | awk -F" " \ + '{printf("command=%s\nlib_path=%s", $1, $2);}') + upper_command=`echo ${command} | tr 'a-z' 'A-Z'` + if [[ ${upper_command} == "CREATE" ]]; then + output_file_path=${lib_path} + elif [[ ${upper_command} == "ADDLIB" ]]; then + lib_name=$(basename ${lib_path}) + lib_dir=${workspace}"/"${lib_name} + mkdir ${lib_dir} + cp ${lib_path} ${lib_dir} + cur_path=`pwd` + cd ${lib_dir} + ${cur_path}"/"${ar_command} -x ${lib_name} + object_files=${object_files}" "${lib_dir}"/*.o" + cd ${cur_path} + elif [[ ${upper_command} == "SAVE" ]]; then + ${ar_command} -rcsu $output_file_path ${object_files} + elif [[ ${upper_command} == "END" ]]; then + echo "========== ar_merge_on_darwin end ==========" + else + echo "error: Get an invalid input line: "$script_line + fi +done diff --git a/mace/port/darwin/env.cc b/mace/port/darwin/env.cc index f951e64753b9736705b67153a7ef3ba82cb72e73..3344adbbc487b6bedbd745157c205ab6680ddfb0 100644 --- a/mace/port/darwin/env.cc +++ b/mace/port/darwin/env.cc @@ -15,7 +15,10 @@ #include "mace/port/darwin/env.h" #include +#include +#include #include +#include #include #include @@ -24,14 +27,37 @@ #include "mace/port/posix/backtrace.h" #include "mace/port/posix/file_system.h" #include "mace/port/posix/time.h" +#include "mace/utils/logging.h" namespace mace { namespace port { +namespace { +const char kCpuFrequencyMax[] = "hw.cpufrequency_max"; +} + int64_t DarwinEnv::NowMicros() { return mace::port::posix::NowMicros(); } +// TODO(luxuhui): this func is not accurate, darwin does not support +// acquiring CPU frequencies, we need to reconsider the CPU scheduling +// strategy. +MaceStatus DarwinEnv::GetCPUMaxFreq(std::vector *max_freqs) { + MACE_CHECK_NOTNULL(max_freqs); + + uint64_t freq = 0; + size_t size = sizeof(freq); + int ret = sysctlbyname(kCpuFrequencyMax, &freq, &size, NULL, 0); + if (ret < 0) { + LOG(ERROR) << "failed to get property: " << kCpuFrequencyMax; + return MaceStatus::MACE_RUNTIME_ERROR; + } + max_freqs->push_back(freq); + + return MaceStatus::MACE_SUCCESS; +} + FileSystem *DarwinEnv::GetFileSystem() { return &posix_file_system_; } diff --git a/mace/port/darwin/env.h b/mace/port/darwin/env.h index 667cf9f0a0e2f102c1ddc183605eea1f22dfa0c6..7205bb7fa97fd020a294198d45b47114f6ee4873 100644 --- a/mace/port/darwin/env.h +++ b/mace/port/darwin/env.h @@ -28,6 +28,7 @@ namespace port { class DarwinEnv : public Env { public: int64_t NowMicros() override; + MaceStatus GetCPUMaxFreq(std::vector *max_freqs) override; FileSystem *GetFileSystem() override; LogWriter *GetLogWriter() override; std::vector GetBackTraceUnsafe(int max_steps) override; diff --git a/mace/port/linux/BUILD.bazel b/mace/port/linux/BUILD.bazel index 5d1351baf844c4e90f6259fddb97b6217dd769b2..cd31278fc5167c71e4c6822a780095a315167eab 100644 --- a/mace/port/linux/BUILD.bazel +++ b/mace/port/linux/BUILD.bazel @@ -14,9 +14,8 @@ cc_library( hdrs = if_linux(glob([ "*.h", ])), - deps = [ - "//mace/port:port_base", - "//mace/port/posix:port_posix", - ], + deps = if_linux([ + "//mace/port/linux_base:port_linux_base", + ]), alwayslink = 1, ) diff --git a/mace/port/linux/env.cc b/mace/port/linux/env.cc index 2a50b4a1198049d5610f3daad5b33f47efb97c4a..00831c5ed89c11a1163c57a1a83a6bdbdd386f62 100644 --- a/mace/port/linux/env.cc +++ b/mace/port/linux/env.cc @@ -21,6 +21,7 @@ #include #include +#include "mace/port/env.h" #include "mace/port/posix/backtrace.h" #include "mace/port/posix/file_system.h" #include "mace/port/posix/time.h" @@ -28,14 +29,6 @@ namespace mace { namespace port { -int64_t LinuxEnv::NowMicros() { - return mace::port::posix::NowMicros(); -} - -FileSystem *LinuxEnv::GetFileSystem() { - return &posix_file_system_; -} - LogWriter *LinuxEnv::GetLogWriter() { return &log_writer_; } diff --git a/mace/port/linux/env.h b/mace/port/linux/env.h index 5d1d243a1ab616c3bf13d6d9069147e7cced4519..825dd29d9afe11fe1fd234ad1e1ba888381a403d 100644 --- a/mace/port/linux/env.h +++ b/mace/port/linux/env.h @@ -18,22 +18,18 @@ #include #include -#include "mace/port/env.h" +#include "mace/port/linux_base/env.h" #include "mace/port/logger.h" -#include "mace/port/posix/file_system.h" namespace mace { namespace port { -class LinuxEnv : public Env { +class LinuxEnv : public LinuxBaseEnv { public: - int64_t NowMicros() override; - FileSystem *GetFileSystem() override; LogWriter *GetLogWriter() override; std::vector GetBackTraceUnsafe(int max_steps) override; private: - PosixFileSystem posix_file_system_; LogWriter log_writer_; }; diff --git a/mace/port/linux_base/BUILD.bazel b/mace/port/linux_base/BUILD.bazel new file mode 100644 index 0000000000000000000000000000000000000000..7c74acaad61c5f32e0e942acaec447fb8f63c594 --- /dev/null +++ b/mace/port/linux_base/BUILD.bazel @@ -0,0 +1,20 @@ +package( + default_visibility = ["//visibility:public"], +) + +licenses(["notice"]) # Apache 2.0 + +cc_library( + name = "port_linux_base", + srcs = glob([ + "*.cc", + ]), + hdrs = glob([ + "*.h", + ]), + deps = [ + "//mace/port:port_base", + "//mace/port/posix:port_posix", + ], + alwayslink = 1, +) diff --git a/mace/port/linux_base/env.cc b/mace/port/linux_base/env.cc new file mode 100644 index 0000000000000000000000000000000000000000..335e0e31b60f8a70afd3666b5dd04d3118458c7a --- /dev/null +++ b/mace/port/linux_base/env.cc @@ -0,0 +1,104 @@ +// Copyright 2019 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "mace/port/linux_base/env.h" + +#include + +#include +#include +#include +#include + +#include "mace/port/posix/file_system.h" +#include "mace/port/posix/time.h" +#include "mace/utils/logging.h" + +namespace mace { +namespace port { + + +namespace { + +int GetCPUCount() { + int cpu_count = 0; + std::string cpu_sys_conf = "/proc/cpuinfo"; + std::ifstream f(cpu_sys_conf); + if (!f.is_open()) { + LOG(ERROR) << "failed to open " << cpu_sys_conf; + return -1; + } + std::string line; + const std::string processor_key = "processor"; + while (std::getline(f, line)) { + if (line.size() >= processor_key.size() + && line.compare(0, processor_key.size(), processor_key) == 0) { + ++cpu_count; + } + } + if (f.bad()) { + LOG(ERROR) << "failed to read " << cpu_sys_conf; + } + if (!f.eof()) { + LOG(ERROR) << "failed to read end of " << cpu_sys_conf; + } + f.close(); + VLOG(1) << "CPU cores: " << cpu_count; + return cpu_count; +} + +} // namespace + +int64_t LinuxBaseEnv::NowMicros() { + return mace::port::posix::NowMicros(); +} + +FileSystem *LinuxBaseEnv::GetFileSystem() { + return &posix_file_system_; +} + +MaceStatus LinuxBaseEnv::GetCPUMaxFreq(std::vector *max_freqs) { + MACE_CHECK_NOTNULL(max_freqs); + int cpu_count = GetCPUCount(); + if (cpu_count < 0) { + return MaceStatus::MACE_RUNTIME_ERROR; + } + for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) { + std::string cpuinfo_max_freq_sys_conf = MakeString( + "/sys/devices/system/cpu/cpu", + cpu_id, + "/cpufreq/cpuinfo_max_freq"); + std::ifstream f(cpuinfo_max_freq_sys_conf); + if (!f.is_open()) { + LOG(ERROR) << "failed to open " << cpuinfo_max_freq_sys_conf; + return MaceStatus::MACE_RUNTIME_ERROR; + } + std::string line; + if (std::getline(f, line)) { + float freq = strtof(line.c_str(), nullptr); + max_freqs->push_back(freq); + } + if (f.bad()) { + LOG(ERROR) << "failed to read " << cpuinfo_max_freq_sys_conf; + } + f.close(); + } + + VLOG(1) << "CPU freq: " << MakeString(*max_freqs); + + return MaceStatus::MACE_SUCCESS; +} + +} // namespace port +} // namespace mace diff --git a/mace/port/linux_base/env.h b/mace/port/linux_base/env.h new file mode 100644 index 0000000000000000000000000000000000000000..07270f2a7b3eaef3997f5a94e87a218fa5b64ca0 --- /dev/null +++ b/mace/port/linux_base/env.h @@ -0,0 +1,39 @@ +// Copyright 2019 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MACE_PORT_LINUX_BASE_ENV_H_ +#define MACE_PORT_LINUX_BASE_ENV_H_ + +#include + +#include "mace/port/env.h" +#include "mace/port/posix/file_system.h" + +namespace mace { +namespace port { + +class LinuxBaseEnv : public Env { + public: + int64_t NowMicros() override; + MaceStatus GetCPUMaxFreq(std::vector *max_freqs) override; + FileSystem *GetFileSystem() override; + + protected: + PosixFileSystem posix_file_system_; +}; + +} // namespace port +} // namespace mace + +#endif // MACE_PORT_LINUX_BASE_ENV_H_ diff --git a/mace/test/BUILD.bazel b/mace/test/BUILD.bazel index a5c5f974552dd13b35faff26f7e14266e042b3fc..bab093e1aa0e2c04b6ab530b0cd87df85b20e559 100644 --- a/mace/test/BUILD.bazel +++ b/mace/test/BUILD.bazel @@ -7,12 +7,12 @@ licenses(["notice"]) # Apache 2.0 load( "//mace:mace.bzl", "if_android", - "if_neon_enabled", - "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled", "if_hta_enabled", + "if_neon_enabled", "if_opencl_enabled", + "if_openmp_enabled", "if_quantize_enabled", ) @@ -21,7 +21,11 @@ cc_library( hdrs = [ "mace_api_test.h", ], - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], + copts = [ + "-Werror", + "-Wextra", + "-Wno-missing-field-initializers", + ], ) cc_test( @@ -33,7 +37,7 @@ cc_test( "-Wextra", "-Wno-missing-field-initializers", ] + if_openmp_enabled([ - "-fopenmp" + "-fopenmp", ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ @@ -49,12 +53,14 @@ cc_test( ]) + if_hta_enabled([ "-DMACE_ENABLE_HTA", ]), - linkopts = ["-fopenmp"], + linkopts = if_openmp_enabled([ + "-fopenmp", + ]), linkstatic = 1, deps = [ ":mace_api_test_header", + "//mace/libmace", "//mace/ops:test", - "//mace/libmace:libmace", "@gtest//:gtest_main", ], ) @@ -68,7 +74,7 @@ cc_test( "-Wextra", "-Wno-missing-field-initializers", ] + if_openmp_enabled([ - "-fopenmp" + "-fopenmp", ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ @@ -84,12 +90,14 @@ cc_test( ]) + if_hta_enabled([ "-DMACE_ENABLE_HTA", ]), - linkopts = ["-fopenmp"], + linkopts = if_openmp_enabled([ + "-fopenmp", + ]), linkstatic = 1, deps = [ ":mace_api_test_header", + "//mace/libmace", "//mace/ops:test", - "//mace/libmace:libmace", "@gtest//:gtest_main", ], ) @@ -103,7 +111,7 @@ cc_test( "-Wextra", "-Wno-missing-field-initializers", ] + if_openmp_enabled([ - "-fopenmp" + "-fopenmp", ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ @@ -119,11 +127,13 @@ cc_test( ]) + if_hta_enabled([ "-DMACE_ENABLE_HTA", ]), - linkopts = ["-fopenmp"], + linkopts = if_openmp_enabled([ + "-fopenmp", + ]), linkstatic = 1, deps = [ + "//mace/libmace", "//mace/ops:test", - "//mace/libmace:libmace", "@gtest//:gtest_main", ], ) @@ -137,7 +147,7 @@ cc_test( "-Wextra", "-Wno-missing-field-initializers", ] + if_openmp_enabled([ - "-fopenmp" + "-fopenmp", ]) + if_neon_enabled([ "-DMACE_ENABLE_NEON", ]) + if_android_armv7([ @@ -153,11 +163,13 @@ cc_test( ]) + if_hta_enabled([ "-DMACE_ENABLE_HTA", ]), - linkopts = ["-fopenmp"], + linkopts = if_openmp_enabled([ + "-fopenmp", + ]), linkstatic = 1, deps = [ + "//mace/libmace", "//mace/ops:test", - "//mace/libmace:libmace", "@gtest//:gtest_main", ], ) diff --git a/mace/test/mace_api_exception_test.cc b/mace/test/mace_api_exception_test.cc index 232023dace17584f49c15a499b196c538f6598eb..7bfb39d0d0d369e4b6071d7eb17d04abfffbccfc 100644 --- a/mace/test/mace_api_exception_test.cc +++ b/mace/test/mace_api_exception_test.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef MACE_ENABLE_OPENCL + #include "mace/ops/ops_test_util.h" namespace mace { @@ -40,3 +42,5 @@ TEST(MaceAPIExceptionTest, WrongInputTest) { } // namespace test } // namespace mace + +#endif // MACE_ENABLE_OPENCL diff --git a/mace/test/mace_api_mt_test.cc b/mace/test/mace_api_mt_test.cc index ee14129a05dd23d7d2fa6b3bcc491da375c12096..4bf5f40bdd7300c6aa7f3ff2965e0b8be47a07a0 100644 --- a/mace/test/mace_api_mt_test.cc +++ b/mace/test/mace_api_mt_test.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#ifdef MACE_ENABLE_OPENCL + #include // NOLINT(build/c++11) #include "mace/test/mace_api_test.h" @@ -102,3 +104,5 @@ TEST_F(MaceMTAPITest, MultipleThread) { } // namespace test } // namespace mace + +#endif // MACE_ENABLE_OPENCL diff --git a/mace/tools/validation/BUILD.bazel b/mace/tools/validation/BUILD.bazel index 6793b898c2e6dbe826ecc9acf893d4da3334aa9d..3d49f635465d15928625586cfb152565c67fc39d 100644 --- a/mace/tools/validation/BUILD.bazel +++ b/mace/tools/validation/BUILD.bazel @@ -1,5 +1,11 @@ # Examples -load("//mace:mace.bzl", "if_openmp_enabled", "if_android", "if_opencl_enabled") +load( + "//mace:mace.bzl", + "if_android", + "if_darwin", + "if_opencl_enabled", + "if_openmp_enabled", +) cc_binary( name = "mace_run_static", @@ -10,9 +16,10 @@ cc_binary( ] + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", ]), - linkopts = [ - "-fuse-ld=gold", - ] + if_openmp_enabled([ + linkopts = if_darwin( + [], + default_value = ["-fuse-ld=gold"], + ) + if_openmp_enabled([ "-fopenmp", ]), linkstatic = 1, @@ -33,9 +40,10 @@ cc_binary( ] + if_opencl_enabled([ "-DMACE_ENABLE_OPENCL", ]), - linkopts = [ - "-fuse-ld=gold", - ] + if_openmp_enabled([ + linkopts = if_darwin( + [], + default_value = ["-fuse-ld=gold"], + ) + if_openmp_enabled([ "-fopenmp", ]), linkstatic = 0, diff --git a/mace/tools/validation/mace_run.cc b/mace/tools/validation/mace_run.cc index ed8bb0b9b482f62cfadd53b3a5d34e791cf71744..0e74c8bac991ea05a7eadac573d9beaefe1a7b87 100644 --- a/mace/tools/validation/mace_run.cc +++ b/mace/tools/validation/mace_run.cc @@ -24,7 +24,6 @@ * --model_data_file=model_data.data \ * --device=GPU */ -#include #include #include #include @@ -96,56 +95,6 @@ DataFormat ParseDataFormat(const std::string &data_format_str) { } } -struct mallinfo LogMallinfoChange(struct mallinfo prev) { - struct mallinfo curr = mallinfo(); - if (prev.arena != curr.arena) { - LOG(INFO) << "Non-mmapped space allocated (bytes): " << curr.arena - << ", diff: " << ((int64_t) curr.arena - (int64_t) prev.arena); - } - if (prev.ordblks != curr.ordblks) { - LOG(INFO) << "Number of free chunks: " << curr.ordblks - << ", diff: " - << ((int64_t) curr.ordblks - (int64_t) prev.ordblks); - } - if (prev.smblks != curr.smblks) { - LOG(INFO) << "Number of free fastbin blocks: " << curr.smblks - << ", diff: " << ((int64_t) curr.smblks - (int64_t) prev.smblks); - } - if (prev.hblks != curr.hblks) { - LOG(INFO) << "Number of mmapped regions: " << curr.hblks - << ", diff: " << ((int64_t) curr.hblks - (int64_t) prev.hblks); - } - if (prev.hblkhd != curr.hblkhd) { - LOG(INFO) << "Space allocated in mmapped regions (bytes): " << curr.hblkhd - << ", diff: " << ((int64_t) curr.hblkhd - (int64_t) prev.hblkhd); - } - if (prev.usmblks != curr.usmblks) { - LOG(INFO) << "Maximum total allocated space (bytes): " << curr.usmblks - << ", diff: " - << ((int64_t) curr.usmblks - (int64_t) prev.usmblks); - } - if (prev.fsmblks != curr.fsmblks) { - LOG(INFO) << "Space in freed fastbin blocks (bytes): " << curr.fsmblks - << ", diff: " - << ((int64_t) curr.fsmblks - (int64_t) prev.fsmblks); - } - if (prev.uordblks != curr.uordblks) { - LOG(INFO) << "Total allocated space (bytes): " << curr.uordblks - << ", diff: " - << ((int64_t) curr.uordblks - (int64_t) prev.uordblks); - } - if (prev.fordblks != curr.fordblks) { - LOG(INFO) << "Total free space (bytes): " << curr.fordblks << ", diff: " - << ((int64_t) curr.fordblks - (int64_t) prev.fordblks); - } - if (prev.keepcost != curr.keepcost) { - LOG(INFO) << "Top-most, releasable space (bytes): " << curr.keepcost - << ", diff: " - << ((int64_t) curr.keepcost - (int64_t) prev.keepcost); - } - return curr; -} - DEFINE_string(model_name, "", "model name in yaml"); @@ -395,8 +344,14 @@ bool RunModel(const std::string &model_name, if (FLAGS_round > 0) { LOG(INFO) << "Run model"; int64_t total_run_duration = 0; - struct mallinfo prev = mallinfo(); for (int i = 0; i < FLAGS_round; ++i) { + std::unique_ptr info_log; + std::unique_ptr malloc_logger; + if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { + info_log = LOG_PTR(INFO); + malloc_logger = port::Env::Default()->NewMallocLogger( + info_log.get(), MakeString(i)); + } MaceStatus run_status; while (true) { int64_t t0 = NowMicros(); @@ -436,10 +391,6 @@ bool RunModel(const std::string &model_name, break; } } - if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) { - LOG(INFO) << "=== check malloc info change #" << i << " ==="; - prev = LogMallinfoChange(prev); - } } model_run_millis = total_run_duration / 1000.0 / FLAGS_round; LOG(INFO) << "Average latency: " << model_run_millis << " ms"; diff --git a/mace/utils/logging.h b/mace/utils/logging.h index 8a5f2f8e025f1ad350a9503243dd66ad9628691f..a935105a43e4ebe525a3f5100206e5fb1fd3d16f 100644 --- a/mace/utils/logging.h +++ b/mace/utils/logging.h @@ -16,6 +16,7 @@ #define MACE_UTILS_LOGGING_H_ #include +#include #include #include #include @@ -24,6 +25,7 @@ #include "mace/port/env.h" #include "mace/port/logger.h" #include "mace/utils/macros.h" +#include "mace/utils/memory.h" #include "mace/utils/string_util.h" @@ -33,6 +35,9 @@ namespace logging_internal { #define LOG(severity) \ ::mace::port::Logger(__FILE__, __LINE__, mace::severity) +#define LOG_PTR(severity) \ + make_unique(__FILE__, __LINE__, mace::severity) + #define VLOG_IS_ON(vll) (mace::ShouldGenerateVLogMessage(vll)) #define VLOG(vll) if (VLOG_IS_ON(vll)) LOG(INFO) diff --git a/tools/bazel.rc b/tools/bazel.rc index 1dddfdd56349b9d67a1840c2df2d6c2c84dd3dac..ef5fd59791bcb68cb0bc1ffc75ad936b7f3d58c4 100644 --- a/tools/bazel.rc +++ b/tools/bazel.rc @@ -24,25 +24,17 @@ build:linux --define linux=true # MacOS host build, --config darwin build:darwin --define darwin=true - -# iOS and other darwin platforms, --config ios -build:ios --define darwin=true -build:ios --distinct_host_configuration=true -build:ios --host_crosstool_top=@bazel_tools//tools/cpp:toolchain -build:ios --cpu=arm64 - -# Linux host build, --config linux -build:linux --define linux=true - -# MacOS host build, --config darwin -build:darwin --define darwin=true +build:darwin --cpu=darwin_x86_64 +build:darwin --copt -Wno-unused-lambda-capture +build:darwin --copt -Wno-missing-braces # iOS and other darwin platforms, --config ios build:ios --define darwin=true build:ios --distinct_host_configuration=true build:ios --host_crosstool_top=@bazel_tools//tools/cpp:toolchain build:ios --cpu=ios_arm64 - +build:ios --copt -Wno-unused-lambda-capture +build:ios --copt -Wno-missing-braces # Usage example: bazel build --config arm_linux_gnueabihf # Used to fix library not find linking issue, see also: @@ -81,6 +73,12 @@ build:optimization --copt=-ffunction-sections build:optimization --copt=-fdata-sections build:optimization --linkopt=-Wl,--gc-sections +# Usage example: bazel build --config optimization_darwin +build:optimization_darwin --copt=-O3 +build:optimization_darwin --copt=-ffunction-sections +build:optimization_darwin --copt=-fdata-sections +build:optimization_darwin --linkopt=-Wl,-dead_strip + # Usage example: bazel build --config symbol_hidden build:symbol_hidden --copt=-fvisibility=hidden diff --git a/tools/bazel_adb_run.py b/tools/bazel_adb_run.py index 4bf5e019dbd70bfb32ec483bdf07d75b0938aefe..7083d3180b94acca93778ad33667bb7869b46f17 100644 --- a/tools/bazel_adb_run.py +++ b/tools/bazel_adb_run.py @@ -20,7 +20,6 @@ # --stdout_processor=stdout_processor import argparse -import re import sys import sh_commands @@ -105,6 +104,11 @@ def parse_args(): type=str2bool, default=True, help="Whether to use neon optimization") + parser.add_argument( + "--enable_openmp", + type=str2bool, + default=True, + help="Disable openmp for multiple thread.") parser.add_argument( '--address_sanitizer', action="store_true", @@ -140,7 +144,8 @@ def main(unused_args): toolchain=toolchain, enable_neon=FLAGS.enable_neon, address_sanitizer=FLAGS.address_sanitizer, - debug_mode=FLAGS.debug_mode) + debug_mode=FLAGS.debug_mode, + enable_openmp=FLAGS.enable_openmp) if FLAGS.run_target: target_devices = DeviceManager.list_devices(FLAGS.device_yml) if FLAGS.target_socs != TargetSOCTag.all and\ diff --git a/tools/device.py b/tools/device.py index 76beb0557ae72f0a6e99e906113d99b7c3812b96..c1a9456f30c3460d9fe415cfac13874d358d4373 100644 --- a/tools/device.py +++ b/tools/device.py @@ -59,6 +59,9 @@ class DeviceWrapper: raise e self.data_dir = DEVICE_DATA_DIR self.interior_dir = self.data_dir + '/interior' + elif self.system == SystemType.host: + self.data_dir = DEVICE_DATA_DIR + self.interior_dir = self.data_dir + '/interior' ################## # internal use # diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 02b8ffbaee2dcf2d979432fab3195a07b5a40591..478460fda7209321d24cd1369db63b214bbb3fa5 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -275,10 +275,11 @@ def bazel_build(target, extra_args=""): six.print_("* Build %s with ABI %s" % (target, abi)) if abi == "host": + toolchain = platform.system().lower() bazel_args = ( "build", "--config", - platform.system().lower(), + toolchain, "--define", "openmp=%s" % str(enable_openmp).lower(), "--define", @@ -310,7 +311,10 @@ def bazel_build(target, if debug_mode: bazel_args += ("--config", "debug") if not address_sanitizer and not debug_mode: - bazel_args += ("--config", "optimization") + if toolchain == "darwin" or toolchain == "ios": + bazel_args += ("--config", "optimization_darwin") + else: + bazel_args += ("--config", "optimization") if symbol_hidden: bazel_args += ("--config", "symbol_hidden") if extra_args: