提交 c35775c7 编写于 作者: 李寅

Merge branch 'portability' into 'master'

fix mace run tools for darwin

See merge request !1036
......@@ -59,6 +59,14 @@ jobs:
env: TYPE=Ops-Test
os: osx
osx_image: xcode7.2
- stage: Unit Test
script:
- echo "Ops Test On Darwin"
- python tools/bazel_adb_run.py --target="//mace/ops:ops_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1;
- bazel build "//mace/ops:ops_test" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
env: TYPE=Ops-Test
os: osx
osx_image: xcode7.2
- stage: Unit Test
script:
- echo "Ops Test Without NEON"
......@@ -80,6 +88,13 @@ jobs:
env: TYPE=Ops-Benchmark
os: osx
osx_image: xcode7.2
- stage: Unit Test
script:
- python tools/bazel_adb_run.py --target="//mace/ops:ops_benchmark" --run_target=False --enable_openmp=false --target_abis=host || exit 1;
- bazel build "//mace/ops:ops_benchmark" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
env: TYPE=Ops-Benchmark
os: osx
osx_image: xcode7.2
- stage: Unit Test
script:
- DYNAMIC_LIB_PATH="bazel-bin/mace/libmace/libmace.so"
......@@ -114,7 +129,7 @@ jobs:
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- echo 'Extra Test'
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=False --target_abis=armeabi-v7a || exit 1
- python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --run_target=False --target_abis=armeabi-v7a || exit 1
env: TYPE=Extra-Test-ARMEABI-v7a
os: linux
dist: xenial
......@@ -130,3 +145,26 @@ jobs:
os: linux
dist: xenial
sudo: required
- stage: Extra Test
script:
- bazel build "//mace/libmace:libmace_static" --config=darwin --config=optimization_darwin --define openmp=false --define quantize=true --config symbol_hidden || exit 1;
- bazel build "//mace/libmace:libmace_dynamic" --config=darwin --config=optimization_darwin --define openmp=false --define quantize=true --config symbol_hidden || exit 1;
env: TYPE=Build-Library
os: osx
osx_image: xcode7.2
- stage: Extra Test
script:
- bazel build "//mace/libmace:libmace_static" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
- bazel build "//mace/libmace:libmace_dynamic" --config=ios --config=optimization_darwin --define openmp=false --define quantize=true --define neon=true --config symbol_hidden || exit 1;
env: TYPE=Build-Library
os: osx
osx_image: xcode7.2
- stage: Extra Test
script:
- echo 'API Test'
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1
- python tools/bazel_adb_run.py --target="//mace/test:mace_api_mt_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1
- python tools/bazel_adb_run.py --target="//mace/utils:utils_test" --run_target=False --enable_openmp=false --target_abis=host || exit 1
env: TYPE=Extra-Test-darwin_x86_64
os: osx
osx_image: xcode7.2
......@@ -88,10 +88,10 @@ http_archive(
http_archive(
name = "tflite",
sha256 = "c886d46ad8c91fcafed2d910ad9e7bc5aeb29856c387bdf9b6b4903cc16e6e60",
sha256 = "1bb4571ee5cbde427ecfed076b39edaad96ace897ab86bb2495bdb93c706b203",
strip_prefix = "tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3",
urls = [
"https://cnbj1.fds.api.xiaomi.com/mace/third-party/tflite/tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3.zip",
"http://cnbj1.fds.api.xiaomi.com/mace/third-party/tflite/tensorflow-mace-ffc8cc7e8c9d1894753509e88b17e251bc6255e3_custom.zip",
],
)
......
......@@ -12,8 +12,8 @@ load(
"if_android",
"if_android_armv7",
"if_hexagon_enabled",
"if_hta_enabled",
"if_hexagon_or_hta_enabled",
"if_hta_enabled",
"if_neon_enabled",
"if_not_hexagon_enabled",
"if_opencl_enabled",
......
......@@ -17,9 +17,30 @@
// Do not include cl2.hpp directly, include this header instead.
#include "mace/port/port-arch.h"
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#ifdef MACE_OS_MAC
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_TARGET_OPENCL_VERSION 120
#else
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_TARGET_OPENCL_VERSION 200
#endif // MACE_OS_MAC
#ifdef MACE_OS_MAC
// disable deprecated warning in macOS 10.14
#define CL_SILENCE_DEPRECATION
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#pragma GCC diagnostic ignored "-Wignored-attributes"
#endif // MACE_OS_MAC
#include "include/CL/cl2.hpp"
#ifdef MACE_OS_MAC
#pragma GCC diagnostic pop
#endif
#endif // MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
......@@ -154,16 +154,19 @@ const std::string OpenCLErrorToString(cl_int error) {
return "CL_INVALID_LINKER_OPTIONS";
case CL_INVALID_DEVICE_PARTITION_COUNT:
return "CL_INVALID_DEVICE_PARTITION_COUNT";
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
case CL_INVALID_PIPE_SIZE:
return "CL_INVALID_PIPE_SIZE";
case CL_INVALID_DEVICE_QUEUE:
return "CL_INVALID_DEVICE_QUEUE";
#endif
default:
return MakeString("UNKNOWN: ", error);
}
}
namespace {
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
void OpenCLPrintfCallback(const char *buffer,
size_t length,
size_t final,
......@@ -172,6 +175,7 @@ void OpenCLPrintfCallback(const char *buffer,
MACE_UNUSED(user_data);
fwrite(buffer, 1, length, stdout);
}
#endif
void GetAdrenoContextProperties(std::vector<cl_context_properties> *properties,
GPUPerfHint gpu_perf_hint,
......@@ -340,6 +344,7 @@ OpenCLRuntime::OpenCLRuntime(
new cl::Context({*device_}, context_properties.data(),
nullptr, nullptr, &err));
} else {
#if CL_HPP_TARGET_OPENCL_VERSION >= 200
if (is_profiling_enabled_ && gpu_type_ == GPUType::MALI) {
std::vector<cl_context_properties> context_properties = {
CL_CONTEXT_PLATFORM, (cl_context_properties)default_platform(),
......@@ -353,6 +358,10 @@ OpenCLRuntime::OpenCLRuntime(
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
}
#else
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
#endif
}
if (err != CL_SUCCESS) {
LOG(ERROR) << "error: " << OpenCLErrorToString(err);
......
......@@ -17,14 +17,25 @@
#include <string>
#include <vector>
#include "mace/port/port-arch.h"
#include "mace/utils/logging.h"
/**
* Wrapper of OpenCL 2.0, based on file opencl20/CL/cl.h
*/
#ifdef MACE_OS_MAC
typedef cl_queue_properties_APPLE cl_queue_properties;
#endif
#if CL_HPP_TARGET_OPENCL_VERSION < 200
#define CL_API_SUFFIX__VERSION_2_0
#endif
namespace mace {
namespace runtime {
class OpenCLLibrary final {
private:
OpenCLLibrary();
......
......@@ -17,6 +17,8 @@
#include <utility>
#include <vector>
#include "mace/utils/memory.h"
namespace mace {
ScratchImageManager::ScratchImageManager() = default;
......@@ -43,8 +45,7 @@ Image *ScratchImageManager::Spawn(
// if not found
if (found_image_idx == -1) {
reference_count_.push_back(0);
images_[image_count] =
std::move(std::unique_ptr<Image>(new Image(allocator)));
images_[image_count] = make_unique<Image>(allocator);
if (images_.at(image_count)->Allocate(shape, dt) !=
MaceStatus::MACE_SUCCESS) {
return nullptr;
......
......@@ -2,6 +2,7 @@
load(
"//mace:mace.bzl",
"if_android",
"if_darwin",
"if_hexagon_enabled",
"if_hta_enabled",
"if_opencl_enabled",
......@@ -18,10 +19,12 @@ cc_binary(
"-DMACE_ENABLE_OPENCL",
]),
linkopts = [
"-fuse-ld=gold",
"-lm",
"-ldl",
] + if_openmp_enabled([
] + if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp",
]) + if_android([
"-ldl",
......@@ -33,10 +36,11 @@ cc_binary(
"//external:gflags_nothreads",
"//mace/codegen:generated_mace_engine_factory",
"//mace/codegen:generated_libmace",
"//mace/utils:utils_hdrs",
] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
"//mace/utils:utils_hdrs",
] + if_hexagon_enabled([
]) + if_hexagon_enabled([
"//third_party/nnlib:libhexagon",
]) + if_hta_enabled([
"//third_party/hta",
......@@ -54,10 +58,12 @@ cc_binary(
"-DMACE_ENABLE_OPENCL",
]),
linkopts = [
"-fuse-ld=gold",
"-lm",
"-ldl",
] + if_android([
] + if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_android([
"-ldl",
"-pie",
"-llog",
......@@ -67,8 +73,9 @@ cc_binary(
"//external:gflags_nothreads",
"//mace/codegen:generated_libmace",
"//mace/codegen:generated_mace_engine_factory",
"//mace/utils:utils_hdrs",
] + if_opencl_enabled([
"//mace/codegen:generated_opencl_binary",
"//mace/codegen:generated_opencl_parameter",
"//mace/utils:utils_hdrs",
],
]),
)
......@@ -15,7 +15,6 @@
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
......
......@@ -10,14 +10,14 @@ licenses(["notice"]) # Apache 2.0
load(
"//mace:mace.bzl",
"if_android",
"if_linux",
"if_darwin",
"if_neon_enabled",
"if_openmp_enabled",
"if_android_armv7",
"if_darwin",
"if_hexagon_enabled",
"if_hta_enabled",
"if_linux",
"if_neon_enabled",
"if_opencl_enabled",
"if_openmp_enabled",
"if_quantize_enabled",
)
......@@ -53,11 +53,14 @@ cc_library(
cc_binary(
name = "libmace.so",
linkopts = [
"-Wl,-soname,libmace.so",
"-Wl,--version-script",
"$(location //mace/libmace:mace_version_script.lds)",
] + if_openmp_enabled([
linkopts = if_darwin(
["-Wl,-install_name,libmace.so"],
[
"-Wl,-soname,libmace.so",
"-Wl,--version-script",
"$(location //mace/libmace:mace_version_script.lds)",
],
) + if_openmp_enabled([
"-fopenmp",
]),
linkshared = 1,
......@@ -96,6 +99,7 @@ genrule(
"//mace/public",
"//mace/utils",
"//mace/proto:mace_cc",
"//mace/port/linux_base:port_linux_base",
"@com_google_protobuf//:protobuf_lite",
] + if_android([
"//mace/port/android:port_android",
......@@ -103,6 +107,7 @@ genrule(
"//mace/port/linux:port_linux",
]) + if_darwin([
"//mace/port/darwin:port_darwin",
"//mace/port/darwin:darwin_ar_merge",
]) + if_opencl_enabled([
"//mace/ops:opencl_kernels",
"//mace/codegen:generated_opencl",
......@@ -130,10 +135,12 @@ genrule(
"$(locations //mace/port:port_base) " +
"$(locations //mace/port/posix:port_posix) " +
if_android(
"$(locations //mace/port/linux_base:port_linux_base) " +
"$(locations //mace/port/android:port_android) ",
default_value = "",
) +
if_linux(
"$(locations //mace/port/linux_base:port_linux_base) " +
"$(locations //mace/port/linux:port_linux) ",
default_value = "",
) +
......@@ -151,7 +158,10 @@ genrule(
) +
"$@ " +
"$$tmp_mri_file);" +
"$(AR) -M <$$tmp_mri_file;" +
if_darwin(
"bash $(locations //mace/port/darwin:darwin_ar_merge) $(AR) < $$tmp_mri_file;",
default_value = "$(AR) -M <$$tmp_mri_file;",
) +
"rm -rf $$tmp_mri_file;",
# "$(STRIP) -x $@;", # FIXME this will crash
tools = ["//mace/python/tools:archive_static_lib"],
......
......@@ -9,11 +9,11 @@ licenses(["notice"]) # Apache 2.0
load(
"//mace:mace.bzl",
"if_android",
"if_neon_enabled",
"if_openmp_enabled",
"if_android_armv7",
"if_hexagon_enabled",
"if_neon_enabled",
"if_opencl_enabled",
"if_openmp_enabled",
"if_quantize_enabled",
)
......@@ -55,7 +55,7 @@ cc_library(
cc_library(
name = "testing",
hdrs = [
"testing/test_utils.h",
"testing/test_utils.h",
],
copts = [
"-Werror",
......@@ -450,7 +450,9 @@ cc_test(
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = ["-fopenmp"],
linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1,
deps = [
":ops",
......@@ -485,12 +487,15 @@ cc_test(
]) + if_hexagon_enabled([
"-DMACE_ENABLE_HEXAGON",
]),
linkopts = ["-fopenmp"],
linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1,
deps = [
":ops",
"//mace/benchmark:statistics",
"//mace/core:test_benchmark_main",
"//third_party/eigen3",
"@gemmlowp",
],
)
......@@ -67,10 +67,15 @@ void ReluBenchmark(int iters, int batch, int channels, int height, int width) {
} \
MACE_BENCHMARK(MACE_BM_RELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RELU(N, C, H, W) \
MACE_BM_RELU_MACRO(N, C, H, W, float, CPU); \
MACE_BM_RELU_MACRO(N, C, H, W, float, GPU); \
MACE_BM_RELU_MACRO(N, C, H, W, half, GPU);
MACE_BM_RELU_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_RELU(N, C, H, W) \
MACE_BM_RELU_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_RELU(1, 1, 512, 512);
MACE_BM_RELU(1, 3, 128, 128);
......@@ -123,10 +128,15 @@ void ReluxBenchmark(int iters, int batch, int channels, int height, int width) {
} \
MACE_BENCHMARK(MACE_BM_RELUX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RELUX(N, C, H, W) \
MACE_BM_RELUX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_RELUX_MACRO(N, C, H, W, float, GPU); \
MACE_BM_RELUX_MACRO(N, C, H, W, half, GPU);
MACE_BM_RELUX_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_RELUX(N, C, H, W) \
MACE_BM_RELUX_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_RELUX(1, 1, 512, 512);
MACE_BM_RELUX(1, 3, 128, 128);
......@@ -182,10 +192,15 @@ void PreluBenchmark(int iters, int batch, int channels, int height, int width) {
} \
MACE_BENCHMARK(MACE_BM_PRELU_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_PRELU(N, C, H, W) \
MACE_BM_PRELU_MACRO(N, C, H, W, float, CPU); \
MACE_BM_PRELU_MACRO(N, C, H, W, float, GPU); \
MACE_BM_PRELU_MACRO(N, C, H, W, half, GPU);
MACE_BM_PRELU_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_PRELU(N, C, H, W) \
MACE_BM_PRELU_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_PRELU(1, 1, 512, 512);
MACE_BM_PRELU(1, 3, 128, 128);
......@@ -237,10 +252,15 @@ void TanhBenchmark(int iters, int batch, int channels, int height, int width) {
} \
MACE_BENCHMARK(MACE_BM_TANH_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_TANH(N, C, H, W) \
MACE_BM_TANH_MACRO(N, C, H, W, float, CPU); \
MACE_BM_TANH_MACRO(N, C, H, W, float, GPU); \
MACE_BM_TANH_MACRO(N, C, H, W, half, GPU);
MACE_BM_TANH_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_TANH(N, C, H, W) \
MACE_BM_TANH_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_TANH(1, 1, 512, 512);
MACE_BM_TANH(1, 3, 128, 128);
......@@ -293,10 +313,15 @@ void SigmoidBenchmark(
} \
MACE_BENCHMARK(MACE_BM_SIGMOID_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SIGMOID(N, C, H, W) \
MACE_BM_SIGMOID_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SIGMOID_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SIGMOID_MACRO(N, C, H, W, half, GPU);
MACE_BM_SIGMOID_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_SIGMOID(N, C, H, W) \
MACE_BM_SIGMOID_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_SIGMOID(1, 1, 512, 512);
MACE_BM_SIGMOID(1, 3, 128, 128);
......
......@@ -65,10 +65,15 @@ void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
MACE_BENCHMARK( \
MACE_BM_ADDN_##INPUTS##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
// Expands MACE_BM_ADDN_MACRO once per device/type pair enabled in this build:
// float on CPU always, plus float and half on GPU when MACE_ENABLE_OPENCL is
// defined. The expansion intentionally has no trailing semicolon; each
// MACE_BM_ADDN(...); use supplies its own, so no empty declaration is left
// behind at namespace scope.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_ADDN(INPUTS, N, H, W, C) \
  MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU); \
  MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, GPU); \
  MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, half, GPU)
#else
#define MACE_BM_ADDN(INPUTS, N, H, W, C) \
  MACE_BM_ADDN_MACRO(INPUTS, N, H, W, C, float, CPU)
#endif
MACE_BM_ADDN(2, 1, 256, 256, 32);
MACE_BM_ADDN(2, 1, 128, 128, 32);
......
......@@ -80,10 +80,15 @@ void BatchNorm(
} \
MACE_BENCHMARK(MACE_BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
// Expands MACE_BM_BATCH_NORM_MACRO for float on CPU, and additionally for
// float and half on GPU when MACE_ENABLE_OPENCL is defined. No trailing
// semicolon in the expansion: the MACE_BM_BATCH_NORM(...); use site provides
// it, which avoids a stray empty declaration.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_BATCH_NORM(N, C, H, W) \
  MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU); \
  MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, GPU); \
  MACE_BM_BATCH_NORM_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_BATCH_NORM(N, C, H, W) \
  MACE_BM_BATCH_NORM_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_BATCH_NORM(1, 1, 512, 512);
MACE_BM_BATCH_NORM(1, 3, 128, 128);
......
......@@ -64,9 +64,14 @@ void BMBatchToSpace(
MACE_BENCHMARK( \
MACE_BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE)
// Expands MACE_BM_BATCH_TO_SPACE_MACRO for float on CPU; when
// MACE_ENABLE_OPENCL is defined the float GPU variant is expanded first.
// No trailing semicolon in the expansion: the MACE_BM_BATCH_TO_SPACE(...);
// use site provides it, which avoids a stray empty declaration.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_BATCH_TO_SPACE(N, H, W, C, ARG) \
  MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, GPU); \
  MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, CPU)
#else
#define MACE_BM_BATCH_TO_SPACE(N, H, W, C, ARG) \
  MACE_BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, float, CPU)
#endif
MACE_BM_BATCH_TO_SPACE(128, 8, 8, 128, 2);
MACE_BM_BATCH_TO_SPACE(4, 128, 128, 32, 2);
......
......@@ -100,7 +100,7 @@ class BiasAddOp<DeviceType::GPU, T> : public Operation {
explicit BiasAddOp(OpConstructContext *context)
: Operation(context),
has_data_format_(Operation::GetOptionalArg<int>("has_data_format", 1)) {
MemoryType mem_type;
MemoryType mem_type = MemoryType::CPU_BUFFER;
if (context->device()->gpu_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_ = make_unique<opencl::image::BiasAddKernel<T>>();
......
......@@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
......@@ -70,10 +69,15 @@ void BiasAdd(int iters, int batch, int channels, int height, int width) {
} \
MACE_BENCHMARK(MACE_BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
// Expands MACE_BM_BIAS_ADD_MACRO for float on CPU, and additionally for float
// and half on GPU when MACE_ENABLE_OPENCL is defined. No trailing semicolon in
// the expansion: the MACE_BM_BIAS_ADD(...); use site provides it, which avoids
// a stray empty declaration.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_BIAS_ADD(N, C, H, W) \
  MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU); \
  MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, GPU); \
  MACE_BM_BIAS_ADD_MACRO(N, C, H, W, half, GPU)
#else
#define MACE_BM_BIAS_ADD(N, C, H, W) \
  MACE_BM_BIAS_ADD_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_BIAS_ADD(1, 1, 512, 512);
MACE_BM_BIAS_ADD(1, 3, 128, 128);
......
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/opencl/buffer_transformer.h"
......@@ -96,3 +98,5 @@ MACE_BM_B2I(256, 32, 3, 3);
} // namespace test
} // namespace ops
} // namespace mace
#endif // MACE_ENABLE_OPENCL
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/opencl/buffer_transformer.h"
......@@ -242,3 +244,5 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) {
} // namespace test
} // namespace ops
} // namespace mace
#endif // MACE_ENABLE_OPENCL
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include <cstring>
#include "gtest/gtest.h"
......@@ -111,3 +113,6 @@ TEST_F(BufferTransformTest, Argument) {
} // namespace test
} // namespace ops
} // namespace mace
#endif // MACE_ENABLE_OPENCL
......@@ -67,10 +67,15 @@ void ChannelShuffle(
MACE_BENCHMARK( \
MACE_BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE)
// Expands MACE_BM_CHANNEL_SHUFFLE_MACRO for float on CPU, and additionally for
// float and half on GPU when MACE_ENABLE_OPENCL is defined. No trailing
// semicolon in the expansion: the MACE_BM_CHANNEL_SHUFFLE(...); use site
// provides it, which avoids a stray empty declaration.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_CHANNEL_SHUFFLE(N, C, H, W, G) \
  MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \
  MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, GPU); \
  MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, GPU)
#else
#define MACE_BM_CHANNEL_SHUFFLE(N, C, H, W, G) \
  MACE_BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU)
#endif
MACE_BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8);
MACE_BM_CHANNEL_SHUFFLE(1, 64, 128, 128, 8);
......
......@@ -63,9 +63,14 @@ void ConcatHelper(int iters, int concat_dim, int dim0, int dim1) {
} \
MACE_BENCHMARK(MACE_BM_CONCAT_CPU_##AXIS##_##DIM0##_##DIM1##_##TYPE)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_CONCAT_CPU(AXIS, DIM0, DIM1) \
MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, float); \
MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, uint8_t); \
MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, uint8_t)
#else
#define MACE_BM_CONCAT_CPU(AXIS, DIM0, DIM1) \
MACE_BM_CONCAT_CPU_MACRO(AXIS, DIM0, DIM1, float)
#endif
MACE_BM_CONCAT_CPU(0, 100, 1000);
MACE_BM_CONCAT_CPU(0, 100, 100000);
......@@ -73,6 +78,7 @@ MACE_BM_CONCAT_CPU(1, 100, 1000);
MACE_BM_CONCAT_CPU(1, 100, 100000);
MACE_BM_CONCAT_CPU(1, 1225, 128);
#ifdef MACE_ENABLE_OPENCL
namespace {
template <typename T>
void OpenCLConcatHelper(int iters,
......@@ -129,6 +135,8 @@ MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half);
MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, half);
MACE_BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, half);
#endif // MACE_ENABLE_OPENCL
} // namespace test
} // namespace ops
} // namespace mace
......@@ -79,6 +79,7 @@ void Conv2d(int iters,
}
}
#ifdef MACE_ENABLE_QUANTIZE
template <>
void Conv2d<CPU, uint8_t>(int iters,
int batch,
......@@ -132,6 +133,7 @@ void Conv2d<CPU, uint8_t>(int iters,
net.Sync();
}
}
#endif
} // namespace
......@@ -167,12 +169,25 @@ void Conv2d<CPU, uint8_t>(int iters,
MACE_BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##\
DILATION##_##P##_##OC##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU);
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, GPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, uint8_t, CPU)
#else
#define MACE_BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \
MACE_BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU)
#endif
// Filter sizes and data alignments
......
......@@ -25,7 +25,9 @@ namespace test {
// Fixture deriving from OpsTestBase; its only customization is SetUp().
class Conv2dOpTest : public OpsTestBase {
protected:
// When the build defines MACE_ENABLE_OPENCL, sets the OpenCL image test flag
// on the OpTestContext returned by Get(); otherwise the body is empty.
// NOTE(review): presumably overrides the test framework's per-test SetUp hook
// inherited through OpsTestBase - confirm against the base class.
virtual void SetUp() {
#ifdef MACE_ENABLE_OPENCL
OpTestContext::Get()->SetOCLImageTestFlag();
#endif
}
};
......
......@@ -41,10 +41,12 @@ void CropHelper(int iters,
auto input_shape1 = TransposeShape<index_t, index_t>(shape1, {0, 3, 1, 2});
net.AddRandomInput<D, float>("Input0", input_shape0);
net.AddRandomInput<D, float>("Input1", input_shape1);
#ifdef MACE_ENABLE_OPENCL
} else if (D == DeviceType::GPU) {
// Add input data
net.AddRandomInput<D, T>("Input0", shape0);
net.AddRandomInput<D, T>("Input1", shape1);
#endif // MACE_ENABLE_OPENCL
} else {
MACE_NOT_IMPLEMENTED;
}
......@@ -85,17 +87,21 @@ void CropHelper(int iters,
MACE_BENCHMARK(MACE_BM_CROP_##N##_##H##_##W##_##C##_##AXIS##_##OFFSET\
##_##DEVICE##_##TYPE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float); \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, float); \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, half);
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, GPU, half)
#else
#define MACE_BM_CROP(N, H, W, C, AXIS, OFFSET) \
MACE_BM_CROP_MACRO(N, H, W, C, AXIS, OFFSET, CPU, float)
#endif // MACE_ENABLE_OPENCL
MACE_BM_CROP(4, 32, 32, 32, 2, 4);
MACE_BM_CROP(8, 32, 32, 64, 1, 0);
MACE_BM_CROP(8, 32, 32, 128, 0, 0);
MACE_BM_CROP(8, 32, 32, 256, 2, 4);
} // namespace test
} // namespace ops
} // namespace mace
......@@ -89,8 +89,6 @@ static void Deconv2d(int iters,
##OW##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int64_t oh = OH; \
int64_t ow = OW; \
const int64_t macs = \
static_cast<int64_t>(iters) * mace::benchmark::StatMACs( \
"Deconv2D", {OC, C, KH, KW}, {N, OH, OW, OC}); \
......@@ -104,10 +102,15 @@ static void Deconv2d(int iters,
##OW##_##P##_##OC##_##TYPE##_##DEVICE)
// TODO(liutuo): add cpu benchmark when optimized.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_DECONV_2D(N, C, H, W, KH, KW, S, OH, OW, P, OC) \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, CPU); \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, GPU); \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, half, GPU);
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, half, GPU)
#else
#define MACE_BM_DECONV_2D(N, C, H, W, KH, KW, S, OH, OW, P, OC) \
MACE_BM_DECONV_2D_MACRO(N, C, H, W, KH, KW, S, OH, OW, P, OC, float, CPU)
#endif
MACE_BM_DECONV_2D(1, 32, 60, 60, 1, 1, 1, 60, 60, VALID, 128);
......
......@@ -68,10 +68,15 @@ void DepthToSpace(
MACE_BENCHMARK( \
MACE_BM_DEPTH_TO_SPACE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE)
// Expands MACE_BM_DEPTH_TO_SPACE_MACRO for float on CPU, and additionally for
// float and half on GPU when MACE_ENABLE_OPENCL is defined. Neither branch
// ends in a semicolon, so both build configurations expand the same way and
// the MACE_BM_DEPTH_TO_SPACE(...); use site provides the terminator.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_DEPTH_TO_SPACE(N, C, H, W, G) \
  MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU); \
  MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, GPU); \
  MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, half, GPU)
#else
#define MACE_BM_DEPTH_TO_SPACE(N, C, H, W, G) \
  MACE_BM_DEPTH_TO_SPACE_MACRO(N, C, H, W, G, float, CPU)
#endif
MACE_BM_DEPTH_TO_SPACE(1, 64, 64, 64, 4);
MACE_BM_DEPTH_TO_SPACE(1, 64, 128, 128, 4);
......
......@@ -128,11 +128,25 @@ void DepthwiseConv2d(int iters,
MACE_BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE\
##_##P##_##M##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU);
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, GPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, uint8_t, CPU)
#else
#define MACE_BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \
MACE_BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU)
#endif
MACE_BM_DEPTHWISE_CONV_2D(1, 32, 112, 112, 3, 3, 1, SAME, 1);
MACE_BM_DEPTHWISE_CONV_2D(1, 32, 56, 56, 3, 3, 2, VALID, 1);
......
......@@ -93,10 +93,15 @@ static void DepthwiseDeconv2d(int iters,
MACE_BM_DEPTHWISE_DECONV2D_##N##_##C##_##H##_##W##_##KH##_##KW##_##S##_##P\
##_##TYPE##_##DEVICE)
// Expands MACE_BM_DEPTHWISE_DECONV2D_MACRO for float on CPU, and additionally
// for float and half on GPU when MACE_ENABLE_OPENCL is defined. Neither branch
// ends in a semicolon, so both build configurations expand the same way and
// the MACE_BM_DEPTHWISE_DECONV2D(...); use site provides the terminator.
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \
  MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU); \
  MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, GPU); \
  MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, half, GPU)
#else
#define MACE_BM_DEPTHWISE_DECONV2D(N, C, H, W, KH, KW, S, P) \
  MACE_BM_DEPTHWISE_DECONV2D_MACRO(N, C, H, W, KH, KW, S, P, float, CPU)
#endif
MACE_BM_DEPTHWISE_DECONV2D(1, 128, 15, 15, 1, 1, 1, 0);
MACE_BM_DEPTHWISE_DECONV2D(1, 32, 60, 60, 1, 1, 1, 0);
......
......@@ -80,10 +80,15 @@ void EltwiseBenchmark(
MACE_BENCHMARK( \
MACE_BM_ELTWISE_##ELT_TYPE##_##N##_##H##_##W##_##C##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_ELTWISE(ELT_TYPE, N, H, W, C) \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU); \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, GPU); \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, GPU);
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, GPU)
#else
#define MACE_BM_ELTWISE(ELT_TYPE, N, H, W, C) \
MACE_BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, CPU)
#endif
MACE_BM_ELTWISE(2, 1, 128, 128, 32);
MACE_BM_ELTWISE(2, 1, 240, 240, 256);
......@@ -93,8 +98,10 @@ MACE_BM_ELTWISE(0, 1, 240, 240, 256);
MACE_BM_ELTWISE(5, 1, 128, 128, 32);
MACE_BM_ELTWISE(5, 1, 240, 240, 256);
#ifdef MACE_ENABLE_QUANTIZE
MACE_BM_ELTWISE_MACRO(0, 1, 128, 128, 32, uint8_t, CPU);
MACE_BM_ELTWISE_MACRO(1, 1, 128, 128, 32, uint8_t, CPU);
#endif
} // namespace test
} // namespace ops
......
......@@ -185,7 +185,7 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
public:
explicit FullyConnectedOp(OpConstructContext *context)
: FullyConnectedOpBase(context) {
MemoryType mem_type;
MemoryType mem_type = MemoryType::CPU_BUFFER;
if (context->device()->gpu_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_ = make_unique<opencl::image::FullyConnectedKernel<T>>();
......
......@@ -41,13 +41,15 @@ void FCBenchmark(
{out_channel, channel, height, width}, true);
net.AddRandomInput<D, float>("Bias", {out_channel}, true);
OpenCLBufferType weight_type = OpenCLBufferType::WEIGHT_WIDTH;
OpDefBuilder("FullyConnected", "FullyConnectedTest")
.Input("Input")
.Input("Weight")
.Input("Bias")
.Output("Output")
.AddIntArg("weight_type", static_cast<int>(weight_type))
#ifdef MACE_ENABLE_OPENCL
.AddIntArg("weight_type",
static_cast<int>(OpenCLBufferType::WEIGHT_WIDTH))
#endif
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
......@@ -64,6 +66,7 @@ void FCBenchmark(
net.Sync();
}
#ifdef MACE_ENABLE_QUANTIZE
template <>
void FCBenchmark<CPU, uint8_t>(
int iters, int batch, int height, int width, int channel, int out_channel) {
......@@ -100,6 +103,8 @@ void FCBenchmark<CPU, uint8_t>(
net.Run();
}
}
#endif // MACE_ENABLE_QUANTIZE
} // namespace
#define MACE_BM_FC_MACRO(N, H, W, C, OC, TYPE, DEVICE) \
......@@ -116,11 +121,25 @@ void FCBenchmark<CPU, uint8_t>(
} \
MACE_BENCHMARK(MACE_BM_FC_##N##_##H##_##W##_##C##_##OC##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, GPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, half, GPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU);
MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, GPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU); \
MACE_BM_FC_MACRO(N, H, W, C, OC, uint8_t, CPU)
#else
#define MACE_BM_FC(N, H, W, C, OC) \
MACE_BM_FC_MACRO(N, H, W, C, OC, float, CPU)
#endif
MACE_BM_FC(1, 16, 16, 32, 32);
MACE_BM_FC(1, 8, 8, 32, 1000);
......
......@@ -90,10 +90,15 @@ void LSTMCell(int iters, int batch, int input_size, int hidden_units) {
MACE_BENCHMARK( \
MACE_BM_LSTMCELL_##N##_##INPUT_SIZE##_##HIDDEN_UNITS##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_LSTMCELL(N, INPUT_SIZE, HIDDEN_UNITS) \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, CPU); \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, GPU); \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, half, GPU);
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, half, GPU)
#else
#define MACE_BM_LSTMCELL(N, INPUT_SIZE, HIDDEN_UNITS) \
MACE_BM_LSTMCELL_MACRO(N, INPUT_SIZE, HIDDEN_UNITS, float, CPU)
#endif
MACE_BM_LSTMCELL(1, 64, 256);
MACE_BM_LSTMCELL(30, 64, 256);
......
......@@ -106,6 +106,7 @@ void MatmulBenchmark_Eigen(int iters, int m, int k, int n) {
}
}
#ifdef MACE_ENABLE_QUANTIZE
void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) {
mace::testing::StopTiming();
......@@ -181,6 +182,7 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) {
-128, output_pipeline);
}
}
#endif
} // namespace
......@@ -195,10 +197,16 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) {
} \
MACE_BENCHMARK(MACE_BM_MATMUL_##M##_##K##_##N##_##FUNC)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL(M, K, N) \
MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_uint8, uint8_t); \
MACE_BM_MATMUL_FUNC(M, K, N, gemmlowp_int32, uint8_t);
#else
#define MACE_BM_MATMUL(M, K, N) \
MACE_BM_MATMUL_FUNC(M, K, N, Eigen, float)
#endif
// Embedding size 384
MACE_BM_MATMUL(7, 384, 384);
......@@ -247,7 +255,7 @@ MACE_BM_MATMUL(512, 512, 196);
MACE_BM_MATMUL(1024, 1024, 49);
namespace {
template <DeviceType D, typename T>
template<DeviceType D, typename T>
void MatMulBenchmark(
int iters, int batch, int height, int channels, int out_width) {
mace::testing::StopTiming();
......@@ -289,7 +297,7 @@ void MatMulBenchmark(
net.Sync();
}
template <DeviceType D, typename T>
template<DeviceType D, typename T>
void MatMulTransposeBenchmark(
int iters, int batch, int height, int channels, int out_width) {
mace::testing::StopTiming();
......@@ -349,9 +357,14 @@ void MatMulTransposeBenchmark(
} \
MACE_BENCHMARK(MACE_BM_MATMUL_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL_OP(N, H, C, W) \
MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU); \
MACE_BM_MATMUL_MACRO(N, H, C, W, uint8_t, CPU);
MACE_BM_MATMUL_MACRO(N, H, C, W, uint8_t, CPU)
#else
#define MACE_BM_MATMUL_OP(N, H, C, W) \
MACE_BM_MATMUL_MACRO(N, H, C, W, float, CPU)
#endif
#define MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, TYPE, DEVICE) \
static void MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE( \
......@@ -365,9 +378,14 @@ void MatMulTransposeBenchmark(
} \
MACE_BENCHMARK(MACE_BM_MATMUL_##T_##N##_##H##_##C##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_QUANTIZE
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU); \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU);
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, uint8_t, CPU)
#else
#define MACE_BM_MATMUL_TRANPOSE(N, H, C, W) \
MACE_BM_MATMUL_TRANSPOSE_MACRO(N, H, C, W, float, CPU)
#endif
MACE_BM_MATMUL_OP(1, 30000, 256, 1);
MACE_BM_MATMUL_OP(1, 128, 256, 128);
......
......@@ -20,7 +20,6 @@ namespace mace {
namespace ops {
namespace test {
OpDefBuilder::OpDefBuilder(const char *type, const std::string &name) {
op_def_.set_type(type);
op_def_.set_name(name);
......@@ -102,11 +101,13 @@ void OpDefBuilder::Finalize(OperatorDef *op_def) const {
}
namespace {
#ifdef MACE_ENABLE_OPENCL
// Looks up the MACE_INTERNAL_STORAGE_PATH environment variable.
// Returns its value, or an empty string when the variable is not set.
std::string GetStoragePathFromEnv() {
  const char *env_value = getenv("MACE_INTERNAL_STORAGE_PATH");
  return env_value != nullptr ? std::string(env_value) : std::string();
}
#endif
} // namespace
OpTestContext *OpTestContext::Get(int num_threads,
......@@ -120,27 +121,35 @@ OpTestContext *OpTestContext::Get(int num_threads,
OpTestContext::OpTestContext(int num_threads,
CPUAffinityPolicy cpu_affinity_policy,
#ifdef MACE_ENABLE_OPENCL
bool use_gemmlowp)
: gpu_context_(std::make_shared<GPUContext>(GetStoragePathFromEnv())),
opencl_mem_types_({MemoryType::GPU_IMAGE}) {
#else
bool use_gemmlowp) {
#endif
device_map_[DeviceType::CPU] = make_unique<CPUDevice>(
num_threads, cpu_affinity_policy, use_gemmlowp);
#ifdef MACE_ENABLE_OPENCL
device_map_[DeviceType::GPU] = make_unique<GPUDevice>(
gpu_context_->opencl_tuner(),
gpu_context_->opencl_cache_storage(),
GPUPriorityHint::PRIORITY_NORMAL,
GPUPerfHint::PERF_HIGH);
}
std::shared_ptr<GPUContext> OpTestContext::gpu_context() const {
return gpu_context_;
#endif // MACE_ENABLE_OPENCL
}
// Returns the device registered for `device_type`, or nullptr when no device
// of that type was created by the constructor (the GPU device is only
// registered when MACE_ENABLE_OPENCL is defined).
Device *OpTestContext::GetDevice(DeviceType device_type) {
  // Use find() rather than operator[] so that looking up an unregistered
  // device type does not insert an empty entry into device_map_.
  const auto it = device_map_.find(device_type);
  return it == device_map_.end() ? nullptr : it->second.get();
}
#ifdef MACE_ENABLE_OPENCL
// Returns the GPUContext held by this test context. The shared_ptr is
// returned by value, so the caller shares ownership of the context.
std::shared_ptr<GPUContext> OpTestContext::gpu_context() const {
return gpu_context_;
}
// Returns a copy of the currently selected OpenCL memory types.
// OpsTestNet::RunOp iterates over this list when running on the GPU device.
std::vector<MemoryType> OpTestContext::opencl_mem_types() {
return opencl_mem_types_;
}
......@@ -156,6 +165,7 @@ void OpTestContext::SetOCLImageTestFlag() {
// Selects both GPU_IMAGE and GPU_BUFFER as the OpenCL memory types, replacing
// whatever selection was active before.
void OpTestContext::SetOCLImageAndBufferTestFlag() {
opencl_mem_types_ = {MemoryType::GPU_IMAGE, MemoryType::GPU_BUFFER};
}
#endif // MACE_ENABLE_OPENCL
bool OpsTestNet::Setup(mace::DeviceType device) {
NetDef net_def;
......@@ -227,6 +237,7 @@ MaceStatus OpsTestNet::Run() {
MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
if (device == DeviceType::GPU) {
#ifdef MACE_ENABLE_OPENCL
auto opencl_mem_types = OpTestContext::Get()->opencl_mem_types();
for (auto type : opencl_mem_types) {
OpTestContext::Get()->GetDevice(device)
......@@ -235,6 +246,9 @@ MaceStatus OpsTestNet::RunOp(mace::DeviceType device) {
MACE_RETURN_IF_ERROR(Run());
}
return MaceStatus::MACE_SUCCESS;
#else
return MaceStatus::MACE_UNSUPPORTED;
#endif
} else {
Setup(device);
return Run();
......
......@@ -28,8 +28,6 @@
#include "gtest/gtest.h"
#include "mace/core/net.h"
#include "mace/core/device_context.h"
#include "mace/core/runtime/opencl/gpu_device.h"
#include "mace/core/runtime/opencl/opencl_util.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/ops/ops_registry.h"
......@@ -39,6 +37,11 @@
#include "mace/utils/quantize.h"
#include "mace/ops/testing/test_utils.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/gpu_device.h"
#include "mace/core/runtime/opencl/opencl_util.h"
#endif
namespace mace {
namespace ops {
namespace test {
......@@ -78,21 +81,28 @@ class OpTestContext {
int num_threads = -1,
CPUAffinityPolicy cpu_affinity_policy = AFFINITY_BIG_ONLY,
bool use_gemmlowp = true);
std::shared_ptr<GPUContext> gpu_context() const;
Device *GetDevice(DeviceType device_type);
#ifdef MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> gpu_context() const;
std::vector<MemoryType> opencl_mem_types();
void SetOCLBufferTestFlag();
void SetOCLImageTestFlag();
void SetOCLImageAndBufferTestFlag();
#endif
private:
OpTestContext(int num_threads,
CPUAffinityPolicy cpu_affinity_policy,
bool use_gemmlowp);
MACE_DISABLE_COPY_AND_ASSIGN(OpTestContext);
std::map<DeviceType, std::unique_ptr<Device>> device_map_;
#ifdef MACE_ENABLE_OPENCL
std::shared_ptr<GPUContext> gpu_context_;
std::vector<MemoryType> opencl_mem_types_;
std::map<DeviceType, std::unique_ptr<Device>> device_map_;
#endif
};
class OpsTestNet {
......@@ -420,7 +430,9 @@ class OpsTestBase : public ::testing::Test {
}
// Runs after every test. In OpenCL builds it calls SetOCLImageTestFlag() on
// the shared OpTestContext; in other builds it does nothing.
// NOTE(review): presumably this restores the default image-only memory type
// so a test that switched flags does not affect later tests -- confirm
// against SetOCLImageTestFlag().
virtual void TearDown() {
#ifdef MACE_ENABLE_OPENCL
OpTestContext::Get()->SetOCLImageTestFlag();
#endif
}
};
......
......@@ -71,10 +71,15 @@ void Pad(int iters, int batch, int height,
MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE \
##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE) \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU); \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, GPU); \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU);
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU)
#else
#define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE) \
MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU)
#endif
#define MACE_BM_PAD(N, H, W, C, PAD) \
MACE_BM_PAD_MODE(N, H, W, C, PAD, CONSTANT); \
......
......@@ -89,11 +89,25 @@ void Pooling(int iters,
MACE_BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_\
##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, GPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, half, GPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU);
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, GPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU); \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, uint8_t, CPU)
#else
#define MACE_BM_POOLING(N, C, H, W, K, S, PA, PO) \
MACE_BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, float, CPU)
#endif
MACE_BM_POOLING(1, 3, 129, 129, 2, 2, SAME, MAX);
......
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef MACE_ENABLE_QUANTIZE
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
......@@ -113,3 +115,5 @@ MACE_BM_DEQUANTIZE(1470000);
} // namespace test
} // namespace ops
} // namespace mace
#endif // MACE_ENABLE_QUANTIZE
......@@ -67,10 +67,15 @@ void Reduce(int iters, int batch, int channels,
MACE_BENCHMARK( \
MACE_BM_REDUCE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_REDUCE(N, C, H, W) \
MACE_BM_REDUCE_MACRO(N, C, H, W, float, GPU); \
MACE_BM_REDUCE_MACRO(N, C, H, W, half, GPU); \
MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU);
MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU)
#else
#define MACE_BM_REDUCE(N, C, H, W) \
MACE_BM_REDUCE_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_REDUCE(1, 1, 512, 512);
......
......@@ -82,10 +82,15 @@ void ResizeBicubicBenchmark(int iters,
MACE_BM_RESIZE_BICUBIC_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RESIZE_BICUBIC(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, half, GPU);
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#else
#define MACE_BM_RESIZE_BICUBIC(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BICUBIC_MACRO(N, C, H0, W0, H1, W1, float, CPU)
#endif
MACE_BM_RESIZE_BICUBIC(1, 128, 120, 120, 480, 480);
MACE_BM_RESIZE_BICUBIC(1, 256, 7, 7, 15, 15);
......
......@@ -88,11 +88,25 @@ void ResizeBilinearBenchmark(int iters,
MACE_BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_\
##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU);
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU)
#else
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU)
#endif
MACE_BM_RESIZE_BILINEAR(1, 128, 120, 120, 480, 480);
MACE_BM_RESIZE_BILINEAR(1, 256, 7, 7, 15, 15);
......
......@@ -87,10 +87,15 @@ void ResizeNearestNeighborBenchmark(int iters,
MACE_BM_RESIZE_NEAREST_NEIGHBOR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_\
##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU);
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, half, GPU)
#else
#define MACE_BM_RESIZE_NEAREST_NEIGHBOR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_NEAREST_NEIGHBOR_MACRO(N, C, H0, W0, H1, W1, float, CPU)
#endif
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 128, 120, 120, 480, 480);
MACE_BM_RESIZE_NEAREST_NEIGHBOR(1, 256, 7, 7, 15, 15);
......
......@@ -51,8 +51,6 @@ void Reverse(int iters, int batch, int channels, int height, int width) {
#define MACE_BM_REVERSE_MACRO(N, C, H, W, TYPE, DEVICE) \
static void MACE_BM_REVERSE_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t macs = \
static_cast<int64_t>(iters) * N * C * H * W; \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Reverse<DEVICE, TYPE>(iters, N, C, H, W); \
......
......@@ -57,6 +57,7 @@ void SoftmaxBenchmark(
net.Sync();
}
#ifdef MACE_ENABLE_QUANTIZE
template <>
void SoftmaxBenchmark<CPU, uint8_t>(
int iters, int batch, int channels, int height, int width) {
......@@ -80,6 +81,9 @@ void SoftmaxBenchmark<CPU, uint8_t>(
output->SetScale(0);
output->SetZeroPoint(1);
Tensor *input = net.GetTensor("Input");
input->SetScale(0.1);
// Warm-up
for (int i = 0; i < 2; ++i) {
net.Run();
......@@ -92,6 +96,8 @@ void SoftmaxBenchmark<CPU, uint8_t>(
}
net.Sync();
}
#endif // MACE_ENABLE_QUANTIZE
} // namespace
#define MACE_BM_SOFTMAX_MACRO(N, C, H, W, TYPE, DEVICE) \
......@@ -103,11 +109,25 @@ void SoftmaxBenchmark<CPU, uint8_t>(
} \
MACE_BENCHMARK(MACE_BM_SOFTMAX_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, uint8_t, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU);
MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, half, GPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU); \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, uint8_t, CPU)
#else
#define MACE_BM_SOFTMAX(N, C, H, W) \
MACE_BM_SOFTMAX_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_SOFTMAX(1, 2, 512, 512);
MACE_BM_SOFTMAX(1, 3, 512, 512);
......
......@@ -70,10 +70,23 @@ void BMSpaceToBatch(
MACE_BENCHMARK( \
MACE_BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
#if defined(MACE_ENABLE_OPENCL) && defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU);
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU)
#elif defined(MACE_ENABLE_OPENCL)
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU)
#elif defined(MACE_ENABLE_QUANTIZE)
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU)
#else
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU)
#endif
MACE_BM_SPACE_TO_BATCH(128, 16, 16, 128, 2);
MACE_BM_SPACE_TO_BATCH(1, 256, 256, 32, 2);
......
......@@ -68,10 +68,15 @@ void SpaceToDepth(
MACE_BENCHMARK( \
MACE_BM_SPACE_TO_DEPTH_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SPACE_TO_DEPTH(N, C, H, W, G) \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU); \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, GPU); \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, half, GPU);
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, half, GPU)
#else
#define MACE_BM_SPACE_TO_DEPTH(N, C, H, W, G) \
MACE_BM_SPACE_TO_DEPTH_MACRO(N, C, H, W, G, float, CPU)
#endif
MACE_BM_SPACE_TO_DEPTH(1, 64, 64, 64, 4);
MACE_BM_SPACE_TO_DEPTH(1, 64, 128, 128, 4);
......
......@@ -73,10 +73,15 @@ void BMSplitHelper(int iters,
MACE_BENCHMARK( \
MACE_BM_SPLIT_##N##_##H##_##W##_##C##_##NO##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SPLIT(N, H, W, C, NO) \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, CPU); \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, GPU); \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, half, GPU);
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, half, GPU)
#else
#define MACE_BM_SPLIT(N, H, W, C, NO) \
MACE_BM_SPLIT_MACRO(N, H, W, C, NO, float, CPU)
#endif
MACE_BM_SPLIT(1, 32, 32, 32, 2);
MACE_BM_SPLIT(1, 32, 32, 128, 2);
......
......@@ -13,7 +13,6 @@
// limitations under the License.
#include "mace/core/operator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
......@@ -69,10 +68,15 @@ void SqrDiffMean(int iters, int batch, int channels,
MACE_BENCHMARK( \
MACE_BM_SQRDIFF_MEAN_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#ifdef MACE_ENABLE_OPENCL
#define MACE_BM_SQRDIFF_MEAN(N, C, H, W) \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, GPU); \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, half, GPU); \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU);
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU)
#else
#define MACE_BM_SQRDIFF_MEAN(N, C, H, W) \
MACE_BM_SQRDIFF_MEAN_MACRO(N, C, H, W, float, CPU)
#endif
MACE_BM_SQRDIFF_MEAN(1, 1, 512, 512);
......
......@@ -48,7 +48,7 @@ cc_test(
linkstatic = 1,
deps = [
":port",
"@gtest//:gtest",
"@gtest",
"@gtest//:gtest_main",
],
)
......@@ -14,9 +14,8 @@ cc_library(
hdrs = if_android(glob([
"*.h",
])),
deps = [
"//mace/port:port_base",
"//mace/port/posix:port_posix",
],
deps = if_android([
"//mace/port/linux_base:port_linux_base",
]),
alwayslink = 1,
)
......@@ -43,51 +43,12 @@
namespace mace {
namespace port {
// Returns the current time from the platform clock.
// When compiled for Hexagon (__hexagon__ defined) the value comes from
// HAP_perf_get_time_us(); otherwise it is delegated to the shared POSIX
// NowMicros() helper. Both names indicate a microsecond unit.
int64_t AndroidEnv::NowMicros() {
#ifdef __hexagon__
return HAP_perf_get_time_us();
#else
return mace::port::posix::NowMicros();
#endif
}
// Returns a pointer to the POSIX file system member of this environment.
// The pointer is not owned by the caller.
FileSystem *AndroidEnv::GetFileSystem() {
return &posix_file_system_;
}
// Returns a pointer to the Android log writer member of this environment.
// The pointer is not owned by the caller.
LogWriter *AndroidEnv::GetLogWriter() {
return &log_writer_;
}
namespace {
int GetCPUCount() {
int cpu_count = 0;
std::string cpu_sys_conf = "/proc/cpuinfo";
std::ifstream f(cpu_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpu_sys_conf;
return -1;
}
std::string line;
const std::string processor_key = "processor";
while (std::getline(f, line)) {
if (line.size() >= processor_key.size()
&& line.compare(0, processor_key.size(), processor_key) == 0) {
++cpu_count;
}
}
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpu_sys_conf;
}
if (!f.eof()) {
LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
}
f.close();
VLOG(1) << "CPU cores: " << cpu_count;
return cpu_count;
}
struct BacktraceState {
void** current;
void** end;
......@@ -115,38 +76,6 @@ size_t BackTrace(void** buffer, size_t max) {
} // namespace
// Appends the maximum frequency of every CPU to `max_freqs`, one entry per
// CPU in ascending cpu id order, as read from
// /sys/devices/system/cpu/cpu<id>/cpufreq/cpuinfo_max_freq.
//
// Returns MACE_RUNTIME_ERROR when the CPU count cannot be determined or when
// any per-CPU file cannot be opened or read. Entries appended before the
// failure are left in `max_freqs`.
MaceStatus AndroidEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
  MACE_CHECK_NOTNULL(max_freqs);

  const int cpu_count = GetCPUCount();
  if (cpu_count < 0) {
    return MaceStatus::MACE_RUNTIME_ERROR;
  }

  for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
    const std::string cpuinfo_max_freq_sys_conf = MakeString(
        "/sys/devices/system/cpu/cpu",
        cpu_id,
        "/cpufreq/cpuinfo_max_freq");
    std::ifstream f(cpuinfo_max_freq_sys_conf);
    if (!f.is_open()) {
      LOG(ERROR) << "failed to open " << cpuinfo_max_freq_sys_conf;
      return MaceStatus::MACE_RUNTIME_ERROR;
    }
    std::string line;
    // Skipping a CPU here would shift every later entry, so the vector index
    // would no longer correspond to the cpu id. Fail instead, exactly like
    // the open failure above.
    if (!std::getline(f, line)) {
      LOG(ERROR) << "failed to read " << cpuinfo_max_freq_sys_conf;
      return MaceStatus::MACE_RUNTIME_ERROR;
    }
    max_freqs->push_back(strtof(line.c_str(), nullptr));
  }

  VLOG(1) << "CPU freq: " << MakeString(*max_freqs);

  return MaceStatus::MACE_SUCCESS;
}
MaceStatus AndroidEnv::SchedSetAffinity(const std::vector<size_t> &cpu_ids) {
// compute mask
cpu_set_t mask;
......
......@@ -20,18 +20,16 @@
#include <vector>
#include "mace/port/android/logger.h"
#include "mace/port/posix/file_system.h"
#include "mace/port/env.h"
#include "mace/port/linux_base/env.h"
#include "mace/port/posix/file_system.h"
namespace mace {
namespace port {
class AndroidEnv : public Env {
class AndroidEnv : public LinuxBaseEnv {
public:
int64_t NowMicros() override;
MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
MaceStatus SchedSetAffinity(const std::vector<size_t> &cpu_ids) override;
FileSystem *GetFileSystem() override;
LogWriter *GetLogWriter() override;
std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;
std::unique_ptr<MallocLogger> NewMallocLogger(
......@@ -39,7 +37,6 @@ class AndroidEnv : public Env {
const std::string &name) override;
private:
PosixFileSystem posix_file_system_;
AndroidLogWriter log_writer_;
};
......
......@@ -20,3 +20,8 @@ cc_library(
],
alwayslink = 1,
)
# Exposes the ar_merge_on_darwin.sh helper script as a target so that other
# rules can reference it by label.
sh_library(
name = "darwin_ar_merge",
srcs = ["ar_merge_on_darwin.sh"],
)
#!/usr/bin/env bash
# Copyright 2018 The MACE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Merges static libraries on Darwin by interpreting a small command script
# read from stdin (the CREATE / ADDLIB / SAVE / END commands that GNU ar
# accepts in its -M script mode).
#
# Usage: ar_merge_on_darwin.sh <ar_command> < script
#
#   CREATE <archive>  remember the output archive path
#   ADDLIB <library>  extract the library's objects into a scratch directory
#   SAVE              archive every extracted object into the output archive
#   END               print a completion banner
#
# <ar_command> is resolved relative to the starting directory while
# extracting, because extraction runs inside a per-library scratch directory.

output_file_path=""
object_files=""
workspace=$(mktemp -d ./tmpd.XXXXXX) || exit 1
# The extracted objects are only needed until the archive has been written,
# so remove the scratch directory whenever the script exits.
trap 'rm -rf "${workspace}"' EXIT
ar_command=$1

while read -r script_line; do
  command=""
  lib_path=""
  # Split the line into its command word and optional path with `read`
  # instead of eval-ing generated assignments, so input is never executed.
  read -r command lib_path _ <<< "${script_line}"

  upper_command=$(echo "${command}" | tr 'a-z' 'A-Z')
  if [[ ${upper_command} == "CREATE" ]]; then
    output_file_path=${lib_path}
  elif [[ ${upper_command} == "ADDLIB" ]]; then
    lib_name=$(basename "${lib_path}")
    lib_dir="${workspace}/${lib_name}"
    mkdir "${lib_dir}"
    cp "${lib_path}" "${lib_dir}"
    cur_path=$(pwd)
    # Extract inside the per-library directory; bail out rather than unpack
    # into the wrong place if it cannot be entered.
    cd "${lib_dir}" || exit 1
    "${cur_path}/${ar_command}" -x "${lib_name}"
    # Stored as an unexpanded glob; it is expanded by the SAVE command.
    object_files="${object_files} ${lib_dir}/*.o"
    cd "${cur_path}" || exit 1
  elif [[ ${upper_command} == "SAVE" ]]; then
    # object_files is deliberately unquoted so that its globs expand here.
    "${ar_command}" -rcsu "${output_file_path}" ${object_files}
  elif [[ ${upper_command} == "END" ]]; then
    echo "========== ar_merge_on_darwin end =========="
  else
    echo "error: Get an invalid input line: ${script_line}"
  fi
done
......@@ -15,7 +15,10 @@
#include "mace/port/darwin/env.h"
#include <execinfo.h>
#include <stdint.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/types.h>
#include <cstddef>
#include <string>
......@@ -24,14 +27,37 @@
#include "mace/port/posix/backtrace.h"
#include "mace/port/posix/file_system.h"
#include "mace/port/posix/time.h"
#include "mace/utils/logging.h"
namespace mace {
namespace port {
namespace {
const char kCpuFrequencyMax[] = "hw.cpufrequency_max";
}
// Returns the current time by delegating to the shared POSIX NowMicros()
// helper.
int64_t DarwinEnv::NowMicros() {
return mace::port::posix::NowMicros();
}
// Appends a single entry to `max_freqs`: the value of the
// `hw.cpufrequency_max` sysctl property.
//
// Returns MACE_RUNTIME_ERROR when the property cannot be read, in which case
// `max_freqs` is left untouched.
//
// TODO(luxuhui): this func is not accurate, darwin does not support
// acquiring CPU frequencies, we need to reconsider the CPU scheduling
// strategy.
MaceStatus DarwinEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
  MACE_CHECK_NOTNULL(max_freqs);

  uint64_t freq = 0;
  size_t size = sizeof(freq);
  const int ret = sysctlbyname(kCpuFrequencyMax, &freq, &size, nullptr, 0);
  if (ret < 0) {
    LOG(ERROR) << "failed to get property: " << kCpuFrequencyMax;
    return MaceStatus::MACE_RUNTIME_ERROR;
  }
  // The vector stores floats, so the narrowing from uint64_t is intended;
  // spell it out instead of relying on an implicit conversion.
  max_freqs->push_back(static_cast<float>(freq));

  return MaceStatus::MACE_SUCCESS;
}
// Returns a pointer to the POSIX file system member of this environment.
// The pointer is not owned by the caller.
FileSystem *DarwinEnv::GetFileSystem() {
return &posix_file_system_;
}
......
......@@ -28,6 +28,7 @@ namespace port {
class DarwinEnv : public Env {
public:
int64_t NowMicros() override;
MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
FileSystem *GetFileSystem() override;
LogWriter *GetLogWriter() override;
std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;
......
......@@ -14,9 +14,8 @@ cc_library(
hdrs = if_linux(glob([
"*.h",
])),
deps = [
"//mace/port:port_base",
"//mace/port/posix:port_posix",
],
deps = if_linux([
"//mace/port/linux_base:port_linux_base",
]),
alwayslink = 1,
)
......@@ -21,6 +21,7 @@
#include <string>
#include <vector>
#include "mace/port/env.h"
#include "mace/port/posix/backtrace.h"
#include "mace/port/posix/file_system.h"
#include "mace/port/posix/time.h"
......@@ -28,14 +29,6 @@
namespace mace {
namespace port {
// Returns the current time by delegating to the shared POSIX NowMicros()
// helper.
int64_t LinuxEnv::NowMicros() {
return mace::port::posix::NowMicros();
}
// Returns a pointer to the POSIX file system member of this environment.
// The pointer is not owned by the caller.
FileSystem *LinuxEnv::GetFileSystem() {
return &posix_file_system_;
}
// Returns a pointer to the log writer member of this environment.
// The pointer is not owned by the caller.
LogWriter *LinuxEnv::GetLogWriter() {
return &log_writer_;
}
......
......@@ -18,22 +18,18 @@
#include <string>
#include <vector>
#include "mace/port/env.h"
#include "mace/port/linux_base/env.h"
#include "mace/port/logger.h"
#include "mace/port/posix/file_system.h"
namespace mace {
namespace port {
class LinuxEnv : public Env {
class LinuxEnv : public LinuxBaseEnv {
public:
int64_t NowMicros() override;
FileSystem *GetFileSystem() override;
LogWriter *GetLogWriter() override;
std::vector<std::string> GetBackTraceUnsafe(int max_steps) override;
private:
PosixFileSystem posix_file_system_;
LogWriter log_writer_;
};
......
package(
default_visibility = ["//visibility:public"],
)
licenses(["notice"]) # Apache 2.0
# Env code shared by the Linux-based ports. Built from every .cc/.h file in
# this package, on top of the generic port base and the POSIX helpers.
cc_library(
name = "port_linux_base",
srcs = glob([
"*.cc",
]),
hdrs = glob([
"*.h",
]),
deps = [
"//mace/port:port_base",
"//mace/port/posix:port_posix",
],
alwayslink = 1,
)
// Copyright 2019 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/port/linux_base/env.h"
#include <sys/time.h>
#include <cstddef>
#include <fstream>
#include <string>
#include <vector>
#include "mace/port/posix/file_system.h"
#include "mace/port/posix/time.h"
#include "mace/utils/logging.h"
namespace mace {
namespace port {
namespace {
int GetCPUCount() {
int cpu_count = 0;
std::string cpu_sys_conf = "/proc/cpuinfo";
std::ifstream f(cpu_sys_conf);
if (!f.is_open()) {
LOG(ERROR) << "failed to open " << cpu_sys_conf;
return -1;
}
std::string line;
const std::string processor_key = "processor";
while (std::getline(f, line)) {
if (line.size() >= processor_key.size()
&& line.compare(0, processor_key.size(), processor_key) == 0) {
++cpu_count;
}
}
if (f.bad()) {
LOG(ERROR) << "failed to read " << cpu_sys_conf;
}
if (!f.eof()) {
LOG(ERROR) << "failed to read end of " << cpu_sys_conf;
}
f.close();
VLOG(1) << "CPU cores: " << cpu_count;
return cpu_count;
}
} // namespace
// Returns the current time by delegating to the shared POSIX NowMicros()
// helper.
int64_t LinuxBaseEnv::NowMicros() {
return mace::port::posix::NowMicros();
}
// Returns a pointer to the POSIX file system member of this environment.
// The pointer is not owned by the caller.
FileSystem *LinuxBaseEnv::GetFileSystem() {
return &posix_file_system_;
}
// Appends the maximum frequency of every CPU to `max_freqs`, one entry per
// CPU in ascending cpu id order, as read from
// /sys/devices/system/cpu/cpu<id>/cpufreq/cpuinfo_max_freq.
//
// Returns MACE_RUNTIME_ERROR when the CPU count cannot be determined or when
// any per-CPU file cannot be opened or read. Entries appended before the
// failure are left in `max_freqs`.
MaceStatus LinuxBaseEnv::GetCPUMaxFreq(std::vector<float> *max_freqs) {
  MACE_CHECK_NOTNULL(max_freqs);

  const int cpu_count = GetCPUCount();
  if (cpu_count < 0) {
    return MaceStatus::MACE_RUNTIME_ERROR;
  }

  for (int cpu_id = 0; cpu_id < cpu_count; ++cpu_id) {
    const std::string cpuinfo_max_freq_sys_conf = MakeString(
        "/sys/devices/system/cpu/cpu",
        cpu_id,
        "/cpufreq/cpuinfo_max_freq");
    std::ifstream f(cpuinfo_max_freq_sys_conf);
    if (!f.is_open()) {
      LOG(ERROR) << "failed to open " << cpuinfo_max_freq_sys_conf;
      return MaceStatus::MACE_RUNTIME_ERROR;
    }
    std::string line;
    // Skipping a CPU here would shift every later entry, so the vector index
    // would no longer correspond to the cpu id. Fail instead, exactly like
    // the open failure above.
    if (!std::getline(f, line)) {
      LOG(ERROR) << "failed to read " << cpuinfo_max_freq_sys_conf;
      return MaceStatus::MACE_RUNTIME_ERROR;
    }
    max_freqs->push_back(strtof(line.c_str(), nullptr));
  }

  VLOG(1) << "CPU freq: " << MakeString(*max_freqs);

  return MaceStatus::MACE_SUCCESS;
}
} // namespace port
} // namespace mace
// Copyright 2019 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_PORT_LINUX_BASE_ENV_H_
#define MACE_PORT_LINUX_BASE_ENV_H_
#include <vector>
#include "mace/port/env.h"
#include "mace/port/posix/file_system.h"
namespace mace {
namespace port {
// Env implementation shared by the Linux-based ports; AndroidEnv and LinuxEnv
// derive from it. It provides the clock, the CPU max-frequency query and the
// file system accessor, leaving the remaining Env methods to subclasses.
class LinuxBaseEnv : public Env {
public:
// Returns the current time via the POSIX NowMicros() helper.
int64_t NowMicros() override;
// Appends one maximum-frequency entry per CPU to `max_freqs`.
MaceStatus GetCPUMaxFreq(std::vector<float> *max_freqs) override;
// Returns a non-owning pointer to posix_file_system_.
FileSystem *GetFileSystem() override;
protected:
// File system object handed out by GetFileSystem(); accessible to subclasses.
PosixFileSystem posix_file_system_;
};
} // namespace port
} // namespace mace
#endif // MACE_PORT_LINUX_BASE_ENV_H_
......@@ -7,12 +7,12 @@ licenses(["notice"]) # Apache 2.0
load(
"//mace:mace.bzl",
"if_android",
"if_neon_enabled",
"if_openmp_enabled",
"if_android_armv7",
"if_hexagon_enabled",
"if_hta_enabled",
"if_neon_enabled",
"if_opencl_enabled",
"if_openmp_enabled",
"if_quantize_enabled",
)
......@@ -21,7 +21,11 @@ cc_library(
hdrs = [
"mace_api_test.h",
],
copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"],
copts = [
"-Werror",
"-Wextra",
"-Wno-missing-field-initializers",
],
)
cc_test(
......@@ -33,7 +37,7 @@ cc_test(
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp"
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
......@@ -49,12 +53,14 @@ cc_test(
]) + if_hta_enabled([
"-DMACE_ENABLE_HTA",
]),
linkopts = ["-fopenmp"],
linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1,
deps = [
":mace_api_test_header",
"//mace/libmace",
"//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main",
],
)
......@@ -68,7 +74,7 @@ cc_test(
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp"
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
......@@ -84,12 +90,14 @@ cc_test(
]) + if_hta_enabled([
"-DMACE_ENABLE_HTA",
]),
linkopts = ["-fopenmp"],
linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1,
deps = [
":mace_api_test_header",
"//mace/libmace",
"//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main",
],
)
......@@ -103,7 +111,7 @@ cc_test(
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp"
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
......@@ -119,11 +127,13 @@ cc_test(
]) + if_hta_enabled([
"-DMACE_ENABLE_HTA",
]),
linkopts = ["-fopenmp"],
linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1,
deps = [
"//mace/libmace",
"//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main",
],
)
......@@ -137,7 +147,7 @@ cc_test(
"-Wextra",
"-Wno-missing-field-initializers",
] + if_openmp_enabled([
"-fopenmp"
"-fopenmp",
]) + if_neon_enabled([
"-DMACE_ENABLE_NEON",
]) + if_android_armv7([
......@@ -153,11 +163,13 @@ cc_test(
]) + if_hta_enabled([
"-DMACE_ENABLE_HTA",
]),
linkopts = ["-fopenmp"],
linkopts = if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1,
deps = [
"//mace/libmace",
"//mace/ops:test",
"//mace/libmace:libmace",
"@gtest//:gtest_main",
],
)
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include "mace/ops/ops_test_util.h"
namespace mace {
......@@ -40,3 +42,5 @@ TEST(MaceAPIExceptionTest, WrongInputTest) {
} // namespace test
} // namespace mace
#endif // MACE_ENABLE_OPENCL
......@@ -12,6 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#ifdef MACE_ENABLE_OPENCL
#include <thread> // NOLINT(build/c++11)
#include "mace/test/mace_api_test.h"
......@@ -102,3 +104,5 @@ TEST_F(MaceMTAPITest, MultipleThread) {
} // namespace test
} // namespace mace
#endif // MACE_ENABLE_OPENCL
# Examples
load("//mace:mace.bzl", "if_openmp_enabled", "if_android", "if_opencl_enabled")
load(
"//mace:mace.bzl",
"if_android",
"if_darwin",
"if_opencl_enabled",
"if_openmp_enabled",
)
cc_binary(
name = "mace_run_static",
......@@ -10,9 +16,10 @@ cc_binary(
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]),
linkopts = [
"-fuse-ld=gold",
] + if_openmp_enabled([
linkopts = if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 1,
......@@ -33,9 +40,10 @@ cc_binary(
] + if_opencl_enabled([
"-DMACE_ENABLE_OPENCL",
]),
linkopts = [
"-fuse-ld=gold",
] + if_openmp_enabled([
linkopts = if_darwin(
[],
default_value = ["-fuse-ld=gold"],
) + if_openmp_enabled([
"-fopenmp",
]),
linkstatic = 0,
......
......@@ -24,7 +24,6 @@
* --model_data_file=model_data.data \
* --device=GPU
*/
#include <malloc.h>
#include <stdint.h>
#include <cstdio>
#include <cstdlib>
......@@ -96,56 +95,6 @@ DataFormat ParseDataFormat(const std::string &data_format_str) {
}
}
// Takes a fresh mallinfo() snapshot and logs, at INFO level, every counter
// whose value differs from the one in `prev`, together with the signed change.
// Returns the fresh snapshot so the caller can feed it into the next call.
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
  const struct mallinfo curr = mallinfo();

  // Emits one line for a single counter, but only when it actually moved.
  // Both readings are widened to int64_t before the subtraction.
  const auto report_if_changed =
      [](const char *label, int64_t before, int64_t after) {
        if (before == after) {
          return;
        }
        LOG(INFO) << label << after << ", diff: " << (after - before);
      };

  report_if_changed("Non-mmapped space allocated (bytes): ",
                    prev.arena, curr.arena);
  report_if_changed("Number of free chunks: ",
                    prev.ordblks, curr.ordblks);
  report_if_changed("Number of free fastbin blocks: ",
                    prev.smblks, curr.smblks);
  report_if_changed("Number of mmapped regions: ",
                    prev.hblks, curr.hblks);
  report_if_changed("Space allocated in mmapped regions (bytes): ",
                    prev.hblkhd, curr.hblkhd);
  report_if_changed("Maximum total allocated space (bytes): ",
                    prev.usmblks, curr.usmblks);
  report_if_changed("Space in freed fastbin blocks (bytes): ",
                    prev.fsmblks, curr.fsmblks);
  report_if_changed("Total allocated space (bytes): ",
                    prev.uordblks, curr.uordblks);
  report_if_changed("Total free space (bytes): ",
                    prev.fordblks, curr.fordblks);
  report_if_changed("Top-most, releasable space (bytes): ",
                    prev.keepcost, curr.keepcost);

  return curr;
}
DEFINE_string(model_name,
"",
"model name in yaml");
......@@ -395,8 +344,14 @@ bool RunModel(const std::string &model_name,
if (FLAGS_round > 0) {
LOG(INFO) << "Run model";
int64_t total_run_duration = 0;
struct mallinfo prev = mallinfo();
for (int i = 0; i < FLAGS_round; ++i) {
std::unique_ptr<port::Logger> info_log;
std::unique_ptr<port::MallocLogger> malloc_logger;
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
info_log = LOG_PTR(INFO);
malloc_logger = port::Env::Default()->NewMallocLogger(
info_log.get(), MakeString(i));
}
MaceStatus run_status;
while (true) {
int64_t t0 = NowMicros();
......@@ -436,10 +391,6 @@ bool RunModel(const std::string &model_name,
break;
}
}
if (FLAGS_malloc_check_cycle >= 1 && i % FLAGS_malloc_check_cycle == 0) {
LOG(INFO) << "=== check malloc info change #" << i << " ===";
prev = LogMallinfoChange(prev);
}
}
model_run_millis = total_run_duration / 1000.0 / FLAGS_round;
LOG(INFO) << "Average latency: " << model_run_millis << " ms";
......
......@@ -16,6 +16,7 @@
#define MACE_UTILS_LOGGING_H_
#include <limits>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
......@@ -24,6 +25,7 @@
#include "mace/port/env.h"
#include "mace/port/logger.h"
#include "mace/utils/macros.h"
#include "mace/utils/memory.h"
#include "mace/utils/string_util.h"
......@@ -33,6 +35,9 @@ namespace logging_internal {
// Creates a temporary ::mace::port::Logger tagged with the current file and
// line; `severity` is resolved inside namespace mace (e.g. LOG(INFO)).
#define LOG(severity) \
::mace::port::Logger(__FILE__, __LINE__, mace::severity)
// Same constructor arguments as LOG, but the logger is built through
// make_unique instead of being a temporary expression.
// NOTE(review): make_unique is unqualified here, so it must be resolvable at
// every expansion site -- presumably the one from mace/utils/memory.h; confirm.
#define LOG_PTR(severity) \
make_unique<mace::port::Logger>(__FILE__, __LINE__, mace::severity)
// Evaluates mace::ShouldGenerateVLogMessage for verbosity level `vll`.
#define VLOG_IS_ON(vll) (mace::ShouldGenerateVLogMessage(vll))
// Streams to LOG(INFO) only when VLOG_IS_ON(vll) holds. The expansion is an
// unbraced `if`, so an `else` written directly after a VLOG statement would
// attach to this hidden `if`; brace VLOG statements used inside if/else.
#define VLOG(vll) if (VLOG_IS_ON(vll)) LOG(INFO)
......
......@@ -24,25 +24,17 @@ build:linux --define linux=true
# MacOS host build, --config darwin
build:darwin --define darwin=true
# iOS and other darwin platforms, --config ios
build:ios --define darwin=true
build:ios --distinct_host_configuration=true
build:ios --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:ios --cpu=arm64
# Linux host build, --config linux
build:linux --define linux=true
# MacOS host build, --config darwin
build:darwin --define darwin=true
build:darwin --cpu=darwin_x86_64
build:darwin --copt -Wno-unused-lambda-capture
build:darwin --copt -Wno-missing-braces
# iOS and other darwin platforms, --config ios
build:ios --define darwin=true
build:ios --distinct_host_configuration=true
build:ios --host_crosstool_top=@bazel_tools//tools/cpp:toolchain
build:ios --cpu=ios_arm64
build:ios --copt -Wno-unused-lambda-capture
build:ios --copt -Wno-missing-braces
# Usage example: bazel build --config arm_linux_gnueabihf
# Used to fix the "library not found" linking issue, see also:
......@@ -81,6 +73,12 @@ build:optimization --copt=-ffunction-sections
build:optimization --copt=-fdata-sections
build:optimization --linkopt=-Wl,--gc-sections
# Usage example: bazel build --config optimization_darwin
build:optimization_darwin --copt=-O3
build:optimization_darwin --copt=-ffunction-sections
build:optimization_darwin --copt=-fdata-sections
build:optimization_darwin --linkopt=-Wl,-dead_strip
# Usage example: bazel build --config symbol_hidden
build:symbol_hidden --copt=-fvisibility=hidden
......
......@@ -20,7 +20,6 @@
# --stdout_processor=stdout_processor
import argparse
import re
import sys
import sh_commands
......@@ -105,6 +104,11 @@ def parse_args():
type=str2bool,
default=True,
help="Whether to use neon optimization")
parser.add_argument(
"--enable_openmp",
type=str2bool,
default=True,
help="Disable openmp for multiple thread.")
parser.add_argument(
'--address_sanitizer',
action="store_true",
......@@ -140,7 +144,8 @@ def main(unused_args):
toolchain=toolchain,
enable_neon=FLAGS.enable_neon,
address_sanitizer=FLAGS.address_sanitizer,
debug_mode=FLAGS.debug_mode)
debug_mode=FLAGS.debug_mode,
enable_openmp=FLAGS.enable_openmp)
if FLAGS.run_target:
target_devices = DeviceManager.list_devices(FLAGS.device_yml)
if FLAGS.target_socs != TargetSOCTag.all and\
......
......@@ -59,6 +59,9 @@ class DeviceWrapper:
raise e
self.data_dir = DEVICE_DATA_DIR
self.interior_dir = self.data_dir + '/interior'
elif self.system == SystemType.host:
self.data_dir = DEVICE_DATA_DIR
self.interior_dir = self.data_dir + '/interior'
##################
# internal use #
......
......@@ -275,10 +275,11 @@ def bazel_build(target,
extra_args=""):
six.print_("* Build %s with ABI %s" % (target, abi))
if abi == "host":
toolchain = platform.system().lower()
bazel_args = (
"build",
"--config",
platform.system().lower(),
toolchain,
"--define",
"openmp=%s" % str(enable_openmp).lower(),
"--define",
......@@ -310,7 +311,10 @@ def bazel_build(target,
if debug_mode:
bazel_args += ("--config", "debug")
if not address_sanitizer and not debug_mode:
bazel_args += ("--config", "optimization")
if toolchain == "darwin" or toolchain == "ios":
bazel_args += ("--config", "optimization_darwin")
else:
bazel_args += ("--config", "optimization")
if symbol_hidden:
bazel_args += ("--config", "symbol_hidden")
if extra_args:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册