diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0e554491015218acb0593c8c8562d29c29c68bc4..d3cbcd4ce6cccc0703c95ac6bb17b8a84f1f2cf8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,6 +1,7 @@ stages: - cpplint - pycodestyle + - platform_compitable_tests - ops_test - ops_benchmark - extra_tests @@ -34,3 +35,10 @@ extra_tests: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS + +platform_compitable_tests: + stage: platform_compitable_tests + script: + - bazel build mace/core:core + + diff --git a/mace/core/BUILD b/mace/core/BUILD index 1b4cdaa8d51c054a9f8b55d5c7e4266273724f5d..41a3ec2d9e4e3b3286e26ff7d1ebfdaf22a24379 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -23,36 +23,41 @@ cc_library( [ "*.cc", "runtime/cpu/*.cc", - "runtime/opencl/*.cc", - "runtime/hexagon/*.cc", ], exclude = [ "*_test.cc", - "runtime/hexagon/hexagon_controller_dummy.cc", ], - ) + if_not_hexagon_enabled([ - "runtime/hexagon/hexagon_controller_dummy.cc", - ]) + if_hexagon_enabled([ + ) + if_android(glob( + [ + "runtime/opencl/*.cc", + ], + )) + if_hexagon_enabled(glob([ + "runtime/hexagon/*.cc", "runtime/hexagon/libhexagon_controller.so", - ]), + ])), hdrs = glob([ "*.h", "runtime/cpu/*.h", - "runtime/opencl/*.h", - "runtime/hexagon/*.h", - ]), - copts = if_openmp_enabled(["-fopenmp"]), + ]) + if_android(glob( + [ + "runtime/opencl/*.h", + ], + )) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])), + copts = if_openmp_enabled(["-fopenmp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-ldl"] + if_android([ "-pie", "-lm", ]), deps = [ + "//mace/utils", + ] + if_android([ ":opencl_headers", "//mace/codegen:generated_opencl", "//mace/codegen:generated_version", - "//mace/utils", "@half//:half", - ] + if_production_mode([ + ]) + if_production_mode([ "//mace/codegen:generated_tuning_params", "//mace/utils:utils_prod", ]) + if_not_production_mode([ diff --git a/mace/core/allocator.cc b/mace/core/allocator.cc index 8f65fbc56c7ddf235d5ac4006bfa0e904afbbce2..d077e58d41007cda4a5d1ea12642f913b2be9fae 100644 --- a/mace/core/allocator.cc +++ b/mace/core/allocator.cc @@ -3,7 +3,9 @@ // #include "mace/core/allocator.h" +#ifdef MACE_ENABLE_OPENCL #include "mace/core/runtime/opencl/opencl_allocator.h" +#endif namespace mace { @@ -23,7 +25,9 @@ Allocator *GetDeviceAllocator(DeviceType type) { MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); +#ifdef MACE_ENABLE_OPENCL MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); +#endif MACE_REGISTER_ALLOCATOR(DeviceType::HEXAGON, new CPUAllocator()); } // namespace mace diff --git a/mace/core/allocator.h b/mace/core/allocator.h index a9f76fcab7d91d8c81a76d181f9515998fb2f72a..f9da64eae95db1845cdda346d67e23571b98393e 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -6,10 +6,11 @@ #ifndef MACE_CORE_ALLOCATOR_H_ #define MACE_CORE_ALLOCATOR_H_ -#include +#include #include #include #include +#include #include "mace/core/registry.h" #include "mace/core/types.h" @@ -17,7 +18,9 @@ namespace mace { -#ifdef __ANDROID__ +#if defined(__hexagon__) +constexpr size_t kMaceAlignment = 128; +#elif defined(__ANDROID__) // 16 bytes = 128 bits = 32 * 4 (Neon) constexpr size_t kMaceAlignment = 16; #else @@ -58,7 +61,7 @@ class CPUAllocator : public Allocator { void *New(size_t nbytes) const override { VLOG(3) << "Allocate CPU buffer: " << nbytes; void *data = nullptr; -#ifdef __ANDROID__ +#if defined(__ANDROID__) || defined(__hexagon__) data = memalign(kMaceAlignment, nbytes); #else MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0); diff --git a/mace/core/buffer.h b/mace/core/buffer.h index 08cbf1a9562c69e2344d25bf8d977f1b2a94ffea..7c5de47ec550bcc64c66f3e9ff49422f94222524 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -7,6 +7,7 @@ #include #include +#include #include #include "mace/core/allocator.h" diff --git a/mace/core/file_storage.cc b/mace/core/file_storage.cc index de9a67fa73a197d8bfe73ec27652fff2f0afd85f..aac7ecbd82578f1651ce8a8d3d1715ae75f8ee87 100644 --- a/mace/core/file_storage.cc +++ b/mace/core/file_storage.cc @@ -13,7 +13,7 @@ #include #include #include - +#include #include "mace/utils/logging.h" diff --git a/mace/core/future.h b/mace/core/future.h index 887812b2be86efbd4643aed0a8594d1ab99e00dc..9bba829e8f3a2a85e9b145202aa825b036ebd128 100644 --- a/mace/core/future.h +++ b/mace/core/future.h @@ -11,7 +11,7 @@ namespace mace { -class CallStats; +struct CallStats; // Wait the call to finish and get the stats if param is not nullptr struct StatsFuture { diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 5f7647ed4a0df7377f143191bf2bf42c74429dc6..0340f63392b0f79fa0f60da3ebd86ebfdc0410d8 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -4,17 +4,23 @@ #include -#include "mace/core/file_storage.h" #include "mace/core/net.h" -#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/types.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/opencl_runtime.h" +#endif // MACE_ENABLE_OPENCL + +#ifdef MACE_ENABLE_HEXAGON +#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#endif // MACE_ENABLE_HEXAGON + namespace mace { // Mace Tensor -struct MaceTensor::Impl { +class MaceTensor::Impl { + public: std::vector shape; std::shared_ptr data; }; @@ -39,8 +45,8 @@ MaceTensor::MaceTensor(const MaceTensor &other) { MaceTensor::MaceTensor(const MaceTensor &&other) { impl_ = std::unique_ptr(new MaceTensor::Impl()); - impl_->shape = std::move(other.shape()); - impl_->data = std::move(other.data()); + impl_->shape = other.shape(); + impl_->data = other.data(); } MaceTensor &MaceTensor::operator=(const MaceTensor &other) { @@ -50,8 +56,8 @@ MaceTensor &MaceTensor::operator=(const MaceTensor &other) { } MaceTensor &MaceTensor::operator=(const MaceTensor &&other) { - impl_->shape = std::move(other.shape()); - impl_->data = std::move(other.data()); + impl_->shape = other.shape(); + impl_->data = other.data(); return *this; } @@ -81,7 +87,9 @@ class MaceEngine::Impl { DeviceType device_type_; std::unique_ptr ws_; std::unique_ptr net_; +#ifdef MACE_ENABLE_HEXAGON std::unique_ptr hexagon_controller_; +#endif DISABLE_COPY_AND_ASSIGN(Impl); }; @@ -93,8 +101,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def, : op_registry_(new OperatorRegistry()), device_type_(device_type), ws_(new Workspace()), - net_(nullptr), - hexagon_controller_(nullptr) { + net_(nullptr) +#ifdef MACE_ENABLE_HEXAGON + , hexagon_controller_(nullptr) +#endif +{ LOG(INFO) << "MACE version: " << MaceVersion(); // Set storage path for internal usage for (auto input_name : input_nodes) { @@ -105,6 +116,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ws_->CreateTensor(MakeString("mace_output_node_", output_name, ":0"), GetDeviceAllocator(device_type_), DT_FLOAT); } +#ifdef MACE_ENABLE_HEXAGON if (device_type == HEXAGON) { hexagon_controller_.reset(new HexagonControlWrapper()); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); @@ -120,6 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, hexagon_controller_->PrintGraph(); } } else { +#endif ws_->LoadModelTensor(*net_def, device_type); // Init model @@ -128,11 +141,14 @@ MaceEngine::Impl::Impl(const NetDef *net_def, if (!net->Run()) { LOG(FATAL) << "Net init run failed"; } - net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); + net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type); +#ifdef MACE_ENABLE_HEXAGON } +#endif } MaceEngine::Impl::~Impl() { +#ifdef MACE_ENABLE_HEXAGON if (device_type_ == HEXAGON) { if (VLOG_IS_ON(2)) { hexagon_controller_->GetPerfInfo(); @@ -141,6 +157,7 @@ MaceEngine::Impl::~Impl() { MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error"); MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error"); } +#endif } MaceStatus MaceEngine::Impl::Run( @@ -167,18 +184,25 @@ MaceStatus MaceEngine::Impl::Run( ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); output_tensors.push_back(output_tensor); } +#ifdef MACE_ENABLE_HEXAGON if (device_type_ == HEXAGON) { MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, "HEXAGON not support multiple inputs and outputs yet."); hexagon_controller_->ExecuteGraph(*input_tensors[0], output_tensors[0]); } else { +#endif if (!net_->Run(run_metadata)) { LOG(FATAL) << "Net run failed"; } +#ifdef MACE_ENABLE_HEXAGON } +#endif + +#ifdef MACE_ENABLE_OPENCL if (device_type_ == OPENCL) { OpenCLRuntime::Global()->SaveBuiltCLProgram(); } +#endif for (auto &output : *outputs) { Tensor *output_tensor = ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); diff --git a/mace/core/mace_runtime.cc b/mace/core/mace_runtime.cc index 4e7c835cd5056ac837ea6834df884e0b360c3557..ba4eb6499ca15f42ec0515800723ac63367819ee 100644 --- a/mace/core/mace_runtime.cc +++ b/mace/core/mace_runtime.cc @@ -3,38 +3,15 @@ // #include "mace/public/mace_runtime.h" -#include "mace/core/runtime/cpu/cpu_runtime.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/utils/logging.h" namespace mace { std::shared_ptr kStorageFactory = nullptr; -void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { - VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint - << ", gpu_priority_hint: " << gpu_priority_hint; - OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); -} - void SetKVStorageFactory(std::shared_ptr storage_factory) { VLOG(1) << "Set internal KV Storage Engine"; kStorageFactory = storage_factory; } -MaceStatus SetOpenMPThreadPolicy(int num_threads_hint, - CPUAffinityPolicy policy) { - VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint - << ", affinity policy: " << policy; - return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy); -} - -void SetOpenMPThreadAffinity(int num_threads, const std::vector &cpu_ids) { - return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids); -} - -MaceStatus GetBigLittleCoreIDs(std::vector *big_core_ids, - std::vector *little_core_ids) { - return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids); -} - }; // namespace mace diff --git a/mace/core/mace_types.cc b/mace/core/mace_types.cc index 04f968e6a8a9066c4d76fcf862eb05b40a7746cf..a6e0fddcc691cf05ee68a7c2d7d82660f9f70c5c 100644 --- a/mace/core/mace_types.cc +++ b/mace/core/mace_types.cc @@ -142,12 +142,12 @@ void OperatorDef::CopyFrom(const OperatorDef &from) { std::copy(from_output.begin(), from_output.end(), output_.begin()); auto from_arg = from.arg(); arg_.resize(from_arg.size()); - for (int i = 0; i < from_arg.size(); ++i) { + for (size_t i = 0; i < from_arg.size(); ++i) { arg_[i].CopyFrom(from_arg[i]); } auto from_output_shape = from.output_shape(); output_shape_.resize(from_output_shape.size()); - for (int i = 0; i < from_output_shape.size(); ++i) { + for (size_t i = 0; i < from_output_shape.size(); ++i) { output_shape_[i].CopyFrom(from_output_shape[i]); } auto from_data_type = from.output_type(); @@ -164,7 +164,7 @@ void OperatorDef::CopyFrom(const OperatorDef &from) { padding_ = from.padding(); auto from_node_input = from.node_input(); node_input_.resize(from_node_input.size()); - for (int i = 0; i < from_node_input.size(); ++i) { + for (size_t i = 0; i < from_node_input.size(); ++i) { node_input_[i].CopyFrom(from_node_input[i]); } auto from_out_max_byte_size = from.out_max_byte_size(); @@ -213,7 +213,7 @@ void OperatorDef::add_out_max_byte_size(int value) { } const std::vector &OperatorDef::input() const { return input_; } const std::string &OperatorDef::input(int index) const { - MACE_CHECK(0 <= index && index <= input_.size()); + MACE_CHECK(0 <= index && index <= static_cast(input_.size())); return input_[index]; } std::string *OperatorDef::add_input() { @@ -230,7 +230,7 @@ void OperatorDef::set_input(const std::vector &value) { } const std::vector &OperatorDef::output() const { return output_; } const std::string &OperatorDef::output(int index) const { - MACE_CHECK(0 <= index && index <= output_.size()); + MACE_CHECK(0 <= index && index <= static_cast(output_.size())); return output_[index]; } std::string *OperatorDef::add_output() { diff --git a/mace/core/net.cc b/mace/core/net.cc index 7563ed5c61cf10641baffabf7f19718a53a63f57..561fae465696cee452d24ea821946461a2b42654 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -15,7 +15,7 @@ NetBase::NetBase(const std::shared_ptr op_registry, const std::shared_ptr net_def, Workspace *ws, DeviceType type) - : op_registry_(op_registry), name_(net_def->name()) {} + : name_(net_def->name()), op_registry_(op_registry) {} SerialNet::SerialNet(const std::shared_ptr op_registry, const std::shared_ptr net_def, diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 2a9984adce819923fbd9069bd69daaa9e3ef108c..56198bffef3a55d9ca0b72a042dd69aae6af69d5 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -6,12 +6,15 @@ #include #include +#include #include #include #include #include "mace/public/mace.h" +#include "mace/public/mace_runtime.h" #include "mace/utils/logging.h" + namespace mace { namespace { @@ -70,8 +73,11 @@ void SortCPUIdsByMaxFreqAsc(std::vector *cpu_ids, int *big_core_offset) { } void SetThreadAffinity(cpu_set_t mask) { - int sys_call_res; +#if defined(__ANDROID__) pid_t pid = gettid(); +#else + pid_t pid = pthread_self(); +#endif int err = sched_setaffinity(pid, sizeof(mask), &mask); MACE_CHECK(err == 0, "set affinity error: ", errno); } @@ -163,12 +169,28 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, } if (omp_num_threads_hint <= 0 || - omp_num_threads_hint > use_cpu_ids.size()) { + omp_num_threads_hint > static_cast(use_cpu_ids.size())) { omp_num_threads_hint = use_cpu_ids.size(); } SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids); return MACE_SUCCESS; } +MaceStatus SetOpenMPThreadPolicy(int num_threads_hint, + CPUAffinityPolicy policy) { + VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint + << ", affinity policy: " << policy; + return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy); +} + +void SetOpenMPThreadAffinity(int num_threads, const std::vector &cpu_ids) { + return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids); +} + +MaceStatus GetBigLittleCoreIDs(std::vector *big_core_ids, + std::vector *little_core_ids) { + return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids); +} + } // namespace mace diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index fda211d4860a32fc3bec56152b06eefb6b3a386f..992223c4e0a11f082c98e4174707011ab246fd22 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -12,6 +12,7 @@ #include #include +#include "mace/public/mace_runtime.h" #include "mace/core/file_storage.h" #include "mace/core/runtime/opencl/opencl_extension.h" #include "mace/public/mace.h" @@ -25,6 +26,12 @@ extern const std::string kCompiledProgramPlatform; extern const std::map> kEncryptedProgramMap; +void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { + VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint + << ", gpu_priority_hint: " << gpu_priority_hint; + OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); +} + const std::string OpenCLErrorToString(cl_int error) { switch (error) { case CL_SUCCESS: diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 29571d96140c436eb71bfe64128842e297899ee8..f3debc501d10ecf7ded7944c504eb6deaf9e02c8 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -11,8 +11,10 @@ #include "mace/core/buffer.h" #include "mace/core/preallocated_pooled_allocator.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/types.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif #include "mace/public/mace.h" #include "mace/utils/logging.h" @@ -26,6 +28,7 @@ namespace mace { break; \ } +#ifdef MACE_ENABLE_OPENCL #define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \ switch (TYPE_ENUM) { \ CASE(half, SINGLE_ARG(STMTS)) \ @@ -46,6 +49,27 @@ namespace mace { DEFAULT; \ break; \ } +#else +#define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \ + switch (TYPE_ENUM) { \ + CASE(float, SINGLE_ARG(STMTS)) \ + CASE(double, SINGLE_ARG(STMTS)) \ + CASE(int32_t, SINGLE_ARG(STMTS)) \ + CASE(uint8_t, SINGLE_ARG(STMTS)) \ + CASE(uint16_t, SINGLE_ARG(STMTS)) \ + CASE(int16_t, SINGLE_ARG(STMTS)) \ + CASE(int8_t, SINGLE_ARG(STMTS)) \ + CASE(std::string, SINGLE_ARG(STMTS)) \ + CASE(int64_t, SINGLE_ARG(STMTS)) \ + CASE(bool, SINGLE_ARG(STMTS)) \ + case DT_INVALID: \ + INVALID; \ + break; \ + default: \ + DEFAULT; \ + break; \ + } +#endif #define CASES(TYPE_ENUM, STMTS) \ CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \ @@ -127,6 +151,7 @@ class Tensor { return buffer_ != nullptr && !buffer_->OnHost() && !has_opencl_image(); } +#ifdef MACE_ENABLE_OPENCL inline cl::Image *opencl_image() const { MACE_CHECK(has_opencl_image(), "do not have image"); return static_cast(buffer_->buffer()); @@ -136,6 +161,7 @@ class Tensor { MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer"); return static_cast(buffer_->buffer()); } +#endif inline index_t buffer_offset() const { return buffer_->offset(); } diff --git a/mace/core/types.cc b/mace/core/types.cc index ef0a1755110bf47cc231b35c5c0a60e9563e9b66..d11325f6846b3e7af7cf0c69012d209b9bef5c3e 100644 --- a/mace/core/types.cc +++ b/mace/core/types.cc @@ -30,11 +30,18 @@ bool DataTypeCanUseMemcpy(DataType dt) { std::string DataTypeToString(const DataType dt) { static std::map dtype_string_map = { - {DT_FLOAT, "DT_FLOAT"}, {DT_HALF, "DT_HALF"}, - {DT_DOUBLE, "DT_DOUBLE"}, {DT_UINT8, "DT_UINT8"}, - {DT_INT8, "DT_INT8"}, {DT_INT32, "DT_INT32"}, - {DT_UINT32, "DT_UINT32"}, {DT_UINT16, "DT_UINT16"}, - {DT_INT64, "DT_INT64"}, {DT_BOOL, "DT_BOOL"}, + {DT_FLOAT, "DT_FLOAT"}, +#ifdef MACE_ENABLE_OPENCL + {DT_HALF, "DT_HALF"}, +#endif + {DT_DOUBLE, "DT_DOUBLE"}, + {DT_UINT8, "DT_UINT8"}, + {DT_INT8, "DT_INT8"}, + {DT_INT32, "DT_INT32"}, + {DT_UINT32, "DT_UINT32"}, + {DT_UINT16, "DT_UINT16"}, + {DT_INT64, "DT_INT64"}, + {DT_BOOL, "DT_BOOL"}, {DT_STRING, "DT_STRING"}}; MACE_CHECK(dt != DT_INVALID) << "Not support Invalid data type"; return dtype_string_map[dt]; @@ -44,8 +51,10 @@ size_t GetEnumTypeSize(const DataType dt) { switch (dt) { case DT_FLOAT: return sizeof(float); +#ifdef MACE_ENABLE_OPENCL case DT_HALF: return sizeof(half); +#endif case DT_UINT8: return sizeof(uint8_t); case DT_INT8: diff --git a/mace/core/types.h b/mace/core/types.h index f038d5be034a3e9140bea2cbcdcde174c9f9918b..b7f431682ca0906b678f8918d3a1bb0c6f6f93e5 100644 --- a/mace/core/types.h +++ b/mace/core/types.h @@ -9,13 +9,17 @@ #include #include "mace/public/mace_types.h" +#ifdef MACE_ENABLE_OPENCL #include "include/half.hpp" +#endif namespace mace { typedef int64_t index_t; +#ifdef MACE_ENABLE_OPENCL using half = half_float::half; +#endif bool DataTypeCanUseMemcpy(DataType dt); @@ -52,7 +56,9 @@ struct EnumToDataType {}; // Specializations below typedef TYPE Type; \ } +#ifdef MACE_ENABLE_OPENCL MATCH_TYPE_AND_ENUM(half, DT_HALF); +#endif MATCH_TYPE_AND_ENUM(float, DT_FLOAT); MATCH_TYPE_AND_ENUM(double, DT_DOUBLE); MATCH_TYPE_AND_ENUM(int32_t, DT_INT32); diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index d068cbd8920ab2b155f05f07ea776925c2d75813..1a0e8236afee8292839ef612df0acee8787ce4f4 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -22,8 +22,7 @@ Tensor *Workspace::CreateTensor(const std::string &name, VLOG(3) << "Tensor " << name << " already exists. Skipping."; } else { VLOG(3) << "Creating Tensor " << name; - tensor_map_[name] = - std::move(std::unique_ptr(new Tensor(alloc, type))); + tensor_map_[name] = std::unique_ptr(new Tensor(alloc, type)); tensor_map_[name]->SetSourceOpName(name); } return GetTensor(name); @@ -72,12 +71,12 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { } VLOG(3) << "Model data size: " << model_data_size; - if (type == DeviceType::CPU) { - tensor_buffer_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size))); + if (type == DeviceType::CPU || type == DeviceType::NEON) { + tensor_buffer_ = std::unique_ptr( + new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size)); } else { - tensor_buffer_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_size))); + tensor_buffer_ = std::unique_ptr( + new Buffer(GetDeviceAllocator(type), model_data_size)); tensor_buffer_->Map(nullptr); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->UnMap(); diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index 1551eb47e05ba4dac9face7e2e6f783f60448817..4eb4b8508409e3c1a57a28cb4a1f198409573334 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -7,7 +7,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7") +load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled") cc_library( name = "kernels", @@ -28,7 +28,9 @@ cc_library( "opencl/*.h", "arm/*.h", ]), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = if_android(["-lm"]), deps = [ "//mace/core", @@ -46,7 +48,9 @@ cc_test( "opencl/*_test.cc", ], ), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ @@ -55,15 +59,3 @@ cc_test( "@gtest//:gtest_main", ], ) - -cc_test( - name = "benchmark", - testonly = 1, - srcs = glob(["benchmark/*.cc"]), - linkstatic = 1, - deps = [ - ":kernels", - "//mace/core", - "//mace/core:test_benchmark_main", - ], -) diff --git a/mace/ops/BUILD b/mace/ops/BUILD index c7a9b95b40bc8588ebf9dbbae11e6433ea26defe..131beceb222f34accb201e731205cb8b03425718 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -7,7 +7,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7") +load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled") cc_library( name = "test", @@ -34,7 +34,9 @@ cc_library( ["*.h"], exclude = ["ops_test_util.h"], ), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), deps = [ "//mace/kernels", ], @@ -47,6 +49,9 @@ cc_test( srcs = glob( ["*_test.cc"], ), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ @@ -60,6 +65,9 @@ cc_test( name = "ops_benchmark", testonly = 1, srcs = glob(["*_benchmark.cc"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ diff --git a/mace/public/mace_types.h b/mace/public/mace_types.h index daa3441de765f23821a31674f786c4ab7823c94c..bd1a2c822eacb67fddbac10a514aff901dbd20e3 100644 --- a/mace/public/mace_types.h +++ b/mace/public/mace_types.h @@ -342,7 +342,8 @@ struct OperatorStats { CallStats stats; }; -struct RunMetadata { +class RunMetadata { + public: std::vector op_stats; }; diff --git a/mace/utils/env_time.h b/mace/utils/env_time.h index 0fecab568e4a48ca138802e65d59142538946167..3bee2873495346516a4d50c76f2554bfcaa38359 100644 --- a/mace/utils/env_time.h +++ b/mace/utils/env_time.h @@ -6,15 +6,22 @@ #define MACE_UTILS_ENV_TIME_H_ #include +#ifdef __hexagon__ +#include +#else #include -#include +#endif namespace mace { inline int64_t NowMicros() { +#ifdef __hexagon__ + return HAP_perf_get_time_us(); +#else struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#endif } } // namespace mace diff --git a/mace/utils/memory_logging.h b/mace/utils/memory_logging.h index bd46f58049d13ed6b7cbda455ad7c883a7e33f88..0c149091db39d3f909a4dbd00e73b91d1fbf4366 100644 --- a/mace/utils/memory_logging.h +++ b/mace/utils/memory_logging.h @@ -5,13 +5,16 @@ #ifndef MACE_UTILS_MEMORY_LOGGING_H_ #define MACE_UTILS_MEMORY_LOGGING_H_ +#ifndef __hexagon__ #include +#endif #include #include "mace/utils/logging.h" namespace mace { +#ifdef MACE_ENABLE_MEMORY_LOGGING class MallinfoChangeLogger { public: explicit MallinfoChangeLogger(const std::string &name) : name_(name) { @@ -87,7 +90,6 @@ class MallinfoChangeLogger { } }; -#ifdef MACE_ENABLE_MEMORY_LOGGING #define MACE_MEMORY_LOGGING_GUARD() \ MallinfoChangeLogger mem_logger_##__line__(std::string(__FILE__) + ":" + \ std::string(__func__)); diff --git a/mace/utils/utils.h b/mace/utils/utils.h index a9ca2a40aae252c10ce96d5968987b68af9894ea..184cb5aa737d904b0ead721200794bbecd64eb07 100644 --- a/mace/utils/utils.h +++ b/mace/utils/utils.h @@ -5,7 +5,6 @@ #ifndef MACE_UTILS_UTILS_H_ #define MACE_UTILS_UTILS_H_ -#include #include #include #include @@ -50,7 +49,7 @@ inline std::string ObfuscateString(const std::string &src, for (size_t i = 0; i < src.size(); i++) { dest[i] = src[i] ^ lookup_table[i % lookup_table.size()]; } - return std::move(dest); + return dest; } // ObfuscateString(ObfuscateString(str)) ==> str @@ -86,7 +85,7 @@ inline std::string ObfuscateSymbol(const std::string &src) { // There is no collision if it's true for every char at every position dest[i] = encode_dict[(idx + i + 31) % encode_dict.size()]; } - return std::move(dest); + return dest; } #ifdef MACE_OBFUSCATE_LITERALS