diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0e554491015218acb0593c8c8562d29c29c68bc4..d3cbcd4ce6cccc0703c95ac6bb17b8a84f1f2cf8 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,6 +1,7 @@ stages: - cpplint - pycodestyle + - platform_compitable_tests - ops_test - ops_benchmark - extra_tests @@ -34,3 +35,10 @@ extra_tests: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS + +platform_compitable_tests: + stage: platform_compitable_tests + script: + - bazel build mace/core:core + + diff --git a/mace/core/BUILD b/mace/core/BUILD index 1b4cdaa8d51c054a9f8b55d5c7e4266273724f5d..41a3ec2d9e4e3b3286e26ff7d1ebfdaf22a24379 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -23,36 +23,41 @@ cc_library( [ "*.cc", "runtime/cpu/*.cc", - "runtime/opencl/*.cc", - "runtime/hexagon/*.cc", ], exclude = [ "*_test.cc", - "runtime/hexagon/hexagon_controller_dummy.cc", ], - ) + if_not_hexagon_enabled([ - "runtime/hexagon/hexagon_controller_dummy.cc", - ]) + if_hexagon_enabled([ + ) + if_android(glob( + [ + "runtime/opencl/*.cc", + ], + )) + if_hexagon_enabled(glob([ + "runtime/hexagon/*.cc", "runtime/hexagon/libhexagon_controller.so", - ]), + ])), hdrs = glob([ "*.h", "runtime/cpu/*.h", - "runtime/opencl/*.h", - "runtime/hexagon/*.h", - ]), - copts = if_openmp_enabled(["-fopenmp"]), + ]) + if_android(glob( + [ + "runtime/opencl/*.h", + ], + )) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])), + copts = if_openmp_enabled(["-fopenmp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-ldl"] + if_android([ "-pie", "-lm", ]), deps = [ + "//mace/utils", + ] + if_android([ ":opencl_headers", "//mace/codegen:generated_opencl", "//mace/codegen:generated_version", - "//mace/utils", "@half//:half", - ] + if_production_mode([ + ]) + if_production_mode([ "//mace/codegen:generated_tuning_params", "//mace/utils:utils_prod", ]) + if_not_production_mode([ diff --git a/mace/core/allocator.cc b/mace/core/allocator.cc index 567887301a6bb7ea5b5c16738fad79c454b91a60..53fd5851f22fe5d625e1121803c3713b6bee7892 100644 --- a/mace/core/allocator.cc +++ b/mace/core/allocator.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "mace/core/allocator.h" +#ifdef MACE_ENABLE_OPENCL #include "mace/core/runtime/opencl/opencl_allocator.h" +#endif namespace mace { @@ -33,7 +35,9 @@ Allocator *GetDeviceAllocator(DeviceType type) { MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); +#ifdef MACE_ENABLE_OPENCL MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); +#endif MACE_REGISTER_ALLOCATOR(DeviceType::HEXAGON, new CPUAllocator()); } // namespace mace diff --git a/mace/core/allocator.h b/mace/core/allocator.h index 80466a9c1b79d936fe2c239e2872539fd828201c..c22ea47c738d551ce586c3fecb169f1f68a18e4d 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -15,10 +15,11 @@ #ifndef MACE_CORE_ALLOCATOR_H_ #define MACE_CORE_ALLOCATOR_H_ -#include +#include #include #include #include +#include #include "mace/core/registry.h" #include "mace/core/types.h" @@ -26,7 +27,9 @@ namespace mace { -#ifdef __ANDROID__ +#if defined(__hexagon__) +constexpr size_t kMaceAlignment = 128; +#elif defined(__ANDROID__) // 16 bytes = 128 bits = 32 * 4 (Neon) constexpr size_t kMaceAlignment = 16; #else @@ -67,7 +70,7 @@ class CPUAllocator : public Allocator { void *New(size_t nbytes) const override { VLOG(3) << "Allocate CPU buffer: " << nbytes; void *data = nullptr; -#ifdef __ANDROID__ +#if defined(__ANDROID__) || defined(__hexagon__) data = memalign(kMaceAlignment, nbytes); #else MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0); diff --git a/mace/core/buffer.h b/mace/core/buffer.h index d209d52f4fea0b31370d9074705a8c865f000276..3c30e8d430bd637fd9489b7a2330a711c32c0f37 100644 --- a/mace/core/buffer.h +++ b/mace/core/buffer.h @@ -17,6 +17,7 @@ #include #include +#include #include #include "mace/core/allocator.h" diff --git a/mace/core/file_storage.cc b/mace/core/file_storage.cc index 9a3f812be64a6860e144e34aec00ef9ccaad2051..5f6f07774dc12b71e259343c93f2d5b3ac603d3e 100644 --- a/mace/core/file_storage.cc +++ b/mace/core/file_storage.cc @@ -23,7 +23,7 @@ #include #include #include - +#include #include "mace/utils/logging.h" diff --git a/mace/core/future.h b/mace/core/future.h index 47c461bf03b9ec4e27d4256b764455f26e4a17d5..4dc778ad5da83613dd033bd18159086c7bed39be 100644 --- a/mace/core/future.h +++ b/mace/core/future.h @@ -21,7 +21,7 @@ namespace mace { -class CallStats; +struct CallStats; // Wait the call to finish and get the stats if param is not nullptr struct StatsFuture { diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 27d676d9b6f8cfeddb4e153f9fd82f46014f027b..76e243b138616b5dffc3cac8c7072a6bf3e18000 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -14,17 +14,23 @@ #include -#include "mace/core/file_storage.h" #include "mace/core/net.h" -#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/types.h" #include "mace/public/mace.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/opencl_runtime.h" +#endif // MACE_ENABLE_OPENCL + +#ifdef MACE_ENABLE_HEXAGON +#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h" +#endif // MACE_ENABLE_HEXAGON + namespace mace { // Mace Tensor -struct MaceTensor::Impl { +class MaceTensor::Impl { + public: std::vector shape; std::shared_ptr data; }; @@ -49,8 +55,8 @@ MaceTensor::MaceTensor(const MaceTensor &other) { MaceTensor::MaceTensor(const MaceTensor &&other) { impl_ = std::unique_ptr(new MaceTensor::Impl()); - impl_->shape = std::move(other.shape()); - impl_->data = std::move(other.data()); + impl_->shape = other.shape(); + impl_->data = other.data(); } MaceTensor &MaceTensor::operator=(const MaceTensor &other) { @@ -60,8 +66,8 @@ MaceTensor &MaceTensor::operator=(const MaceTensor &other) { } MaceTensor &MaceTensor::operator=(const MaceTensor &&other) { - impl_->shape = std::move(other.shape()); - impl_->data = std::move(other.data()); + impl_->shape = other.shape(); + impl_->data = other.data(); return *this; } @@ -91,7 +97,9 @@ class MaceEngine::Impl { DeviceType device_type_; std::unique_ptr ws_; std::unique_ptr net_; +#ifdef MACE_ENABLE_HEXAGON std::unique_ptr hexagon_controller_; +#endif DISABLE_COPY_AND_ASSIGN(Impl); }; @@ -103,8 +111,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def, : op_registry_(new OperatorRegistry()), device_type_(device_type), ws_(new Workspace()), - net_(nullptr), - hexagon_controller_(nullptr) { + net_(nullptr) +#ifdef MACE_ENABLE_HEXAGON + , hexagon_controller_(nullptr) +#endif +{ LOG(INFO) << "MACE version: " << MaceVersion(); // Set storage path for internal usage for (auto input_name : input_nodes) { @@ -115,6 +126,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ws_->CreateTensor(MakeString("mace_output_node_", output_name, ":0"), GetDeviceAllocator(device_type_), DT_FLOAT); } +#ifdef MACE_ENABLE_HEXAGON if (device_type == HEXAGON) { hexagon_controller_.reset(new HexagonControlWrapper()); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); @@ -130,6 +142,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, hexagon_controller_->PrintGraph(); } } else { +#endif ws_->LoadModelTensor(*net_def, device_type); // Init model @@ -138,11 +151,14 @@ MaceEngine::Impl::Impl(const NetDef *net_def, if (!net->Run()) { LOG(FATAL) << "Net init run failed"; } - net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); + net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type); +#ifdef MACE_ENABLE_HEXAGON } +#endif } MaceEngine::Impl::~Impl() { +#ifdef MACE_ENABLE_HEXAGON if (device_type_ == HEXAGON) { if (VLOG_IS_ON(2)) { hexagon_controller_->GetPerfInfo(); @@ -151,6 +167,7 @@ MaceEngine::Impl::~Impl() { MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error"); MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error"); } +#endif } MaceStatus MaceEngine::Impl::Run( @@ -177,18 +194,25 @@ MaceStatus MaceEngine::Impl::Run( ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); output_tensors.push_back(output_tensor); } +#ifdef MACE_ENABLE_HEXAGON if (device_type_ == HEXAGON) { MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, "HEXAGON not support multiple inputs and outputs yet."); hexagon_controller_->ExecuteGraph(*input_tensors[0], output_tensors[0]); } else { +#endif if (!net_->Run(run_metadata)) { LOG(FATAL) << "Net run failed"; } +#ifdef MACE_ENABLE_HEXAGON } +#endif + +#ifdef MACE_ENABLE_OPENCL if (device_type_ == OPENCL) { OpenCLRuntime::Global()->SaveBuiltCLProgram(); } +#endif for (auto &output : *outputs) { Tensor *output_tensor = ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); diff --git a/mace/core/mace_runtime.cc b/mace/core/mace_runtime.cc index 4ce55861c6acba40e0a6f441c37ff4ca239d9477..8fe2b6c3b5222b7a9f68229349db040a58ea6b4d 100644 --- a/mace/core/mace_runtime.cc +++ b/mace/core/mace_runtime.cc @@ -13,38 +13,15 @@ // limitations under the License. #include "mace/public/mace_runtime.h" -#include "mace/core/runtime/cpu/cpu_runtime.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/utils/logging.h" namespace mace { std::shared_ptr kStorageFactory = nullptr; -void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { - VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint - << ", gpu_priority_hint: " << gpu_priority_hint; - OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); -} - void SetKVStorageFactory(std::shared_ptr storage_factory) { VLOG(1) << "Set internal KV Storage Engine"; kStorageFactory = storage_factory; } -MaceStatus SetOpenMPThreadPolicy(int num_threads_hint, - CPUAffinityPolicy policy) { - VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint - << ", affinity policy: " << policy; - return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy); -} - -void SetOpenMPThreadAffinity(int num_threads, const std::vector &cpu_ids) { - return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids); -} - -MaceStatus GetBigLittleCoreIDs(std::vector *big_core_ids, - std::vector *little_core_ids) { - return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids); -} - }; // namespace mace diff --git a/mace/core/mace_types.cc b/mace/core/mace_types.cc index c356116197e20035871a3b4e7b8fac31605f6cae..a98b1d5e7a93efaa3b36474ae7e087ac91d6cf43 100644 --- a/mace/core/mace_types.cc +++ b/mace/core/mace_types.cc @@ -152,12 +152,12 @@ void OperatorDef::CopyFrom(const OperatorDef &from) { std::copy(from_output.begin(), from_output.end(), output_.begin()); auto from_arg = from.arg(); arg_.resize(from_arg.size()); - for (int i = 0; i < from_arg.size(); ++i) { + for (size_t i = 0; i < from_arg.size(); ++i) { arg_[i].CopyFrom(from_arg[i]); } auto from_output_shape = from.output_shape(); output_shape_.resize(from_output_shape.size()); - for (int i = 0; i < from_output_shape.size(); ++i) { + for (size_t i = 0; i < from_output_shape.size(); ++i) { output_shape_[i].CopyFrom(from_output_shape[i]); } auto from_data_type = from.output_type(); @@ -174,7 +174,7 @@ void OperatorDef::CopyFrom(const OperatorDef &from) { padding_ = from.padding(); auto from_node_input = from.node_input(); node_input_.resize(from_node_input.size()); - for (int i = 0; i < from_node_input.size(); ++i) { + for (size_t i = 0; i < from_node_input.size(); ++i) { node_input_[i].CopyFrom(from_node_input[i]); } auto from_out_max_byte_size = from.out_max_byte_size(); @@ -223,7 +223,7 @@ void OperatorDef::add_out_max_byte_size(int value) { } const std::vector &OperatorDef::input() const { return input_; } const std::string &OperatorDef::input(int index) const { - MACE_CHECK(0 <= index && index <= input_.size()); + MACE_CHECK(0 <= index && index <= static_cast(input_.size())); return input_[index]; } std::string *OperatorDef::add_input() { @@ -240,7 +240,7 @@ void OperatorDef::set_input(const std::vector &value) { } const std::vector &OperatorDef::output() const { return output_; } const std::string &OperatorDef::output(int index) const { - MACE_CHECK(0 <= index && index <= output_.size()); + MACE_CHECK(0 <= index && index <= static_cast(output_.size())); return output_[index]; } std::string *OperatorDef::add_output() { diff --git a/mace/core/net.cc b/mace/core/net.cc index 44c889b1e1480b1d4b50001bb3e4f1dc8b7c2835..72f186c53d297db51f2a246be40b646a0ba47c23 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -25,7 +25,7 @@ NetBase::NetBase(const std::shared_ptr op_registry, const std::shared_ptr net_def, Workspace *ws, DeviceType type) - : op_registry_(op_registry), name_(net_def->name()) {} + : name_(net_def->name()), op_registry_(op_registry) {} SerialNet::SerialNet(const std::shared_ptr op_registry, const std::shared_ptr net_def, diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 4c4e58ff035c41d2921d6c9f58a29b5f90bf2f07..f55115d6b36a22b90a0fdc3db1f8431a15642345 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -16,12 +16,15 @@ #include #include +#include #include #include #include #include "mace/public/mace.h" +#include "mace/public/mace_runtime.h" #include "mace/utils/logging.h" + namespace mace { namespace { @@ -80,8 +83,11 @@ void SortCPUIdsByMaxFreqAsc(std::vector *cpu_ids, int *big_core_offset) { } void SetThreadAffinity(cpu_set_t mask) { - int sys_call_res; +#if defined(__ANDROID__) pid_t pid = gettid(); +#else + pid_t pid = pthread_self(); +#endif int err = sched_setaffinity(pid, sizeof(mask), &mask); MACE_CHECK(err == 0, "set affinity error: ", errno); } @@ -173,12 +179,28 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, } if (omp_num_threads_hint <= 0 || - omp_num_threads_hint > use_cpu_ids.size()) { + omp_num_threads_hint > static_cast(use_cpu_ids.size())) { omp_num_threads_hint = use_cpu_ids.size(); } SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids); return MACE_SUCCESS; } +MaceStatus SetOpenMPThreadPolicy(int num_threads_hint, + CPUAffinityPolicy policy) { + VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint + << ", affinity policy: " << policy; + return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy); +} + +void SetOpenMPThreadAffinity(int num_threads, const std::vector &cpu_ids) { + return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids); +} + +MaceStatus GetBigLittleCoreIDs(std::vector *big_core_ids, + std::vector *little_core_ids) { + return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids); +} + } // namespace mace diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 4314edbf394b713b02d7bb70daf779c36c0b0245..baec40b2dc9c3690f4e237788e8a8c081e8fa475 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -22,6 +22,7 @@ #include #include +#include "mace/public/mace_runtime.h" #include "mace/core/file_storage.h" #include "mace/core/runtime/opencl/opencl_extension.h" #include "mace/public/mace.h" @@ -35,6 +36,12 @@ extern const std::string kCompiledProgramPlatform; extern const std::map> kEncryptedProgramMap; +void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { + VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint + << ", gpu_priority_hint: " << gpu_priority_hint; + OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint); +} + const std::string OpenCLErrorToString(cl_int error) { switch (error) { case CL_SUCCESS: diff --git a/mace/core/tensor.h b/mace/core/tensor.h index fa383aa58e75fc0846d9e7eca5ad5a9feebaa6c7..0f0b2827cbca9948402e2413942b93f629e2c85d 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -21,8 +21,10 @@ #include "mace/core/buffer.h" #include "mace/core/preallocated_pooled_allocator.h" -#include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/types.h" +#ifdef MACE_ENABLE_OPENCL +#include "mace/core/runtime/opencl/cl2_header.h" +#endif #include "mace/public/mace.h" #include "mace/utils/logging.h" @@ -36,6 +38,7 @@ namespace mace { break; \ } +#ifdef MACE_ENABLE_OPENCL #define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \ switch (TYPE_ENUM) { \ CASE(half, SINGLE_ARG(STMTS)) \ @@ -56,6 +59,27 @@ namespace mace { DEFAULT; \ break; \ } +#else +#define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \ + switch (TYPE_ENUM) { \ + CASE(float, SINGLE_ARG(STMTS)) \ + CASE(double, SINGLE_ARG(STMTS)) \ + CASE(int32_t, SINGLE_ARG(STMTS)) \ + CASE(uint8_t, SINGLE_ARG(STMTS)) \ + CASE(uint16_t, SINGLE_ARG(STMTS)) \ + CASE(int16_t, SINGLE_ARG(STMTS)) \ + CASE(int8_t, SINGLE_ARG(STMTS)) \ + CASE(std::string, SINGLE_ARG(STMTS)) \ + CASE(int64_t, SINGLE_ARG(STMTS)) \ + CASE(bool, SINGLE_ARG(STMTS)) \ + case DT_INVALID: \ + INVALID; \ + break; \ + default: \ + DEFAULT; \ + break; \ + } +#endif #define CASES(TYPE_ENUM, STMTS) \ CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \ @@ -137,6 +161,7 @@ class Tensor { return buffer_ != nullptr && !buffer_->OnHost() && !has_opencl_image(); } +#ifdef MACE_ENABLE_OPENCL inline cl::Image *opencl_image() const { MACE_CHECK(has_opencl_image(), "do not have image"); return static_cast(buffer_->buffer()); @@ -146,6 +171,7 @@ class Tensor { MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer"); return static_cast(buffer_->buffer()); } +#endif inline index_t buffer_offset() const { return buffer_->offset(); } diff --git a/mace/core/types.cc b/mace/core/types.cc index 9bab2dab2f8e5cbe8e5cf7f47bff699ff22a5587..44fd1d9271c1e5e62d94a7ff2de10e5322568124 100644 --- a/mace/core/types.cc +++ b/mace/core/types.cc @@ -40,11 +40,18 @@ bool DataTypeCanUseMemcpy(DataType dt) { std::string DataTypeToString(const DataType dt) { static std::map dtype_string_map = { - {DT_FLOAT, "DT_FLOAT"}, {DT_HALF, "DT_HALF"}, - {DT_DOUBLE, "DT_DOUBLE"}, {DT_UINT8, "DT_UINT8"}, - {DT_INT8, "DT_INT8"}, {DT_INT32, "DT_INT32"}, - {DT_UINT32, "DT_UINT32"}, {DT_UINT16, "DT_UINT16"}, - {DT_INT64, "DT_INT64"}, {DT_BOOL, "DT_BOOL"}, + {DT_FLOAT, "DT_FLOAT"}, +#ifdef MACE_ENABLE_OPENCL + {DT_HALF, "DT_HALF"}, +#endif + {DT_DOUBLE, "DT_DOUBLE"}, + {DT_UINT8, "DT_UINT8"}, + {DT_INT8, "DT_INT8"}, + {DT_INT32, "DT_INT32"}, + {DT_UINT32, "DT_UINT32"}, + {DT_UINT16, "DT_UINT16"}, + {DT_INT64, "DT_INT64"}, + {DT_BOOL, "DT_BOOL"}, {DT_STRING, "DT_STRING"}}; MACE_CHECK(dt != DT_INVALID) << "Not support Invalid data type"; return dtype_string_map[dt]; @@ -54,8 +61,10 @@ size_t GetEnumTypeSize(const DataType dt) { switch (dt) { case DT_FLOAT: return sizeof(float); +#ifdef MACE_ENABLE_OPENCL case DT_HALF: return sizeof(half); +#endif case DT_UINT8: return sizeof(uint8_t); case DT_INT8: diff --git a/mace/core/types.h b/mace/core/types.h index 7b63fa290c076fba23063ce13207f0b5429ceae6..c07c48297f046bb1f3624c36c523ab7c7797353b 100644 --- a/mace/core/types.h +++ b/mace/core/types.h @@ -19,13 +19,17 @@ #include #include "mace/public/mace_types.h" +#ifdef MACE_ENABLE_OPENCL #include "include/half.hpp" +#endif namespace mace { typedef int64_t index_t; +#ifdef MACE_ENABLE_OPENCL using half = half_float::half; +#endif bool DataTypeCanUseMemcpy(DataType dt); @@ -62,7 +66,9 @@ struct EnumToDataType {}; // Specializations below typedef TYPE Type; \ } +#ifdef MACE_ENABLE_OPENCL MATCH_TYPE_AND_ENUM(half, DT_HALF); +#endif MATCH_TYPE_AND_ENUM(float, DT_FLOAT); MATCH_TYPE_AND_ENUM(double, DT_DOUBLE); MATCH_TYPE_AND_ENUM(int32_t, DT_INT32); diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 107ac8fa5e74a5258d7b7a674a676e66ebe3d0fb..0c681b14b70d2df9c81773652413b0a140513358 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -32,8 +32,7 @@ Tensor *Workspace::CreateTensor(const std::string &name, VLOG(3) << "Tensor " << name << " already exists. Skipping."; } else { VLOG(3) << "Creating Tensor " << name; - tensor_map_[name] = - std::move(std::unique_ptr(new Tensor(alloc, type))); + tensor_map_[name] = std::unique_ptr(new Tensor(alloc, type)); tensor_map_[name]->SetSourceOpName(name); } return GetTensor(name); @@ -82,12 +81,12 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { } VLOG(3) << "Model data size: " << model_data_size; - if (type == DeviceType::CPU) { - tensor_buffer_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size))); + if (type == DeviceType::CPU || type == DeviceType::NEON) { + tensor_buffer_ = std::unique_ptr( + new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size)); } else { - tensor_buffer_ = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(type), model_data_size))); + tensor_buffer_ = std::unique_ptr( + new Buffer(GetDeviceAllocator(type), model_data_size)); tensor_buffer_->Map(nullptr); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->UnMap(); diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index 1551eb47e05ba4dac9face7e2e6f783f60448817..4eb4b8508409e3c1a57a28cb4a1f198409573334 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -7,7 +7,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7") +load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled") cc_library( name = "kernels", @@ -28,7 +28,9 @@ cc_library( "opencl/*.h", "arm/*.h", ]), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = if_android(["-lm"]), deps = [ "//mace/core", @@ -46,7 +48,9 @@ cc_test( "opencl/*_test.cc", ], ), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ @@ -55,15 +59,3 @@ cc_test( "@gtest//:gtest_main", ], ) - -cc_test( - name = "benchmark", - testonly = 1, - srcs = glob(["benchmark/*.cc"]), - linkstatic = 1, - deps = [ - ":kernels", - "//mace/core", - "//mace/core:test_benchmark_main", - ], -) diff --git a/mace/ops/BUILD b/mace/ops/BUILD index c7a9b95b40bc8588ebf9dbbae11e6433ea26defe..131beceb222f34accb201e731205cb8b03425718 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -7,7 +7,7 @@ package( licenses(["notice"]) # Apache 2.0 -load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7") +load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled") cc_library( name = "test", @@ -34,7 +34,9 @@ cc_library( ["*.h"], exclude = ["ops_test_util.h"], ), - copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), deps = [ "//mace/kernels", ], @@ -47,6 +49,9 @@ cc_test( srcs = glob( ["*_test.cc"], ), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ @@ -60,6 +65,9 @@ cc_test( name = "ops_benchmark", testonly = 1, srcs = glob(["*_benchmark.cc"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ diff --git a/mace/public/mace_types.h b/mace/public/mace_types.h index 4a7fcc30bb58fb7e06aa2c3275142f2caebd15cb..48da1dd3bc6f9b063a09465d97ec461676882b0e 100644 --- a/mace/public/mace_types.h +++ b/mace/public/mace_types.h @@ -352,7 +352,8 @@ struct OperatorStats { CallStats stats; }; -struct RunMetadata { +class RunMetadata { + public: std::vector op_stats; }; diff --git a/mace/utils/env_time.h b/mace/utils/env_time.h index c670e83fe500267e18730418fdf2660f8f26d80e..d70124b585cacf174facfb94728515d2dc5f48ba 100644 --- a/mace/utils/env_time.h +++ b/mace/utils/env_time.h @@ -16,15 +16,22 @@ #define MACE_UTILS_ENV_TIME_H_ #include +#ifdef __hexagon__ +#include +#else #include -#include +#endif namespace mace { inline int64_t NowMicros() { +#ifdef __hexagon__ + return HAP_perf_get_time_us(); +#else struct timeval tv; gettimeofday(&tv, nullptr); return static_cast(tv.tv_sec) * 1000000 + tv.tv_usec; +#endif } } // namespace mace diff --git a/mace/utils/memory_logging.h b/mace/utils/memory_logging.h index a2f861fad4562832c3117374294ac4a99ecbec92..7111d7301352ae2f6208905e4e22af5321624ac0 100644 --- a/mace/utils/memory_logging.h +++ b/mace/utils/memory_logging.h @@ -15,13 +15,16 @@ #ifndef MACE_UTILS_MEMORY_LOGGING_H_ #define MACE_UTILS_MEMORY_LOGGING_H_ +#ifndef __hexagon__ #include +#endif #include #include "mace/utils/logging.h" namespace mace { +#ifdef MACE_ENABLE_MEMORY_LOGGING class MallinfoChangeLogger { public: explicit MallinfoChangeLogger(const std::string &name) : name_(name) { @@ -97,7 +100,6 @@ class MallinfoChangeLogger { } }; -#ifdef MACE_ENABLE_MEMORY_LOGGING #define MACE_MEMORY_LOGGING_GUARD() \ MallinfoChangeLogger mem_logger_##__line__(std::string(__FILE__) + ":" + \ std::string(__func__)); diff --git a/mace/utils/utils.h b/mace/utils/utils.h index fecf03ec64ce92d6f2aad208dd1f1fa4c678d7f1..9085006061762d8304303f4258eba503947b60ec 100644 --- a/mace/utils/utils.h +++ b/mace/utils/utils.h @@ -15,7 +15,6 @@ #ifndef MACE_UTILS_UTILS_H_ #define MACE_UTILS_UTILS_H_ -#include #include #include #include @@ -60,7 +59,7 @@ inline std::string ObfuscateString(const std::string &src, for (size_t i = 0; i < src.size(); i++) { dest[i] = src[i] ^ lookup_table[i % lookup_table.size()]; } - return std::move(dest); + return dest; } // ObfuscateString(ObfuscateString(str)) ==> str @@ -96,7 +95,7 @@ inline std::string ObfuscateSymbol(const std::string &src) { // There is no collision if it's true for every char at every position dest[i] = encode_dict[(idx + i + 31) % encode_dict.size()]; } - return std::move(dest); + return dest; } #ifdef MACE_OBFUSCATE_LITERALS