From 840aa2d0c077acaf003b5dd50a8a3c402904560e Mon Sep 17 00:00:00 2001 From: Bin Li Date: Mon, 17 Sep 2018 15:13:01 +0800 Subject: [PATCH] Refactor CPURuntime --- mace/core/BUILD | 5 +- mace/core/device.cc | 8 ++- mace/core/device.h | 4 +- mace/core/runtime/cpu/cpu_runtime.cc | 27 ++++----- mace/core/runtime/cpu/cpu_runtime.h | 46 +++++++++++---- mace/core/runtime/opencl/gpu_device.cc | 6 +- mace/core/runtime/opencl/gpu_device.h | 4 +- mace/core/testing/test_benchmark_main.cc | 7 +-- mace/kernels/conv_2d.h | 5 +- mace/kernels/fully_connected.h | 5 +- mace/kernels/gemmlowp_util.h | 2 - mace/kernels/matmul.h | 5 +- mace/kernels/matmul_benchmark.cc | 22 ++++--- mace/libmace/mace.cc | 74 +++++++++--------------- mace/ops/ops_test_util.cc | 19 ++++-- mace/ops/ops_test_util.h | 12 ++-- mace/public/mace.h | 43 +------------- 17 files changed, 143 insertions(+), 151 deletions(-) diff --git a/mace/core/BUILD b/mace/core/BUILD index bacde19a..8b97ed91 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -104,10 +104,13 @@ cc_library( "-Werror", "-Wextra", "-Wno-missing-field-initializers", - ], + ] + if_opencl_enabled([ + "-DMACE_ENABLE_OPENCL", + ]), deps = [ ":core", "//external:gflags_nothreads", + "//mace/ops:test", "//mace/utils", ], ) diff --git a/mace/core/device.cc b/mace/core/device.cc index 09f5a068..aa0d1663 100644 --- a/mace/core/device.cc +++ b/mace/core/device.cc @@ -16,8 +16,12 @@ namespace mace { -CPUDevice::CPUDevice(const int num_threads) - : cpu_runtime_(new CPURuntime(num_threads)) {} +CPUDevice::CPUDevice(const int num_threads, + const CPUAffinityPolicy policy, + const bool use_gemmlowp) + : cpu_runtime_(new CPURuntime(num_threads, + policy, + use_gemmlowp)) {} CPUDevice::~CPUDevice() = default; diff --git a/mace/core/device.h b/mace/core/device.h index 7336d79f..ec1c7e6a 100644 --- a/mace/core/device.h +++ b/mace/core/device.h @@ -41,7 +41,9 @@ class Device { class CPUDevice : public Device { public: - explicit CPUDevice(const int num_threads); + CPUDevice(const int num_threads, + const CPUAffinityPolicy policy, + const bool use_gemmlowp); virtual ~CPUDevice(); #ifdef MACE_ENABLE_OPENCL diff --git a/mace/core/runtime/cpu/cpu_runtime.cc b/mace/core/runtime/cpu/cpu_runtime.cc index 5e76e499..ac8a3582 100644 --- a/mace/core/runtime/cpu/cpu_runtime.cc +++ b/mace/core/runtime/cpu/cpu_runtime.cc @@ -27,7 +27,6 @@ #include #include -#include "public/gemmlowp.h" #include "mace/core/macros.h" #include "mace/public/mace.h" #include "mace/utils/logging.h" @@ -92,13 +91,6 @@ MaceStatus SetThreadAffinity(cpu_set_t mask) { } } -} // namespace - -gemmlowp::GemmContext& GetGemmlowpContext() { - static auto *gemm_context = new gemmlowp::GemmContext; - return *gemm_context; -} - MaceStatus GetCPUBigLittleCoreIDs(std::vector *big_core_ids, std::vector *little_core_ids) { MACE_CHECK_NOTNULL(big_core_ids); @@ -174,13 +166,15 @@ MaceStatus SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, #endif } -MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, - CPUAffinityPolicy policy, - bool use_gemmlowp) { +} // namespace + +MaceStatus CPURuntime::SetOpenMPThreadsAndAffinityPolicy( + int omp_num_threads_hint, + CPUAffinityPolicy policy, + gemmlowp::GemmContext *gemm_context) { if (policy == CPUAffinityPolicy::AFFINITY_NONE) { - if (use_gemmlowp) { - gemmlowp::GemmContext& gemm_context = GetGemmlowpContext(); - gemm_context.set_max_num_threads(std::max(0, omp_num_threads_hint)); + if (gemm_context) { + gemm_context->set_max_num_threads(std::max(0, omp_num_threads_hint)); } #ifdef MACE_ENABLE_OPENMP if (omp_num_threads_hint > 0) { @@ -211,9 +205,8 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, omp_num_threads_hint = use_cpu_ids.size(); } - if (use_gemmlowp) { - gemmlowp::GemmContext& gemm_context = GetGemmlowpContext(); - gemm_context.set_max_num_threads(omp_num_threads_hint); + if (gemm_context) { + gemm_context->set_max_num_threads(omp_num_threads_hint); } return SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids); diff --git a/mace/core/runtime/cpu/cpu_runtime.h b/mace/core/runtime/cpu/cpu_runtime.h index 83d397ee..4b0f796b 100644 --- a/mace/core/runtime/cpu/cpu_runtime.h +++ b/mace/core/runtime/cpu/cpu_runtime.h @@ -15,33 +15,55 @@ #ifndef MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_ #define MACE_CORE_RUNTIME_CPU_CPU_RUNTIME_H_ +#include #include +#include "public/gemmlowp.h" #include "mace/public/mace.h" +#include "mace/utils/logging.h" namespace mace { extern int MaceOpenMPThreadCount; -MaceStatus GetCPUBigLittleCoreIDs(std::vector *big_core_ids, - std::vector *little_core_ids); - -MaceStatus SetOpenMPThreadsAndAffinityCPUs(int omp_num_threads, - const std::vector &cpu_ids); - -MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, - CPUAffinityPolicy policy, - bool use_gemmlowp = false); - class CPURuntime { public: - explicit CPURuntime(const int num_threads) : num_threads_(num_threads) {} + CPURuntime(const int num_threads, + CPUAffinityPolicy policy, + bool use_gemmlowp) + : num_threads_(num_threads), + policy_(policy), + gemm_context_(nullptr) { + if (use_gemmlowp) { + MACE_CHECK_NOTNULL(GetGemmlowpContext()); + } + + SetOpenMPThreadsAndAffinityPolicy(num_threads_, + policy_, + gemm_context_.get()); + } ~CPURuntime() = default; - inline int num_threads() const { + + gemmlowp::GemmContext *GetGemmlowpContext() { + if (!gemm_context_) { + gemm_context_.reset(new gemmlowp::GemmContext()); + } + return gemm_context_.get(); + } + + int num_threads() const { return num_threads_; } + private: + MaceStatus SetOpenMPThreadsAndAffinityPolicy( + int omp_num_threads_hint, + CPUAffinityPolicy policy, + gemmlowp::GemmContext *gemm_context); + int num_threads_; + CPUAffinityPolicy policy_; + std::unique_ptr gemm_context_; }; } // namespace mace diff --git a/mace/core/runtime/opencl/gpu_device.cc b/mace/core/runtime/opencl/gpu_device.cc index cd9e41bb..65686f83 100644 --- a/mace/core/runtime/opencl/gpu_device.cc +++ b/mace/core/runtime/opencl/gpu_device.cc @@ -21,8 +21,10 @@ GPUDevice::GPUDevice(Tuner *tuner, const GPUPriorityHint priority, const GPUPerfHint perf, KVStorage *opencl_binary_storage, - const int num_threads) : - CPUDevice(num_threads), + const int num_threads, + CPUAffinityPolicy cpu_affinity_policy, + bool use_gemmlowp) : + CPUDevice(num_threads, cpu_affinity_policy, use_gemmlowp), runtime_(new OpenCLRuntime(opencl_cache_storage, priority, perf, opencl_binary_storage, tuner)), allocator_(new OpenCLAllocator(runtime_.get())) {} diff --git a/mace/core/runtime/opencl/gpu_device.h b/mace/core/runtime/opencl/gpu_device.h index 1526ba0a..350d53c8 100644 --- a/mace/core/runtime/opencl/gpu_device.h +++ b/mace/core/runtime/opencl/gpu_device.h @@ -30,7 +30,9 @@ class GPUDevice : public CPUDevice { const GPUPriorityHint priority = GPUPriorityHint::PRIORITY_LOW, const GPUPerfHint perf = GPUPerfHint::PERF_NORMAL, KVStorage *opencl_binary_storage = nullptr, - const int num_threads = -1); + const int num_threads = -1, + CPUAffinityPolicy cpu_affinity_policy = AFFINITY_NONE, + bool use_gemmlowp = false); ~GPUDevice(); OpenCLRuntime *opencl_runtime() override; Allocator *allocator() override; diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index 49c26326..173b8873 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -17,7 +17,7 @@ #include "gflags/gflags.h" #include "mace/core/runtime/cpu/cpu_runtime.h" #include "mace/core/testing/test_benchmark.h" -#include "mace/utils/logging.h" +#include "mace/ops/ops_test_util.h" DEFINE_string(filter, "all", "op benchmark regex filter, eg:.*CONV.*"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); @@ -31,13 +31,10 @@ int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); // config runtime - mace::MaceStatus status = mace::SetOpenMPThreadsAndAffinityPolicy( + mace::ops::test::OpTestContext::Get( FLAGS_omp_num_threads, static_cast(FLAGS_cpu_affinity_policy), true); - if (status != mace::MACE_SUCCESS) { - LOG(WARNING) << "Set openmp or cpu affinity failed."; - } mace::testing::Benchmark::Run(FLAGS_filter.c_str()); return 0; diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index 024644f3..a8ecaa89 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -838,7 +838,8 @@ struct Conv2dFunctor : Conv2dFunctorBase { MACE_CHECK(dilations_[0] == 1 && dilations_[1] == 1, "Quantization convolution does not support dilation > 1 yet."); - gemmlowp::GemmContext& gemm_context = GetGemmlowpContext(); + auto gemm_context = context_->device()->cpu_runtime()->GetGemmlowpContext(); + MACE_CHECK_NOTNULL(gemm_context); std::vector output_shape(4); std::vector paddings(2); @@ -955,7 +956,7 @@ struct Conv2dFunctor : Conv2dFunctorBase { using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; gemmlowp::GemmWithOutputPipeline( - &gemm_context, filter_matrix, input_matrix, &output_matrix, + gemm_context, filter_matrix, input_matrix, &output_matrix, -filter->zero_point(), -input->zero_point(), output_pipeline); return MACE_SUCCESS; diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h index e6743aa4..ccbc6344 100644 --- a/mace/kernels/fully_connected.h +++ b/mace/kernels/fully_connected.h @@ -100,7 +100,8 @@ struct FullyConnectedFunctor: FullyConnectedBase { Tensor *output, StatsFuture *future) { MACE_UNUSED(future); - gemmlowp::GemmContext& gemm_context = GetGemmlowpContext(); + auto gemm_context = context_->device()->cpu_runtime()->GetGemmlowpContext(); + MACE_CHECK_NOTNULL(gemm_context); std::vector output_shape = {input->dim(0), 1, 1, weight->dim(0)}; MACE_RETURN_IF_ERROR(output->Resize(output_shape)); @@ -142,7 +143,7 @@ struct FullyConnectedFunctor: FullyConnectedBase { using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; gemmlowp::GemmWithOutputPipeline( - &gemm_context, weight_matrix, input_matrix, &output_matrix, + gemm_context, weight_matrix, input_matrix, &output_matrix, -weight->zero_point(), -input->zero_point(), output_pipeline); return MACE_SUCCESS; diff --git a/mace/kernels/gemmlowp_util.h b/mace/kernels/gemmlowp_util.h index 28d45d3a..f8fd26e0 100644 --- a/mace/kernels/gemmlowp_util.h +++ b/mace/kernels/gemmlowp_util.h @@ -22,8 +22,6 @@ namespace mace { -gemmlowp::GemmContext& GetGemmlowpContext(); - struct GemmlowpOutputPipeline { typedef gemmlowp::VectorMap ColVectorMap; diff --git a/mace/kernels/matmul.h b/mace/kernels/matmul.h index 9c5292d2..d22d391f 100644 --- a/mace/kernels/matmul.h +++ b/mace/kernels/matmul.h @@ -119,7 +119,8 @@ struct MatMulFunctor : OpKernel { const index_t K, const index_t width, Tensor *C) { - gemmlowp::GemmContext& gemm_context = GetGemmlowpContext(); + auto gemm_context = context_->device()->cpu_runtime()->GetGemmlowpContext(); + MACE_CHECK_NOTNULL(gemm_context); Tensor::MappingGuard guarda(A); Tensor::MappingGuard guardb(B); @@ -146,7 +147,7 @@ struct MatMulFunctor : OpKernel { using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; gemmlowp::GemmWithOutputPipeline( - &gemm_context, a_matrix, b_matrix, &c_matrix, -A->zero_point(), + gemm_context, a_matrix, b_matrix, &c_matrix, -A->zero_point(), -B->zero_point(), output_pipeline); } } diff --git a/mace/kernels/matmul_benchmark.cc b/mace/kernels/matmul_benchmark.cc index be76a88e..ef19bd6c 100644 --- a/mace/kernels/matmul_benchmark.cc +++ b/mace/kernels/matmul_benchmark.cc @@ -21,8 +21,8 @@ #include "public/gemmlowp.h" #include "mace/core/testing/test_benchmark.h" #include "mace/kernels/gemm.h" -#include "mace/kernels/gemmlowp_util.h" #include "mace/kernels/sgemm.h" +#include "mace/ops/ops_test_util.h" namespace gemmlowp { @@ -164,18 +164,22 @@ void MatmulBenchmark_gemmlowp_uint8(int iters, int rows, int depth, int cols) { const auto output_pipeline = std::make_tuple(quantize_down_stage, saturating_cast_stage); - gemmlowp::GemmContext& gemm_context = GetGemmlowpContext(); + auto gemm_context = + mace::ops::test::OpTestContext::Get() + ->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext(); + MACE_CHECK_NOTNULL(gemm_context); + using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; gemmlowp::GemmWithOutputPipeline( - &gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, + gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, -128, output_pipeline); mace::testing::StartTiming(); while (iters--) { gemmlowp::GemmWithOutputPipeline( - &gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, + gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, -128, output_pipeline); } } @@ -195,18 +199,22 @@ void MatmulBenchmark_gemmlowp_int32(int iters, int rows, int depth, int cols) { const auto output_pipeline = std::make_tuple(); - gemmlowp::GemmContext& gemm_context = GetGemmlowpContext(); + auto gemm_context = + mace::ops::test::OpTestContext::Get() + ->GetDevice(CPU)->cpu_runtime()->GetGemmlowpContext(); + MACE_CHECK_NOTNULL(gemm_context); + using BitDepthParams = gemmlowp::L8R8WithLhsNonzeroBitDepthParams; gemmlowp::GemmWithOutputPipeline( - &gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, + gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, -128, output_pipeline); mace::testing::StartTiming(); while (iters--) { gemmlowp::GemmWithOutputPipeline( - &gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, + gemm_context, lhs.const_map(), rhs.const_map(), &result.map(), -128, -128, output_pipeline); } } diff --git a/mace/libmace/mace.cc b/mace/libmace/mace.cc index 80a35943..b9ae1497 100644 --- a/mace/libmace/mace.cc +++ b/mace/libmace/mace.cc @@ -177,9 +177,6 @@ class MaceEngineConfig::Impl { CPUAffinityPolicy policy, bool use_gemmlowp); - MaceStatus SetOpenMPThreadAffinity(int num_threads, - const std::vector &cpu_ids); - inline DeviceType device_type() const { return device_type_; } @@ -188,6 +185,14 @@ class MaceEngineConfig::Impl { return num_threads_; } + inline CPUAffinityPolicy cpu_affinity_policy() const { + return cpu_affinity_policy_; + } + + inline bool use_gemmlowp() const { + return use_gemmlowp_; + } + inline std::shared_ptr gpu_context() const { return gpu_context_; } @@ -203,6 +208,8 @@ class MaceEngineConfig::Impl { private: DeviceType device_type_; int num_threads_; + CPUAffinityPolicy cpu_affinity_policy_; + bool use_gemmlowp_; std::shared_ptr gpu_context_; GPUPriorityHint gpu_priority_hint_; GPUPerfHint gpu_perf_hint_; @@ -211,6 +218,8 @@ class MaceEngineConfig::Impl { MaceEngineConfig::Impl::Impl(const DeviceType device_type) : device_type_(device_type), num_threads_(-1), + cpu_affinity_policy_(CPUAffinityPolicy::AFFINITY_NONE), + use_gemmlowp_(false), gpu_context_(new GPUContext), gpu_priority_hint_(GPUPriorityHint::PRIORITY_LOW), gpu_perf_hint_(GPUPerfHint::PERF_NORMAL) {} @@ -234,15 +243,9 @@ MaceStatus MaceEngineConfig::Impl::SetCPUThreadPolicy( CPUAffinityPolicy policy, bool use_gemmlowp) { num_threads_ = num_threads; - return mace::SetOpenMPThreadsAndAffinityPolicy( - num_threads, policy, use_gemmlowp); -} - -MaceStatus MaceEngineConfig::Impl::SetOpenMPThreadAffinity( - int num_threads, - const std::vector &cpu_ids) { - num_threads_ = num_threads; - return mace::SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids); + cpu_affinity_policy_ = policy; + use_gemmlowp_ = use_gemmlowp; + return MACE_SUCCESS; } @@ -270,32 +273,6 @@ MaceStatus MaceEngineConfig::SetCPUThreadPolicy( return impl_->SetCPUThreadPolicy(num_threads_hint, policy, use_gemmlowp); } -MaceStatus MaceEngineConfig::SetOpenMPThreadAffinity( - int num_threads, - const std::vector &cpu_ids) { - return impl_->SetOpenMPThreadAffinity(num_threads, cpu_ids); -} - -DeviceType MaceEngineConfig::device_type() const { - return impl_->device_type(); -} - -int MaceEngineConfig::num_threads() const { - return impl_->num_threads(); -} - -std::shared_ptr MaceEngineConfig::gpu_context() const { - return impl_->gpu_context(); -} - -GPUPerfHint MaceEngineConfig::gpu_perf_hint() const { - return impl_->gpu_perf_hint(); -} - -GPUPriorityHint MaceEngineConfig::gpu_priority_hint() const { - return impl_->gpu_priority_hint(); -} - // Mace Tensor class MaceTensor::Impl { public: @@ -389,7 +366,7 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) : model_data_(nullptr), model_data_size_(0), op_registry_(new OperatorRegistry()), - device_type_(config.device_type()), + device_type_(config.impl_->device_type()), device_(nullptr), ws_(new Workspace()), net_(nullptr) @@ -399,16 +376,21 @@ MaceEngine::Impl::Impl(const MaceEngineConfig &config) { LOG(INFO) << "Creating MaceEngine, MACE version: " << MaceVersion(); if (device_type_ == DeviceType::CPU || device_type_ == DeviceType::HEXAGON) { - device_.reset(new CPUDevice(config.num_threads())); + device_.reset(new CPUDevice(config.impl_->num_threads(), + config.impl_->cpu_affinity_policy(), + config.impl_->use_gemmlowp())); } #ifdef MACE_ENABLE_OPENCL if (device_type_ == DeviceType::GPU) { - device_.reset(new GPUDevice(config.gpu_context()->opencl_tuner(), - config.gpu_context()->opencl_cache_storage(), - config.gpu_priority_hint(), - config.gpu_perf_hint(), - config.gpu_context()->opencl_binary_storage(), - config.num_threads())); + device_.reset(new GPUDevice( + config.impl_->gpu_context()->opencl_tuner(), + config.impl_->gpu_context()->opencl_cache_storage(), + config.impl_->gpu_priority_hint(), + config.impl_->gpu_perf_hint(), + config.impl_->gpu_context()->opencl_binary_storage(), + config.impl_->num_threads(), + config.impl_->cpu_affinity_policy(), + config.impl_->use_gemmlowp())); } #endif } diff --git a/mace/ops/ops_test_util.cc b/mace/ops/ops_test_util.cc index 5be4cb96..5e94c3a6 100644 --- a/mace/ops/ops_test_util.cc +++ b/mace/ops/ops_test_util.cc @@ -18,8 +18,12 @@ namespace mace { namespace ops { namespace test { -OpTestContext *OpTestContext::Get() { - static OpTestContext instance; +OpTestContext *OpTestContext::Get(int num_threads, + CPUAffinityPolicy cpu_affinity_policy, + bool use_gemmlowp) { + static OpTestContext instance(num_threads, + cpu_affinity_policy, + use_gemmlowp); return &instance; } @@ -31,8 +35,15 @@ Device *OpTestContext::GetDevice(DeviceType device_type) { return device_map_[device_type].get(); } -OpTestContext::OpTestContext() : gpu_context_(new GPUContext()) { - device_map_[DeviceType::CPU] = std::unique_ptr(new CPUDevice(-1)); +OpTestContext::OpTestContext(int num_threads, + CPUAffinityPolicy cpu_affinity_policy, + bool use_gemmlowp) + : gpu_context_(new GPUContext()) { + device_map_[DeviceType::CPU] = std::unique_ptr( + new CPUDevice(num_threads, + cpu_affinity_policy, + use_gemmlowp)); + device_map_[DeviceType::GPU] = std::unique_ptr( new GPUDevice(gpu_context_->opencl_tuner(), gpu_context_->opencl_cache_storage(), diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 3a248ac1..4ebfb4d4 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -114,11 +114,17 @@ class OpDefBuilder { class OpTestContext { public: - static OpTestContext *Get(); + static OpTestContext *Get( + int num_threads = -1, + CPUAffinityPolicy cpu_affinity_policy = AFFINITY_BIG_ONLY, + bool use_gemmlowp = true); std::shared_ptr gpu_context() const; Device *GetDevice(DeviceType device_type); + private: - OpTestContext(); + OpTestContext(int num_threads, + CPUAffinityPolicy cpu_affinity_policy, + bool use_gemmlowp); MACE_DISABLE_COPY_AND_ASSIGN(OpTestContext); std::shared_ptr gpu_context_; @@ -501,8 +507,6 @@ class OpsTestNet { class OpsTestBase : public ::testing::Test { protected: virtual void SetUp() { - SetOpenMPThreadsAndAffinityPolicy(-1, - CPUAffinityPolicy::AFFINITY_BIG_ONLY); } virtual void TearDown() { diff --git a/mace/public/mace.h b/mace/public/mace.h index 0b743423..db50a58e 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -97,21 +97,6 @@ enum MaceStatus { } \ } -/// \brief Get ARM big.LITTLE configuration. -/// -/// This function will detect the max frequencies of all CPU cores, and assume -/// the cores with largest max frequencies as big cores, and all the remaining -/// cores as little. If all cpu core's max frequencies equals, big_core_ids and -/// little_core_ids will both be filled with all cpu core ids. -/// -/// \param [out] big_core_ids -/// \param [out] little_core_ids -/// \return If successful, it returns MACE_SUCCESS and error if it can't -/// reliabley detect the frequency of big-LITTLE cores (e.g. MTK). - -MACE_API MaceStatus GetBigLittleCoreIDs(std::vector *big_core_ids, - std::vector *little_core_ids); - /// \brief GPU context contain the status used for GPU device. /// /// The life cycle of GPUContext object is the same as MaceEngines use it. @@ -170,6 +155,8 @@ class MACE_API GPUContextBuilder { }; class MACE_API MaceEngineConfig { + friend class MaceEngine; + public: explicit MaceEngineConfig(const DeviceType device_type); ~MaceEngineConfig(); @@ -219,32 +206,6 @@ class MACE_API MaceEngineConfig { CPUAffinityPolicy policy, bool use_gemmlowp = false); - /// \brief Set OpenMP threads number and processor affinity. - /// - /// Caution: this function may hurt performance - /// if improper parameters provided. - /// This function may not work well on some chips (e.g. MTK). Setting thread - /// affinity to offline cores may run very slow or unexpectedly. - /// In such cases, please use SetOpenMPThreadPolicy with default policy - /// instead. - /// - /// \param num_threads - /// \param cpu_ids - /// \return MACE_SUCCESS for success, other for failed. - MaceStatus SetOpenMPThreadAffinity( - int num_threads, - const std::vector &cpu_ids); - - DeviceType device_type() const; - - int num_threads() const; - - std::shared_ptr gpu_context() const; - - GPUPriorityHint gpu_priority_hint() const; - - GPUPerfHint gpu_perf_hint() const; - private: class Impl; std::unique_ptr impl_; -- GitLab