提交 c01ea0bb 编写于 作者: 李寅

Make core platform compatible

上级 8f5a79b7
stages: stages:
- cpplint - cpplint
- pycodestyle - pycodestyle
- platform_compitable_tests
- ops_test - ops_test
- ops_benchmark - ops_benchmark
- extra_tests - extra_tests
...@@ -34,3 +35,10 @@ extra_tests: ...@@ -34,3 +35,10 @@ extra_tests:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
platform_compitable_tests:
stage: platform_compitable_tests
script:
- bazel build mace/core:core
...@@ -23,36 +23,41 @@ cc_library( ...@@ -23,36 +23,41 @@ cc_library(
[ [
"*.cc", "*.cc",
"runtime/cpu/*.cc", "runtime/cpu/*.cc",
"runtime/opencl/*.cc",
"runtime/hexagon/*.cc",
], ],
exclude = [ exclude = [
"*_test.cc", "*_test.cc",
"runtime/hexagon/hexagon_controller_dummy.cc",
], ],
) + if_not_hexagon_enabled([ ) + if_android(glob(
"runtime/hexagon/hexagon_controller_dummy.cc", [
]) + if_hexagon_enabled([ "runtime/opencl/*.cc",
],
)) + if_hexagon_enabled(glob([
"runtime/hexagon/*.cc",
"runtime/hexagon/libhexagon_controller.so", "runtime/hexagon/libhexagon_controller.so",
]), ])),
hdrs = glob([ hdrs = glob([
"*.h", "*.h",
"runtime/cpu/*.h", "runtime/cpu/*.h",
"runtime/opencl/*.h", ]) + if_android(glob(
"runtime/hexagon/*.h", [
]), "runtime/opencl/*.h",
copts = if_openmp_enabled(["-fopenmp"]), ],
)) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])),
copts = if_openmp_enabled(["-fopenmp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-ldl"] + if_android([ linkopts = ["-ldl"] + if_android([
"-pie", "-pie",
"-lm", "-lm",
]), ]),
deps = [ deps = [
"//mace/utils",
] + if_android([
":opencl_headers", ":opencl_headers",
"//mace/codegen:generated_opencl", "//mace/codegen:generated_opencl",
"//mace/codegen:generated_version", "//mace/codegen:generated_version",
"//mace/utils",
"@half//:half", "@half//:half",
] + if_production_mode([ ]) + if_production_mode([
"//mace/codegen:generated_tuning_params", "//mace/codegen:generated_tuning_params",
"//mace/utils:utils_prod", "//mace/utils:utils_prod",
]) + if_not_production_mode([ ]) + if_not_production_mode([
......
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
// //
#include "mace/core/allocator.h" #include "mace/core/allocator.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_allocator.h" #include "mace/core/runtime/opencl/opencl_allocator.h"
#endif
namespace mace { namespace mace {
...@@ -23,7 +25,9 @@ Allocator *GetDeviceAllocator(DeviceType type) { ...@@ -23,7 +25,9 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator());
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
#endif
MACE_REGISTER_ALLOCATOR(DeviceType::HEXAGON, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::HEXAGON, new CPUAllocator());
} // namespace mace } // namespace mace
...@@ -6,10 +6,11 @@ ...@@ -6,10 +6,11 @@
#ifndef MACE_CORE_ALLOCATOR_H_ #ifndef MACE_CORE_ALLOCATOR_H_
#define MACE_CORE_ALLOCATOR_H_ #define MACE_CORE_ALLOCATOR_H_
#include <malloc.h> #include <stdlib.h>
#include <map> #include <map>
#include <limits> #include <limits>
#include <vector> #include <vector>
#include <cstring>
#include "mace/core/registry.h" #include "mace/core/registry.h"
#include "mace/core/types.h" #include "mace/core/types.h"
...@@ -17,7 +18,9 @@ ...@@ -17,7 +18,9 @@
namespace mace { namespace mace {
#ifdef __ANDROID__ #if defined(__hexagon__)
constexpr size_t kMaceAlignment = 128;
#elif defined(__ANDROID__)
// 16 bytes = 128 bits = 32 * 4 (Neon) // 16 bytes = 128 bits = 32 * 4 (Neon)
constexpr size_t kMaceAlignment = 16; constexpr size_t kMaceAlignment = 16;
#else #else
...@@ -58,7 +61,7 @@ class CPUAllocator : public Allocator { ...@@ -58,7 +61,7 @@ class CPUAllocator : public Allocator {
void *New(size_t nbytes) const override { void *New(size_t nbytes) const override {
VLOG(3) << "Allocate CPU buffer: " << nbytes; VLOG(3) << "Allocate CPU buffer: " << nbytes;
void *data = nullptr; void *data = nullptr;
#ifdef __ANDROID__ #if defined(__ANDROID__) || defined(__hexagon__)
data = memalign(kMaceAlignment, nbytes); data = memalign(kMaceAlignment, nbytes);
#else #else
MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0); MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0);
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include <vector> #include <vector>
#include <algorithm> #include <algorithm>
#include <numeric>
#include <functional> #include <functional>
#include "mace/core/allocator.h" #include "mace/core/allocator.h"
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <utility> #include <utility>
#include <cstring>
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
namespace mace { namespace mace {
class CallStats; struct CallStats;
// Wait for the call to finish and get the stats if param is not nullptr // Wait for the call to finish and get the stats if param is not nullptr
struct StatsFuture { struct StatsFuture {
......
...@@ -4,17 +4,23 @@ ...@@ -4,17 +4,23 @@
#include <memory> #include <memory>
#include "mace/core/file_storage.h"
#include "mace/core/net.h" #include "mace/core/net.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_HEXAGON
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#endif // MACE_ENABLE_HEXAGON
namespace mace { namespace mace {
// Mace Tensor // Mace Tensor
struct MaceTensor::Impl { class MaceTensor::Impl {
public:
std::vector<int64_t> shape; std::vector<int64_t> shape;
std::shared_ptr<float> data; std::shared_ptr<float> data;
}; };
...@@ -39,8 +45,8 @@ MaceTensor::MaceTensor(const MaceTensor &other) { ...@@ -39,8 +45,8 @@ MaceTensor::MaceTensor(const MaceTensor &other) {
MaceTensor::MaceTensor(const MaceTensor &&other) { MaceTensor::MaceTensor(const MaceTensor &&other) {
impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl()); impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
impl_->shape = std::move(other.shape()); impl_->shape = other.shape();
impl_->data = std::move(other.data()); impl_->data = other.data();
} }
MaceTensor &MaceTensor::operator=(const MaceTensor &other) { MaceTensor &MaceTensor::operator=(const MaceTensor &other) {
...@@ -50,8 +56,8 @@ MaceTensor &MaceTensor::operator=(const MaceTensor &other) { ...@@ -50,8 +56,8 @@ MaceTensor &MaceTensor::operator=(const MaceTensor &other) {
} }
MaceTensor &MaceTensor::operator=(const MaceTensor &&other) { MaceTensor &MaceTensor::operator=(const MaceTensor &&other) {
impl_->shape = std::move(other.shape()); impl_->shape = other.shape();
impl_->data = std::move(other.data()); impl_->data = other.data();
return *this; return *this;
} }
...@@ -81,7 +87,9 @@ class MaceEngine::Impl { ...@@ -81,7 +87,9 @@ class MaceEngine::Impl {
DeviceType device_type_; DeviceType device_type_;
std::unique_ptr<Workspace> ws_; std::unique_ptr<Workspace> ws_;
std::unique_ptr<NetBase> net_; std::unique_ptr<NetBase> net_;
#ifdef MACE_ENABLE_HEXAGON
std::unique_ptr<HexagonControlWrapper> hexagon_controller_; std::unique_ptr<HexagonControlWrapper> hexagon_controller_;
#endif
DISABLE_COPY_AND_ASSIGN(Impl); DISABLE_COPY_AND_ASSIGN(Impl);
}; };
...@@ -93,8 +101,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ...@@ -93,8 +101,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
: op_registry_(new OperatorRegistry()), : op_registry_(new OperatorRegistry()),
device_type_(device_type), device_type_(device_type),
ws_(new Workspace()), ws_(new Workspace()),
net_(nullptr), net_(nullptr)
hexagon_controller_(nullptr) { #ifdef MACE_ENABLE_HEXAGON
, hexagon_controller_(nullptr)
#endif
{
LOG(INFO) << "MACE version: " << MaceVersion(); LOG(INFO) << "MACE version: " << MaceVersion();
// Set storage path for internal usage // Set storage path for internal usage
for (auto input_name : input_nodes) { for (auto input_name : input_nodes) {
...@@ -105,6 +116,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ...@@ -105,6 +116,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
ws_->CreateTensor(MakeString("mace_output_node_", output_name, ":0"), ws_->CreateTensor(MakeString("mace_output_node_", output_name, ":0"),
GetDeviceAllocator(device_type_), DT_FLOAT); GetDeviceAllocator(device_type_), DT_FLOAT);
} }
#ifdef MACE_ENABLE_HEXAGON
if (device_type == HEXAGON) { if (device_type == HEXAGON) {
hexagon_controller_.reset(new HexagonControlWrapper()); hexagon_controller_.reset(new HexagonControlWrapper());
MACE_CHECK(hexagon_controller_->Config(), "hexagon config error"); MACE_CHECK(hexagon_controller_->Config(), "hexagon config error");
...@@ -120,6 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ...@@ -120,6 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
hexagon_controller_->PrintGraph(); hexagon_controller_->PrintGraph();
} }
} else { } else {
#endif
ws_->LoadModelTensor(*net_def, device_type); ws_->LoadModelTensor(*net_def, device_type);
// Init model // Init model
...@@ -128,11 +141,14 @@ MaceEngine::Impl::Impl(const NetDef *net_def, ...@@ -128,11 +141,14 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
if (!net->Run()) { if (!net->Run()) {
LOG(FATAL) << "Net init run failed"; LOG(FATAL) << "Net init run failed";
} }
net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type);
#ifdef MACE_ENABLE_HEXAGON
} }
#endif
} }
MaceEngine::Impl::~Impl() { MaceEngine::Impl::~Impl() {
#ifdef MACE_ENABLE_HEXAGON
if (device_type_ == HEXAGON) { if (device_type_ == HEXAGON) {
if (VLOG_IS_ON(2)) { if (VLOG_IS_ON(2)) {
hexagon_controller_->GetPerfInfo(); hexagon_controller_->GetPerfInfo();
...@@ -141,6 +157,7 @@ MaceEngine::Impl::~Impl() { ...@@ -141,6 +157,7 @@ MaceEngine::Impl::~Impl() {
MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error"); MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error");
MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error"); MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error");
} }
#endif
} }
MaceStatus MaceEngine::Impl::Run( MaceStatus MaceEngine::Impl::Run(
...@@ -167,18 +184,25 @@ MaceStatus MaceEngine::Impl::Run( ...@@ -167,18 +184,25 @@ MaceStatus MaceEngine::Impl::Run(
ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0"));
output_tensors.push_back(output_tensor); output_tensors.push_back(output_tensor);
} }
#ifdef MACE_ENABLE_HEXAGON
if (device_type_ == HEXAGON) { if (device_type_ == HEXAGON) {
MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1, MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1,
"HEXAGON not support multiple inputs and outputs yet."); "HEXAGON not support multiple inputs and outputs yet.");
hexagon_controller_->ExecuteGraph(*input_tensors[0], output_tensors[0]); hexagon_controller_->ExecuteGraph(*input_tensors[0], output_tensors[0]);
} else { } else {
#endif
if (!net_->Run(run_metadata)) { if (!net_->Run(run_metadata)) {
LOG(FATAL) << "Net run failed"; LOG(FATAL) << "Net run failed";
} }
#ifdef MACE_ENABLE_HEXAGON
} }
#endif
#ifdef MACE_ENABLE_OPENCL
if (device_type_ == OPENCL) { if (device_type_ == OPENCL) {
OpenCLRuntime::Global()->SaveBuiltCLProgram(); OpenCLRuntime::Global()->SaveBuiltCLProgram();
} }
#endif
for (auto &output : *outputs) { for (auto &output : *outputs) {
Tensor *output_tensor = Tensor *output_tensor =
ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0")); ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0"));
......
...@@ -3,38 +3,15 @@ ...@@ -3,38 +3,15 @@
// //
#include "mace/public/mace_runtime.h" #include "mace/public/mace_runtime.h"
#include "mace/core/runtime/cpu/cpu_runtime.h" #include "mace/utils/logging.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace mace { namespace mace {
std::shared_ptr<KVStorageFactory> kStorageFactory = nullptr; std::shared_ptr<KVStorageFactory> kStorageFactory = nullptr;
void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) {
VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory) { void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory) {
VLOG(1) << "Set internal KV Storage Engine"; VLOG(1) << "Set internal KV Storage Engine";
kStorageFactory = storage_factory; kStorageFactory = storage_factory;
} }
MaceStatus SetOpenMPThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy) {
VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint
<< ", affinity policy: " << policy;
return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy);
}
void SetOpenMPThreadAffinity(int num_threads, const std::vector<int> &cpu_ids) {
return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids);
}
MaceStatus GetBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids) {
return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids);
}
}; // namespace mace }; // namespace mace
...@@ -142,12 +142,12 @@ void OperatorDef::CopyFrom(const OperatorDef &from) { ...@@ -142,12 +142,12 @@ void OperatorDef::CopyFrom(const OperatorDef &from) {
std::copy(from_output.begin(), from_output.end(), output_.begin()); std::copy(from_output.begin(), from_output.end(), output_.begin());
auto from_arg = from.arg(); auto from_arg = from.arg();
arg_.resize(from_arg.size()); arg_.resize(from_arg.size());
for (int i = 0; i < from_arg.size(); ++i) { for (size_t i = 0; i < from_arg.size(); ++i) {
arg_[i].CopyFrom(from_arg[i]); arg_[i].CopyFrom(from_arg[i]);
} }
auto from_output_shape = from.output_shape(); auto from_output_shape = from.output_shape();
output_shape_.resize(from_output_shape.size()); output_shape_.resize(from_output_shape.size());
for (int i = 0; i < from_output_shape.size(); ++i) { for (size_t i = 0; i < from_output_shape.size(); ++i) {
output_shape_[i].CopyFrom(from_output_shape[i]); output_shape_[i].CopyFrom(from_output_shape[i]);
} }
auto from_data_type = from.output_type(); auto from_data_type = from.output_type();
...@@ -164,7 +164,7 @@ void OperatorDef::CopyFrom(const OperatorDef &from) { ...@@ -164,7 +164,7 @@ void OperatorDef::CopyFrom(const OperatorDef &from) {
padding_ = from.padding(); padding_ = from.padding();
auto from_node_input = from.node_input(); auto from_node_input = from.node_input();
node_input_.resize(from_node_input.size()); node_input_.resize(from_node_input.size());
for (int i = 0; i < from_node_input.size(); ++i) { for (size_t i = 0; i < from_node_input.size(); ++i) {
node_input_[i].CopyFrom(from_node_input[i]); node_input_[i].CopyFrom(from_node_input[i]);
} }
auto from_out_max_byte_size = from.out_max_byte_size(); auto from_out_max_byte_size = from.out_max_byte_size();
...@@ -213,7 +213,7 @@ void OperatorDef::add_out_max_byte_size(int value) { ...@@ -213,7 +213,7 @@ void OperatorDef::add_out_max_byte_size(int value) {
} }
const std::vector<std::string> &OperatorDef::input() const { return input_; } const std::vector<std::string> &OperatorDef::input() const { return input_; }
const std::string &OperatorDef::input(int index) const { const std::string &OperatorDef::input(int index) const {
MACE_CHECK(0 <= index && index <= input_.size()); MACE_CHECK(0 <= index && index <= static_cast<int>(input_.size()));
return input_[index]; return input_[index];
} }
std::string *OperatorDef::add_input() { std::string *OperatorDef::add_input() {
...@@ -230,7 +230,7 @@ void OperatorDef::set_input(const std::vector<std::string> &value) { ...@@ -230,7 +230,7 @@ void OperatorDef::set_input(const std::vector<std::string> &value) {
} }
const std::vector<std::string> &OperatorDef::output() const { return output_; } const std::vector<std::string> &OperatorDef::output() const { return output_; }
const std::string &OperatorDef::output(int index) const { const std::string &OperatorDef::output(int index) const {
MACE_CHECK(0 <= index && index <= output_.size()); MACE_CHECK(0 <= index && index <= static_cast<int>(output_.size()));
return output_[index]; return output_[index];
} }
std::string *OperatorDef::add_output() { std::string *OperatorDef::add_output() {
......
...@@ -15,7 +15,7 @@ NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry, ...@@ -15,7 +15,7 @@ NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def, const std::shared_ptr<const NetDef> net_def,
Workspace *ws, Workspace *ws,
DeviceType type) DeviceType type)
: op_registry_(op_registry), name_(net_def->name()) {} : name_(net_def->name()), op_registry_(op_registry) {}
SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry, SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def, const std::shared_ptr<const NetDef> net_def,
......
...@@ -6,12 +6,15 @@ ...@@ -6,12 +6,15 @@
#include <omp.h> #include <omp.h>
#include <unistd.h> #include <unistd.h>
#include <sys/types.h>
#include <algorithm> #include <algorithm>
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
namespace mace { namespace mace {
namespace { namespace {
...@@ -70,8 +73,11 @@ void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids, int *big_core_offset) { ...@@ -70,8 +73,11 @@ void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids, int *big_core_offset) {
} }
void SetThreadAffinity(cpu_set_t mask) { void SetThreadAffinity(cpu_set_t mask) {
int sys_call_res; #if defined(__ANDROID__)
pid_t pid = gettid(); pid_t pid = gettid();
#else
pid_t pid = pthread_self();
#endif
int err = sched_setaffinity(pid, sizeof(mask), &mask); int err = sched_setaffinity(pid, sizeof(mask), &mask);
MACE_CHECK(err == 0, "set affinity error: ", errno); MACE_CHECK(err == 0, "set affinity error: ", errno);
} }
...@@ -163,12 +169,28 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint, ...@@ -163,12 +169,28 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
} }
if (omp_num_threads_hint <= 0 || if (omp_num_threads_hint <= 0 ||
omp_num_threads_hint > use_cpu_ids.size()) { omp_num_threads_hint > static_cast<int>(use_cpu_ids.size())) {
omp_num_threads_hint = use_cpu_ids.size(); omp_num_threads_hint = use_cpu_ids.size();
} }
SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids); SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids);
return MACE_SUCCESS; return MACE_SUCCESS;
} }
// Logs the requested thread-count hint and affinity policy at VLOG level 1,
// then forwards both to SetOpenMPThreadsAndAffinityPolicy() and returns its
// status unchanged.
MaceStatus SetOpenMPThreadPolicy(int num_threads_hint,
                                 CPUAffinityPolicy policy) {
  VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint
          << ", affinity policy: " << policy;
  const MaceStatus status =
      SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy);
  return status;
}
}
// Forwards the thread count and the explicit CPU id list to
// SetOpenMPThreadsAndAffinityCPUs().
void SetOpenMPThreadAffinity(int num_threads,
                             const std::vector<int> &cpu_ids) {
  SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids);
}
// Forwards both output vectors to GetCPUBigLittleCoreIDs() and returns its
// status unchanged.
MaceStatus GetBigLittleCoreIDs(std::vector<int> *big_core_ids,
                               std::vector<int> *little_core_ids) {
  const MaceStatus status =
      GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids);
  return status;
}
} // namespace mace } // namespace mace
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <vector> #include <vector>
#include <utility> #include <utility>
#include "mace/public/mace_runtime.h"
#include "mace/core/file_storage.h" #include "mace/core/file_storage.h"
#include "mace/core/runtime/opencl/opencl_extension.h" #include "mace/core/runtime/opencl/opencl_extension.h"
#include "mace/public/mace.h" #include "mace/public/mace.h"
...@@ -25,6 +26,12 @@ extern const std::string kCompiledProgramPlatform; ...@@ -25,6 +26,12 @@ extern const std::string kCompiledProgramPlatform;
extern const std::map<std::string, std::vector<unsigned char>> extern const std::map<std::string, std::vector<unsigned char>>
kEncryptedProgramMap; kEncryptedProgramMap;
// Logs the requested GPU performance and priority hints at VLOG level 1 and
// hands them to OpenCLRuntime::Configure().
void SetGPUHints(GPUPerfHint gpu_perf_hint,
                 GPUPriorityHint gpu_priority_hint) {
  VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint
          << ", gpu_priority_hint: " << gpu_priority_hint;

  OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
const std::string OpenCLErrorToString(cl_int error) { const std::string OpenCLErrorToString(cl_int error) {
switch (error) { switch (error) {
case CL_SUCCESS: case CL_SUCCESS:
......
...@@ -11,8 +11,10 @@ ...@@ -11,8 +11,10 @@
#include "mace/core/buffer.h" #include "mace/core/buffer.h"
#include "mace/core/preallocated_pooled_allocator.h" #include "mace/core/preallocated_pooled_allocator.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/types.h" #include "mace/core/types.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif
#include "mace/public/mace.h" #include "mace/public/mace.h"
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
...@@ -26,6 +28,7 @@ namespace mace { ...@@ -26,6 +28,7 @@ namespace mace {
break; \ break; \
} }
#ifdef MACE_ENABLE_OPENCL
#define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \ #define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \
switch (TYPE_ENUM) { \ switch (TYPE_ENUM) { \
CASE(half, SINGLE_ARG(STMTS)) \ CASE(half, SINGLE_ARG(STMTS)) \
...@@ -46,6 +49,27 @@ namespace mace { ...@@ -46,6 +49,27 @@ namespace mace {
DEFAULT; \ DEFAULT; \
break; \ break; \
} }
#else
// Variant of CASES_WITH_DEFAULT used when MACE_ENABLE_OPENCL is not defined:
// it has no `half` case. Expands to a switch over TYPE_ENUM that emits one
// CASE(...) entry with STMTS for each listed type, runs INVALID for
// DT_INVALID, and runs DEFAULT for any other value.
#define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \
switch (TYPE_ENUM) { \
CASE(float, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \
CASE(int32_t, SINGLE_ARG(STMTS)) \
CASE(uint8_t, SINGLE_ARG(STMTS)) \
CASE(uint16_t, SINGLE_ARG(STMTS)) \
CASE(int16_t, SINGLE_ARG(STMTS)) \
CASE(int8_t, SINGLE_ARG(STMTS)) \
CASE(std::string, SINGLE_ARG(STMTS)) \
CASE(int64_t, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \
case DT_INVALID: \
INVALID; \
break; \
default: \
DEFAULT; \
break; \
}
#endif
#define CASES(TYPE_ENUM, STMTS) \ #define CASES(TYPE_ENUM, STMTS) \
CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \ CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \
...@@ -127,6 +151,7 @@ class Tensor { ...@@ -127,6 +151,7 @@ class Tensor {
return buffer_ != nullptr && !buffer_->OnHost() && !has_opencl_image(); return buffer_ != nullptr && !buffer_->OnHost() && !has_opencl_image();
} }
#ifdef MACE_ENABLE_OPENCL
inline cl::Image *opencl_image() const { inline cl::Image *opencl_image() const {
MACE_CHECK(has_opencl_image(), "do not have image"); MACE_CHECK(has_opencl_image(), "do not have image");
return static_cast<cl::Image *>(buffer_->buffer()); return static_cast<cl::Image *>(buffer_->buffer());
...@@ -136,6 +161,7 @@ class Tensor { ...@@ -136,6 +161,7 @@ class Tensor {
MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer"); MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer");
return static_cast<cl::Buffer *>(buffer_->buffer()); return static_cast<cl::Buffer *>(buffer_->buffer());
} }
#endif
inline index_t buffer_offset() const { return buffer_->offset(); } inline index_t buffer_offset() const { return buffer_->offset(); }
......
...@@ -30,11 +30,18 @@ bool DataTypeCanUseMemcpy(DataType dt) { ...@@ -30,11 +30,18 @@ bool DataTypeCanUseMemcpy(DataType dt) {
std::string DataTypeToString(const DataType dt) { std::string DataTypeToString(const DataType dt) {
static std::map<DataType, std::string> dtype_string_map = { static std::map<DataType, std::string> dtype_string_map = {
{DT_FLOAT, "DT_FLOAT"}, {DT_HALF, "DT_HALF"}, {DT_FLOAT, "DT_FLOAT"},
{DT_DOUBLE, "DT_DOUBLE"}, {DT_UINT8, "DT_UINT8"}, #ifdef MACE_ENABLE_OPENCL
{DT_INT8, "DT_INT8"}, {DT_INT32, "DT_INT32"}, {DT_HALF, "DT_HALF"},
{DT_UINT32, "DT_UINT32"}, {DT_UINT16, "DT_UINT16"}, #endif
{DT_INT64, "DT_INT64"}, {DT_BOOL, "DT_BOOL"}, {DT_DOUBLE, "DT_DOUBLE"},
{DT_UINT8, "DT_UINT8"},
{DT_INT8, "DT_INT8"},
{DT_INT32, "DT_INT32"},
{DT_UINT32, "DT_UINT32"},
{DT_UINT16, "DT_UINT16"},
{DT_INT64, "DT_INT64"},
{DT_BOOL, "DT_BOOL"},
{DT_STRING, "DT_STRING"}}; {DT_STRING, "DT_STRING"}};
MACE_CHECK(dt != DT_INVALID) << "Not support Invalid data type"; MACE_CHECK(dt != DT_INVALID) << "Not support Invalid data type";
return dtype_string_map[dt]; return dtype_string_map[dt];
...@@ -44,8 +51,10 @@ size_t GetEnumTypeSize(const DataType dt) { ...@@ -44,8 +51,10 @@ size_t GetEnumTypeSize(const DataType dt) {
switch (dt) { switch (dt) {
case DT_FLOAT: case DT_FLOAT:
return sizeof(float); return sizeof(float);
#ifdef MACE_ENABLE_OPENCL
case DT_HALF: case DT_HALF:
return sizeof(half); return sizeof(half);
#endif
case DT_UINT8: case DT_UINT8:
return sizeof(uint8_t); return sizeof(uint8_t);
case DT_INT8: case DT_INT8:
......
...@@ -9,13 +9,17 @@ ...@@ -9,13 +9,17 @@
#include <string> #include <string>
#include "mace/public/mace_types.h" #include "mace/public/mace_types.h"
#ifdef MACE_ENABLE_OPENCL
#include "include/half.hpp" #include "include/half.hpp"
#endif
namespace mace { namespace mace {
typedef int64_t index_t; typedef int64_t index_t;
#ifdef MACE_ENABLE_OPENCL
using half = half_float::half; using half = half_float::half;
#endif
bool DataTypeCanUseMemcpy(DataType dt); bool DataTypeCanUseMemcpy(DataType dt);
...@@ -52,7 +56,9 @@ struct EnumToDataType {}; // Specializations below ...@@ -52,7 +56,9 @@ struct EnumToDataType {}; // Specializations below
typedef TYPE Type; \ typedef TYPE Type; \
} }
#ifdef MACE_ENABLE_OPENCL
MATCH_TYPE_AND_ENUM(half, DT_HALF); MATCH_TYPE_AND_ENUM(half, DT_HALF);
#endif
MATCH_TYPE_AND_ENUM(float, DT_FLOAT); MATCH_TYPE_AND_ENUM(float, DT_FLOAT);
MATCH_TYPE_AND_ENUM(double, DT_DOUBLE); MATCH_TYPE_AND_ENUM(double, DT_DOUBLE);
MATCH_TYPE_AND_ENUM(int32_t, DT_INT32); MATCH_TYPE_AND_ENUM(int32_t, DT_INT32);
......
...@@ -22,8 +22,7 @@ Tensor *Workspace::CreateTensor(const std::string &name, ...@@ -22,8 +22,7 @@ Tensor *Workspace::CreateTensor(const std::string &name,
VLOG(3) << "Tensor " << name << " already exists. Skipping."; VLOG(3) << "Tensor " << name << " already exists. Skipping.";
} else { } else {
VLOG(3) << "Creating Tensor " << name; VLOG(3) << "Creating Tensor " << name;
tensor_map_[name] = tensor_map_[name] = std::unique_ptr<Tensor>(new Tensor(alloc, type));
std::move(std::unique_ptr<Tensor>(new Tensor(alloc, type)));
tensor_map_[name]->SetSourceOpName(name); tensor_map_[name]->SetSourceOpName(name);
} }
return GetTensor(name); return GetTensor(name);
...@@ -72,12 +71,12 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { ...@@ -72,12 +71,12 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
} }
VLOG(3) << "Model data size: " << model_data_size; VLOG(3) << "Model data size: " << model_data_size;
if (type == DeviceType::CPU) { if (type == DeviceType::CPU || type == DeviceType::NEON) {
tensor_buffer_ = std::move(std::unique_ptr<Buffer>( tensor_buffer_ = std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size))); new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size));
} else { } else {
tensor_buffer_ = std::move(std::unique_ptr<Buffer>( tensor_buffer_ = std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), model_data_size))); new Buffer(GetDeviceAllocator(type), model_data_size));
tensor_buffer_->Map(nullptr); tensor_buffer_->Map(nullptr);
tensor_buffer_->Copy(model_data_ptr, 0, model_data_size); tensor_buffer_->Copy(model_data_ptr, 0, model_data_size);
tensor_buffer_->UnMap(); tensor_buffer_->UnMap();
......
...@@ -7,7 +7,7 @@ package( ...@@ -7,7 +7,7 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7") load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled")
cc_library( cc_library(
name = "kernels", name = "kernels",
...@@ -28,7 +28,9 @@ cc_library( ...@@ -28,7 +28,9 @@ cc_library(
"opencl/*.h", "opencl/*.h",
"arm/*.h", "arm/*.h",
]), ]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]), copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = if_android(["-lm"]), linkopts = if_android(["-lm"]),
deps = [ deps = [
"//mace/core", "//mace/core",
...@@ -46,7 +48,9 @@ cc_test( ...@@ -46,7 +48,9 @@ cc_test(
"opencl/*_test.cc", "opencl/*_test.cc",
], ],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]), copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
...@@ -55,15 +59,3 @@ cc_test( ...@@ -55,15 +59,3 @@ cc_test(
"@gtest//:gtest_main", "@gtest//:gtest_main",
], ],
) )
cc_test(
name = "benchmark",
testonly = 1,
srcs = glob(["benchmark/*.cc"]),
linkstatic = 1,
deps = [
":kernels",
"//mace/core",
"//mace/core:test_benchmark_main",
],
)
...@@ -7,7 +7,7 @@ package( ...@@ -7,7 +7,7 @@ package(
licenses(["notice"]) # Apache 2.0 licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7") load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled")
cc_library( cc_library(
name = "test", name = "test",
...@@ -34,7 +34,9 @@ cc_library( ...@@ -34,7 +34,9 @@ cc_library(
["*.h"], ["*.h"],
exclude = ["ops_test_util.h"], exclude = ["ops_test_util.h"],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]), copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
deps = [ deps = [
"//mace/kernels", "//mace/kernels",
], ],
...@@ -47,6 +49,9 @@ cc_test( ...@@ -47,6 +49,9 @@ cc_test(
srcs = glob( srcs = glob(
["*_test.cc"], ["*_test.cc"],
), ),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
...@@ -60,6 +65,9 @@ cc_test( ...@@ -60,6 +65,9 @@ cc_test(
name = "ops_benchmark", name = "ops_benchmark",
testonly = 1, testonly = 1,
srcs = glob(["*_benchmark.cc"]), srcs = glob(["*_benchmark.cc"]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"], linkopts = ["-fopenmp"],
linkstatic = 1, linkstatic = 1,
deps = [ deps = [
......
...@@ -342,7 +342,8 @@ struct OperatorStats { ...@@ -342,7 +342,8 @@ struct OperatorStats {
CallStats stats; CallStats stats;
}; };
struct RunMetadata { class RunMetadata {
public:
std::vector<OperatorStats> op_stats; std::vector<OperatorStats> op_stats;
}; };
......
...@@ -6,15 +6,22 @@ ...@@ -6,15 +6,22 @@
#define MACE_UTILS_ENV_TIME_H_ #define MACE_UTILS_ENV_TIME_H_
#include <stdint.h> #include <stdint.h>
#ifdef __hexagon__
#include <HAP_perf.h>
#else
#include <sys/time.h> #include <sys/time.h>
#include <time.h> #endif
namespace mace { namespace mace {
inline int64_t NowMicros() { inline int64_t NowMicros() {
#ifdef __hexagon__
return HAP_perf_get_time_us();
#else
struct timeval tv; struct timeval tv;
gettimeofday(&tv, nullptr); gettimeofday(&tv, nullptr);
return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec; return static_cast<int64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
#endif
} }
} // namespace mace } // namespace mace
......
...@@ -5,13 +5,16 @@ ...@@ -5,13 +5,16 @@
#ifndef MACE_UTILS_MEMORY_LOGGING_H_ #ifndef MACE_UTILS_MEMORY_LOGGING_H_
#define MACE_UTILS_MEMORY_LOGGING_H_ #define MACE_UTILS_MEMORY_LOGGING_H_
#ifndef __hexagon__
#include <malloc.h> #include <malloc.h>
#endif
#include <string> #include <string>
#include "mace/utils/logging.h" #include "mace/utils/logging.h"
namespace mace { namespace mace {
#ifdef MACE_ENABLE_MEMORY_LOGGING
class MallinfoChangeLogger { class MallinfoChangeLogger {
public: public:
explicit MallinfoChangeLogger(const std::string &name) : name_(name) { explicit MallinfoChangeLogger(const std::string &name) : name_(name) {
...@@ -87,7 +90,6 @@ class MallinfoChangeLogger { ...@@ -87,7 +90,6 @@ class MallinfoChangeLogger {
} }
}; };
#ifdef MACE_ENABLE_MEMORY_LOGGING
#define MACE_MEMORY_LOGGING_GUARD() \ #define MACE_MEMORY_LOGGING_GUARD() \
MallinfoChangeLogger mem_logger_##__line__(std::string(__FILE__) + ":" + \ MallinfoChangeLogger mem_logger_##__line__(std::string(__FILE__) + ":" + \
std::string(__func__)); std::string(__func__));
......
...@@ -5,7 +5,6 @@ ...@@ -5,7 +5,6 @@
#ifndef MACE_UTILS_UTILS_H_ #ifndef MACE_UTILS_UTILS_H_
#define MACE_UTILS_UTILS_H_ #define MACE_UTILS_UTILS_H_
#include <sys/time.h>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <utility> #include <utility>
...@@ -50,7 +49,7 @@ inline std::string ObfuscateString(const std::string &src, ...@@ -50,7 +49,7 @@ inline std::string ObfuscateString(const std::string &src,
for (size_t i = 0; i < src.size(); i++) { for (size_t i = 0; i < src.size(); i++) {
dest[i] = src[i] ^ lookup_table[i % lookup_table.size()]; dest[i] = src[i] ^ lookup_table[i % lookup_table.size()];
} }
return std::move(dest); return dest;
} }
// ObfuscateString(ObfuscateString(str)) ==> str // ObfuscateString(ObfuscateString(str)) ==> str
...@@ -86,7 +85,7 @@ inline std::string ObfuscateSymbol(const std::string &src) { ...@@ -86,7 +85,7 @@ inline std::string ObfuscateSymbol(const std::string &src) {
// There is no collision if it's true for every char at every position // There is no collision if it's true for every char at every position
dest[i] = encode_dict[(idx + i + 31) % encode_dict.size()]; dest[i] = encode_dict[(idx + i + 31) % encode_dict.size()];
} }
return std::move(dest); return dest;
} }
#ifdef MACE_OBFUSCATE_LITERALS #ifdef MACE_OBFUSCATE_LITERALS
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册