提交 c01ea0bb 编写于 作者: 李寅

Make core platform compatible

上级 8f5a79b7
stages:
- cpplint
- pycodestyle
- platform_compitable_tests
- ops_test
- ops_benchmark
- extra_tests
......@@ -34,3 +35,10 @@ extra_tests:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//mace/kernels:kernels_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//mace/utils:tuner_test" --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
platform_compitable_tests:
stage: platform_compitable_tests
script:
- bazel build mace/core:core
......@@ -23,36 +23,41 @@ cc_library(
[
"*.cc",
"runtime/cpu/*.cc",
"runtime/opencl/*.cc",
"runtime/hexagon/*.cc",
],
exclude = [
"*_test.cc",
"runtime/hexagon/hexagon_controller_dummy.cc",
],
) + if_not_hexagon_enabled([
"runtime/hexagon/hexagon_controller_dummy.cc",
]) + if_hexagon_enabled([
) + if_android(glob(
[
"runtime/opencl/*.cc",
],
)) + if_hexagon_enabled(glob([
"runtime/hexagon/*.cc",
"runtime/hexagon/libhexagon_controller.so",
]),
])),
hdrs = glob([
"*.h",
"runtime/cpu/*.h",
"runtime/opencl/*.h",
"runtime/hexagon/*.h",
]),
copts = if_openmp_enabled(["-fopenmp"]),
]) + if_android(glob(
[
"runtime/opencl/*.h",
],
)) + if_hexagon_enabled(glob(["runtime/hexagon/*.h"])),
copts = if_openmp_enabled(["-fopenmp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-ldl"] + if_android([
"-pie",
"-lm",
]),
deps = [
"//mace/utils",
] + if_android([
":opencl_headers",
"//mace/codegen:generated_opencl",
"//mace/codegen:generated_version",
"//mace/utils",
"@half//:half",
] + if_production_mode([
]) + if_production_mode([
"//mace/codegen:generated_tuning_params",
"//mace/utils:utils_prod",
]) + if_not_production_mode([
......
......@@ -3,7 +3,9 @@
//
#include "mace/core/allocator.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_allocator.h"
#endif
namespace mace {
......@@ -23,7 +25,9 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator());
MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator());
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator());
#endif
MACE_REGISTER_ALLOCATOR(DeviceType::HEXAGON, new CPUAllocator());
} // namespace mace
......@@ -6,10 +6,11 @@
#ifndef MACE_CORE_ALLOCATOR_H_
#define MACE_CORE_ALLOCATOR_H_
#include <malloc.h>
#include <stdlib.h>
#include <map>
#include <limits>
#include <vector>
#include <cstring>
#include "mace/core/registry.h"
#include "mace/core/types.h"
......@@ -17,7 +18,9 @@
namespace mace {
#ifdef __ANDROID__
#if defined(__hexagon__)
constexpr size_t kMaceAlignment = 128;
#elif defined(__ANDROID__)
// 16 bytes = 128 bits = 32 * 4 (Neon)
constexpr size_t kMaceAlignment = 16;
#else
......@@ -58,7 +61,7 @@ class CPUAllocator : public Allocator {
void *New(size_t nbytes) const override {
VLOG(3) << "Allocate CPU buffer: " << nbytes;
void *data = nullptr;
#ifdef __ANDROID__
#if defined(__ANDROID__) || defined(__hexagon__)
data = memalign(kMaceAlignment, nbytes);
#else
MACE_CHECK(posix_memalign(&data, kMaceAlignment, nbytes) == 0);
......
......@@ -7,6 +7,7 @@
#include <vector>
#include <algorithm>
#include <numeric>
#include <functional>
#include "mace/core/allocator.h"
......
......@@ -13,7 +13,7 @@
#include <algorithm>
#include <memory>
#include <utility>
#include <cstring>
#include "mace/utils/logging.h"
......
......@@ -11,7 +11,7 @@
namespace mace {
class CallStats;
struct CallStats;
// Wait the call to finish and get the stats if param is not nullptr
struct StatsFuture {
......
......@@ -4,17 +4,23 @@
#include <memory>
#include "mace/core/file_storage.h"
#include "mace/core/net.h"
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/types.h"
#include "mace/public/mace.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/opencl_runtime.h"
#endif // MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_HEXAGON
#include "mace/core/runtime/hexagon/hexagon_control_wrapper.h"
#endif // MACE_ENABLE_HEXAGON
namespace mace {
// Mace Tensor
struct MaceTensor::Impl {
class MaceTensor::Impl {
public:
std::vector<int64_t> shape;
std::shared_ptr<float> data;
};
......@@ -39,8 +45,8 @@ MaceTensor::MaceTensor(const MaceTensor &other) {
MaceTensor::MaceTensor(const MaceTensor &&other) {
impl_ = std::unique_ptr<MaceTensor::Impl>(new MaceTensor::Impl());
impl_->shape = std::move(other.shape());
impl_->data = std::move(other.data());
impl_->shape = other.shape();
impl_->data = other.data();
}
MaceTensor &MaceTensor::operator=(const MaceTensor &other) {
......@@ -50,8 +56,8 @@ MaceTensor &MaceTensor::operator=(const MaceTensor &other) {
}
MaceTensor &MaceTensor::operator=(const MaceTensor &&other) {
impl_->shape = std::move(other.shape());
impl_->data = std::move(other.data());
impl_->shape = other.shape();
impl_->data = other.data();
return *this;
}
......@@ -81,7 +87,9 @@ class MaceEngine::Impl {
DeviceType device_type_;
std::unique_ptr<Workspace> ws_;
std::unique_ptr<NetBase> net_;
#ifdef MACE_ENABLE_HEXAGON
std::unique_ptr<HexagonControlWrapper> hexagon_controller_;
#endif
DISABLE_COPY_AND_ASSIGN(Impl);
};
......@@ -93,8 +101,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
: op_registry_(new OperatorRegistry()),
device_type_(device_type),
ws_(new Workspace()),
net_(nullptr),
hexagon_controller_(nullptr) {
net_(nullptr)
#ifdef MACE_ENABLE_HEXAGON
, hexagon_controller_(nullptr)
#endif
{
LOG(INFO) << "MACE version: " << MaceVersion();
// Set storage path for internal usage
for (auto input_name : input_nodes) {
......@@ -105,6 +116,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
ws_->CreateTensor(MakeString("mace_output_node_", output_name, ":0"),
GetDeviceAllocator(device_type_), DT_FLOAT);
}
#ifdef MACE_ENABLE_HEXAGON
if (device_type == HEXAGON) {
hexagon_controller_.reset(new HexagonControlWrapper());
MACE_CHECK(hexagon_controller_->Config(), "hexagon config error");
......@@ -120,6 +132,7 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
hexagon_controller_->PrintGraph();
}
} else {
#endif
ws_->LoadModelTensor(*net_def, device_type);
// Init model
......@@ -128,11 +141,14 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
if (!net->Run()) {
LOG(FATAL) << "Net init run failed";
}
net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type));
net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type);
#ifdef MACE_ENABLE_HEXAGON
}
#endif
}
MaceEngine::Impl::~Impl() {
#ifdef MACE_ENABLE_HEXAGON
if (device_type_ == HEXAGON) {
if (VLOG_IS_ON(2)) {
hexagon_controller_->GetPerfInfo();
......@@ -141,6 +157,7 @@ MaceEngine::Impl::~Impl() {
MACE_CHECK(hexagon_controller_->TeardownGraph(), "hexagon teardown error");
MACE_CHECK(hexagon_controller_->Finalize(), "hexagon finalize error");
}
#endif
}
MaceStatus MaceEngine::Impl::Run(
......@@ -167,18 +184,25 @@ MaceStatus MaceEngine::Impl::Run(
ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0"));
output_tensors.push_back(output_tensor);
}
#ifdef MACE_ENABLE_HEXAGON
if (device_type_ == HEXAGON) {
MACE_CHECK(input_tensors.size() == 1 && output_tensors.size() == 1,
"HEXAGON not support multiple inputs and outputs yet.");
hexagon_controller_->ExecuteGraph(*input_tensors[0], output_tensors[0]);
} else {
#endif
if (!net_->Run(run_metadata)) {
LOG(FATAL) << "Net run failed";
}
#ifdef MACE_ENABLE_HEXAGON
}
#endif
#ifdef MACE_ENABLE_OPENCL
if (device_type_ == OPENCL) {
OpenCLRuntime::Global()->SaveBuiltCLProgram();
}
#endif
for (auto &output : *outputs) {
Tensor *output_tensor =
ws_->GetTensor(MakeString("mace_output_node_", output.first + ":0"));
......
......@@ -3,38 +3,15 @@
//
#include "mace/public/mace_runtime.h"
#include "mace/core/runtime/cpu/cpu_runtime.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/utils/logging.h"
namespace mace {
std::shared_ptr<KVStorageFactory> kStorageFactory = nullptr;
// Records the GPU performance and priority hints.
// Logs both hints at VLOG level 1, then forwards them unchanged to
// OpenCLRuntime::Configure().
void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) {
VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
// Installs `storage_factory` as the KV storage factory by assigning it to
// the namespace-level kStorageFactory pointer; logs at VLOG level 1 first.
void SetKVStorageFactory(std::shared_ptr<KVStorageFactory> storage_factory) {
VLOG(1) << "Set internal KV Storage Engine";
kStorageFactory = storage_factory;
}
// Public entry point for choosing the OpenMP thread count hint and CPU
// affinity policy. Logs both arguments at VLOG level 1 and returns whatever
// SetOpenMPThreadsAndAffinityPolicy() returns for them.
MaceStatus SetOpenMPThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy) {
VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint
<< ", affinity policy: " << policy;
return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy);
}
// Forwards `num_threads` and the explicit `cpu_ids` list unchanged to
// SetOpenMPThreadsAndAffinityCPUs().
void SetOpenMPThreadAffinity(int num_threads, const std::vector<int> &cpu_ids) {
return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids);
}
// Fills `big_core_ids` and `little_core_ids` by delegating to
// GetCPUBigLittleCoreIDs() and returns its status unchanged.
MaceStatus GetBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids) {
return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids);
}
}; // namespace mace
......@@ -142,12 +142,12 @@ void OperatorDef::CopyFrom(const OperatorDef &from) {
std::copy(from_output.begin(), from_output.end(), output_.begin());
auto from_arg = from.arg();
arg_.resize(from_arg.size());
for (int i = 0; i < from_arg.size(); ++i) {
for (size_t i = 0; i < from_arg.size(); ++i) {
arg_[i].CopyFrom(from_arg[i]);
}
auto from_output_shape = from.output_shape();
output_shape_.resize(from_output_shape.size());
for (int i = 0; i < from_output_shape.size(); ++i) {
for (size_t i = 0; i < from_output_shape.size(); ++i) {
output_shape_[i].CopyFrom(from_output_shape[i]);
}
auto from_data_type = from.output_type();
......@@ -164,7 +164,7 @@ void OperatorDef::CopyFrom(const OperatorDef &from) {
padding_ = from.padding();
auto from_node_input = from.node_input();
node_input_.resize(from_node_input.size());
for (int i = 0; i < from_node_input.size(); ++i) {
for (size_t i = 0; i < from_node_input.size(); ++i) {
node_input_[i].CopyFrom(from_node_input[i]);
}
auto from_out_max_byte_size = from.out_max_byte_size();
......@@ -213,7 +213,7 @@ void OperatorDef::add_out_max_byte_size(int value) {
}
const std::vector<std::string> &OperatorDef::input() const { return input_; }
// Returns the input name stored at position `index`.
// `index` is validated with MACE_CHECK and must satisfy
// 0 <= index < input_.size(). The upper bound is exclusive: index ==
// input_.size() would read one element past the end of the vector.
const std::string &OperatorDef::input(int index) const {
  MACE_CHECK(0 <= index && index < static_cast<int>(input_.size()));
  return input_[index];
}
std::string *OperatorDef::add_input() {
......@@ -230,7 +230,7 @@ void OperatorDef::set_input(const std::vector<std::string> &value) {
}
const std::vector<std::string> &OperatorDef::output() const { return output_; }
// Returns the output name stored at position `index`.
// `index` is validated with MACE_CHECK and must satisfy
// 0 <= index < output_.size(). The upper bound is exclusive: index ==
// output_.size() would read one element past the end of the vector.
const std::string &OperatorDef::output(int index) const {
  MACE_CHECK(0 <= index && index < static_cast<int>(output_.size()));
  return output_[index];
}
std::string *OperatorDef::add_output() {
......
......@@ -15,7 +15,7 @@ NetBase::NetBase(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
Workspace *ws,
DeviceType type)
: op_registry_(op_registry), name_(net_def->name()) {}
: name_(net_def->name()), op_registry_(op_registry) {}
SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
const std::shared_ptr<const NetDef> net_def,
......
......@@ -6,12 +6,15 @@
#include <omp.h>
#include <unistd.h>
#include <sys/types.h>
#include <algorithm>
#include <utility>
#include <vector>
#include "mace/public/mace.h"
#include "mace/public/mace_runtime.h"
#include "mace/utils/logging.h"
namespace mace {
namespace {
......@@ -70,8 +73,11 @@ void SortCPUIdsByMaxFreqAsc(std::vector<int> *cpu_ids, int *big_core_offset) {
}
// Restricts the calling thread to the CPUs set in `mask` via
// sched_setaffinity(); a non-zero result fails the MACE_CHECK, which
// reports errno.
void SetThreadAffinity(cpu_set_t mask) {
#if defined(__ANDROID__)
  const pid_t tid = gettid();
#else
  // pthread_self() returns an opaque pthread_t handle, not a kernel thread
  // id, so it must not be passed to sched_setaffinity(). A pid of 0 tells
  // the kernel to apply the mask to the calling thread.
  const pid_t tid = 0;
#endif
  const int err = sched_setaffinity(tid, sizeof(mask), &mask);
  MACE_CHECK(err == 0, "set affinity error: ", errno);
}
......@@ -163,12 +169,28 @@ MaceStatus SetOpenMPThreadsAndAffinityPolicy(int omp_num_threads_hint,
}
if (omp_num_threads_hint <= 0 ||
omp_num_threads_hint > use_cpu_ids.size()) {
omp_num_threads_hint > static_cast<int>(use_cpu_ids.size())) {
omp_num_threads_hint = use_cpu_ids.size();
}
SetOpenMPThreadsAndAffinityCPUs(omp_num_threads_hint, use_cpu_ids);
return MACE_SUCCESS;
}
// Public entry point for choosing the OpenMP thread count hint and CPU
// affinity policy. Logs both arguments at VLOG level 1 and returns whatever
// SetOpenMPThreadsAndAffinityPolicy() returns for them.
MaceStatus SetOpenMPThreadPolicy(int num_threads_hint,
CPUAffinityPolicy policy) {
VLOG(1) << "Set CPU openmp num_threads_hint: " << num_threads_hint
<< ", affinity policy: " << policy;
return SetOpenMPThreadsAndAffinityPolicy(num_threads_hint, policy);
}
// Forwards `num_threads` and the explicit `cpu_ids` list unchanged to
// SetOpenMPThreadsAndAffinityCPUs().
void SetOpenMPThreadAffinity(int num_threads, const std::vector<int> &cpu_ids) {
return SetOpenMPThreadsAndAffinityCPUs(num_threads, cpu_ids);
}
// Fills `big_core_ids` and `little_core_ids` by delegating to
// GetCPUBigLittleCoreIDs() and returns its status unchanged.
MaceStatus GetBigLittleCoreIDs(std::vector<int> *big_core_ids,
std::vector<int> *little_core_ids) {
return GetCPUBigLittleCoreIDs(big_core_ids, little_core_ids);
}
} // namespace mace
......@@ -12,6 +12,7 @@
#include <vector>
#include <utility>
#include "mace/public/mace_runtime.h"
#include "mace/core/file_storage.h"
#include "mace/core/runtime/opencl/opencl_extension.h"
#include "mace/public/mace.h"
......@@ -25,6 +26,12 @@ extern const std::string kCompiledProgramPlatform;
extern const std::map<std::string, std::vector<unsigned char>>
kEncryptedProgramMap;
// Records the GPU performance and priority hints.
// Logs both hints at VLOG level 1, then forwards them unchanged to
// OpenCLRuntime::Configure().
void SetGPUHints(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) {
VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::Configure(gpu_perf_hint, gpu_priority_hint);
}
const std::string OpenCLErrorToString(cl_int error) {
switch (error) {
case CL_SUCCESS:
......
......@@ -11,8 +11,10 @@
#include "mace/core/buffer.h"
#include "mace/core/preallocated_pooled_allocator.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/types.h"
#ifdef MACE_ENABLE_OPENCL
#include "mace/core/runtime/opencl/cl2_header.h"
#endif
#include "mace/public/mace.h"
#include "mace/utils/logging.h"
......@@ -26,6 +28,7 @@ namespace mace {
break; \
}
#ifdef MACE_ENABLE_OPENCL
#define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \
switch (TYPE_ENUM) { \
CASE(half, SINGLE_ARG(STMTS)) \
......@@ -46,6 +49,27 @@ namespace mace {
DEFAULT; \
break; \
}
#else
// CASES_WITH_DEFAULT variant compiled when MACE_ENABLE_OPENCL is not defined.
// Expands to a switch over TYPE_ENUM with one CASE(type, STMTS) entry per
// element type listed below; unlike the OpenCL-enabled variant there is no
// `half` entry. DT_INVALID runs INVALID and every other unlisted value runs
// DEFAULT.
// NOTE(review): CASE is defined above this macro and is presumed to emit the
// matching `case` label with STMTS — confirm against its definition.
#define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \
switch (TYPE_ENUM) { \
CASE(float, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \
CASE(int32_t, SINGLE_ARG(STMTS)) \
CASE(uint8_t, SINGLE_ARG(STMTS)) \
CASE(uint16_t, SINGLE_ARG(STMTS)) \
CASE(int16_t, SINGLE_ARG(STMTS)) \
CASE(int8_t, SINGLE_ARG(STMTS)) \
CASE(std::string, SINGLE_ARG(STMTS)) \
CASE(int64_t, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \
case DT_INVALID: \
INVALID; \
break; \
default: \
DEFAULT; \
break; \
}
#endif
#define CASES(TYPE_ENUM, STMTS) \
CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \
......@@ -127,6 +151,7 @@ class Tensor {
return buffer_ != nullptr && !buffer_->OnHost() && !has_opencl_image();
}
#ifdef MACE_ENABLE_OPENCL
inline cl::Image *opencl_image() const {
MACE_CHECK(has_opencl_image(), "do not have image");
return static_cast<cl::Image *>(buffer_->buffer());
......@@ -136,6 +161,7 @@ class Tensor {
MACE_CHECK(has_opencl_buffer(), "do not have opencl buffer");
return static_cast<cl::Buffer *>(buffer_->buffer());
}
#endif
inline index_t buffer_offset() const { return buffer_->offset(); }
......
......@@ -30,11 +30,18 @@ bool DataTypeCanUseMemcpy(DataType dt) {
std::string DataTypeToString(const DataType dt) {
static std::map<DataType, std::string> dtype_string_map = {
{DT_FLOAT, "DT_FLOAT"}, {DT_HALF, "DT_HALF"},
{DT_DOUBLE, "DT_DOUBLE"}, {DT_UINT8, "DT_UINT8"},
{DT_INT8, "DT_INT8"}, {DT_INT32, "DT_INT32"},
{DT_UINT32, "DT_UINT32"}, {DT_UINT16, "DT_UINT16"},
{DT_INT64, "DT_INT64"}, {DT_BOOL, "DT_BOOL"},
{DT_FLOAT, "DT_FLOAT"},
#ifdef MACE_ENABLE_OPENCL
{DT_HALF, "DT_HALF"},
#endif
{DT_DOUBLE, "DT_DOUBLE"},
{DT_UINT8, "DT_UINT8"},
{DT_INT8, "DT_INT8"},
{DT_INT32, "DT_INT32"},
{DT_UINT32, "DT_UINT32"},
{DT_UINT16, "DT_UINT16"},
{DT_INT64, "DT_INT64"},
{DT_BOOL, "DT_BOOL"},
{DT_STRING, "DT_STRING"}};
MACE_CHECK(dt != DT_INVALID) << "Not support Invalid data type";
return dtype_string_map[dt];
......@@ -44,8 +51,10 @@ size_t GetEnumTypeSize(const DataType dt) {
switch (dt) {
case DT_FLOAT:
return sizeof(float);
#ifdef MACE_ENABLE_OPENCL
case DT_HALF:
return sizeof(half);
#endif
case DT_UINT8:
return sizeof(uint8_t);
case DT_INT8:
......
......@@ -9,13 +9,17 @@
#include <string>
#include "mace/public/mace_types.h"
#ifdef MACE_ENABLE_OPENCL
#include "include/half.hpp"
#endif
namespace mace {
typedef int64_t index_t;
#ifdef MACE_ENABLE_OPENCL
using half = half_float::half;
#endif
bool DataTypeCanUseMemcpy(DataType dt);
......@@ -52,7 +56,9 @@ struct EnumToDataType {}; // Specializations below
typedef TYPE Type; \
}
#ifdef MACE_ENABLE_OPENCL
MATCH_TYPE_AND_ENUM(half, DT_HALF);
#endif
MATCH_TYPE_AND_ENUM(float, DT_FLOAT);
MATCH_TYPE_AND_ENUM(double, DT_DOUBLE);
MATCH_TYPE_AND_ENUM(int32_t, DT_INT32);
......
......@@ -22,8 +22,7 @@ Tensor *Workspace::CreateTensor(const std::string &name,
VLOG(3) << "Tensor " << name << " already exists. Skipping.";
} else {
VLOG(3) << "Creating Tensor " << name;
tensor_map_[name] =
std::move(std::unique_ptr<Tensor>(new Tensor(alloc, type)));
tensor_map_[name] = std::unique_ptr<Tensor>(new Tensor(alloc, type));
tensor_map_[name]->SetSourceOpName(name);
}
return GetTensor(name);
......@@ -72,12 +71,12 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
}
VLOG(3) << "Model data size: " << model_data_size;
if (type == DeviceType::CPU) {
tensor_buffer_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size)));
if (type == DeviceType::CPU || type == DeviceType::NEON) {
tensor_buffer_ = std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), model_data_ptr, model_data_size));
} else {
tensor_buffer_ = std::move(std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), model_data_size)));
tensor_buffer_ = std::unique_ptr<Buffer>(
new Buffer(GetDeviceAllocator(type), model_data_size));
tensor_buffer_->Map(nullptr);
tensor_buffer_->Copy(model_data_ptr, 0, model_data_size);
tensor_buffer_->UnMap();
......
......@@ -7,7 +7,7 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7")
load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled")
cc_library(
name = "kernels",
......@@ -28,7 +28,9 @@ cc_library(
"opencl/*.h",
"arm/*.h",
]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = if_android(["-lm"]),
deps = [
"//mace/core",
......@@ -46,7 +48,9 @@ cc_test(
"opencl/*_test.cc",
],
),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"],
linkstatic = 1,
deps = [
......@@ -55,15 +59,3 @@ cc_test(
"@gtest//:gtest_main",
],
)
cc_test(
name = "benchmark",
testonly = 1,
srcs = glob(["benchmark/*.cc"]),
linkstatic = 1,
deps = [
":kernels",
"//mace/core",
"//mace/core:test_benchmark_main",
],
)
......@@ -7,7 +7,7 @@ package(
licenses(["notice"]) # Apache 2.0
load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7")
load("//mace:mace.bzl", "if_android", "if_neon_enabled", "if_openmp_enabled", "if_android_armv7", "if_hexagon_enabled")
cc_library(
name = "test",
......@@ -34,7 +34,9 @@ cc_library(
["*.h"],
exclude = ["ops_test_util.h"],
),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
deps = [
"//mace/kernels",
],
......@@ -47,6 +49,9 @@ cc_test(
srcs = glob(
["*_test.cc"],
),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"],
linkstatic = 1,
deps = [
......@@ -60,6 +65,9 @@ cc_test(
name = "ops_benchmark",
testonly = 1,
srcs = glob(["*_benchmark.cc"]),
copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled(["-DMACE_ENABLE_NEON"]) + if_android_armv7(["-mfpu=neon -mfloat-abi=softfp"]) + if_android([
"-DMACE_ENABLE_OPENCL",
]) + if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]),
linkopts = ["-fopenmp"],
linkstatic = 1,
deps = [
......
......@@ -342,7 +342,8 @@ struct OperatorStats {
CallStats stats;
};
struct RunMetadata {
class RunMetadata {
public:
std::vector<OperatorStats> op_stats;
};
......
......@@ -6,15 +6,22 @@
#define MACE_UTILS_ENV_TIME_H_
#include <stdint.h>
#ifdef __hexagon__
#include <HAP_perf.h>
#else
#include <sys/time.h>
#include <time.h>
#endif
namespace mace {
// Returns the current time in microseconds.
// On Hexagon builds the value comes from HAP_perf_get_time_us(); elsewhere
// it is the gettimeofday() reading converted to microseconds.
inline int64_t NowMicros() {
#ifdef __hexagon__
  return HAP_perf_get_time_us();
#else
  struct timeval now;
  gettimeofday(&now, nullptr);
  // Widen the seconds before scaling so the multiplication is done in 64 bits.
  const int64_t seconds_as_micros = static_cast<int64_t>(now.tv_sec) * 1000000;
  return seconds_as_micros + now.tv_usec;
#endif
}
} // namespace mace
......
......@@ -5,13 +5,16 @@
#ifndef MACE_UTILS_MEMORY_LOGGING_H_
#define MACE_UTILS_MEMORY_LOGGING_H_
#ifndef __hexagon__
#include <malloc.h>
#endif
#include <string>
#include "mace/utils/logging.h"
namespace mace {
#ifdef MACE_ENABLE_MEMORY_LOGGING
class MallinfoChangeLogger {
public:
explicit MallinfoChangeLogger(const std::string &name) : name_(name) {
......@@ -87,7 +90,6 @@ class MallinfoChangeLogger {
}
};
#ifdef MACE_ENABLE_MEMORY_LOGGING
#define MACE_MEMORY_LOGGING_GUARD() \
MallinfoChangeLogger mem_logger_##__line__(std::string(__FILE__) + ":" + \
std::string(__func__));
......
......@@ -5,7 +5,6 @@
#ifndef MACE_UTILS_UTILS_H_
#define MACE_UTILS_UTILS_H_
#include <sys/time.h>
#include <sstream>
#include <string>
#include <utility>
......@@ -50,7 +49,7 @@ inline std::string ObfuscateString(const std::string &src,
for (size_t i = 0; i < src.size(); i++) {
dest[i] = src[i] ^ lookup_table[i % lookup_table.size()];
}
return std::move(dest);
return dest;
}
// ObfuscateString(ObfuscateString(str)) ==> str
......@@ -86,7 +85,7 @@ inline std::string ObfuscateSymbol(const std::string &src) {
// There is no collision if it's true for every char at every position
dest[i] = encode_dict[(idx + i + 31) % encode_dict.size()];
}
return std::move(dest);
return dest;
}
#ifdef MACE_OBFUSCATE_LITERALS
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册