diff --git a/mace/core/BUILD b/mace/core/BUILD index e1334527adad0d2109b7a98e471eda987fd6ed9a..252982f005128b1670224c60d327331d819e46ff 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0 load("//mace:mace.bzl", "if_android", "if_profiling_enabled", "if_embed_binary_program") cc_library( - name = "opencl_runtime", + name = "core", srcs = glob([ + "*.cc", "runtime/opencl/*.cc", ]), hdrs = glob([ + "*.h", + "public/*.h", "runtime/opencl/cl2.hpp", "runtime/opencl/*.h", ]), copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] + if_profiling_enabled(["-DMACE_OPENCL_PROFILING"]) + if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]), - linkopts = ["-ldl"], + linkopts = if_android(["-pie", "-ldl"]), deps = [ - ":core", + "//mace/utils:utils_hdrs", "//mace/utils:logging", "//mace/utils:tuner", "@opencl_headers//:opencl20_headers", ], - alwayslink = 1, -) - - -cc_library( - name = "core", - srcs = glob(["*.cc"]), - hdrs = glob(["*.h", "public/*.h"]), - copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], - linkopts = if_android(["-pie"]), - deps = [ - "//mace/utils:utils_hdrs", - "//mace/utils:logging", - ], ) cc_library( diff --git a/mace/core/allocator.cc b/mace/core/allocator.cc index d05c45b352e37e2e7c67226aee28441a15c665b8..fcf3ef6af23510ba9df048ddd7e3a4765c2992af 100644 --- a/mace/core/allocator.cc +++ b/mace/core/allocator.cc @@ -3,6 +3,7 @@ // #include "mace/core/allocator.h" +#include "mace/core/runtime/opencl/opencl_allocator.h" namespace mace { @@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) { MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); +MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); } // namespace mace diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 9d0d0c76a2db2525906685274e8636817f10fd2c..c25deea3d2a88ebded71181b8f894882cc3de1e3 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -5,6 +5,7 @@ #include "mace/core/public/mace.h" #include "mace/core/types.h" #include "mace/core/net.h" +#include "mace/core/operator.h" #include "mace/core/workspace.h" #include "mace/utils/logging.h" @@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const { // Mace Engine MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type): - device_type_(device_type), ws_(new Workspace()), net_(nullptr) { + op_registry_(new OperatorRegistry()), device_type_(device_type), + ws_(new Workspace()), net_(nullptr) { ws_->LoadModelTensor(*net_def, device_type); // Init model - auto net = CreateNet(*net_def, ws_.get(), device_type, NetMode::INIT); + auto net = CreateNet(op_registry_, *net_def, ws_.get(), + device_type, NetMode::INIT); if(!net->Run()) { LOG(FATAL) << "Net init run failed"; } ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT); - net_ = std::move(CreateNet(*net_def, ws_.get(), device_type)); + net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); } MaceEngine::~MaceEngine() = default; bool MaceEngine::Run(const float *input, diff --git a/mace/core/net.cc b/mace/core/net.cc index 3a65de85a2715297c01f6701469f888852cada9c..515d0e98686acaf4c1d795aeee62a7ac9801572f 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -3,22 +3,24 @@ // #include "mace/core/net.h" -#include "mace/core/operator.h" #include "mace/core/workspace.h" #include "mace/utils/utils.h" namespace mace { -NetBase::NetBase(const std::shared_ptr &net_def, 
+NetBase::NetBase(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type) - : name_(net_def->name()) {} + : op_registry_(op_registry), name_(net_def->name()) {} -SimpleNet::SimpleNet(const std::shared_ptr &net_def, +SimpleNet::SimpleNet(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type, const NetMode mode) - : NetBase(net_def, ws, type), device_type_(type){ + : NetBase(op_registry, net_def, ws, type), + device_type_(type) { VLOG(1) << "Constructing SimpleNet " << net_def->name(); for (int idx = 0; idx < net_def->op_size(); ++idx) { const auto &operator_def = net_def->op(idx); @@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr &net_def, << operator_def.type(); std::unique_ptr op{nullptr}; OperatorDef temp_def(operator_def); - op = CreateOperator(temp_def, ws, type, mode); + op = op_registry->CreateOperator(temp_def, ws, type, mode); if (op) { operators_.emplace_back(std::move(op)); } @@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { } if (run_metadata != nullptr) { - OperatorStats op_stats = { op->debug_def().name(), - op->debug_def().type(), - call_stats }; + OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(), + call_stats}; run_metadata->op_stats.emplace_back(op_stats); } @@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { return true; } -unique_ptr CreateNet(const NetDef &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode) { +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const NetDef &net_def, + Workspace *ws, + DeviceType type, + const NetMode mode) { std::shared_ptr tmp_net_def(new NetDef(net_def)); - return CreateNet(tmp_net_def, ws, type, mode); + return CreateNet(op_registry, tmp_net_def, ws, type, mode); } -unique_ptr CreateNet(const std::shared_ptr &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode) { - unique_ptr net(new SimpleNet(net_def, ws, type, mode)); +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const std::shared_ptr net_def, + Workspace *ws, + DeviceType type, + const NetMode mode) { + unique_ptr net(new SimpleNet(op_registry, net_def, ws, type, mode)); return net; } diff --git a/mace/core/net.h b/mace/core/net.h index 8619bcb81e8215108271b91175a75830761782aa..cb8116a9d74b7cc1016fccb29dd1366667c3bdc1 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -6,6 +6,7 @@ #define MACE_CORE_NET_H_ #include "mace/core/common.h" +#include "mace/core/operator.h" #include "mace/core/public/mace.h" namespace mace { @@ -16,7 +17,8 @@ class Workspace; class NetBase { public: - NetBase(const std::shared_ptr &net_def, + NetBase(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type); virtual ~NetBase() noexcept {} @@ -27,13 +29,15 @@ class NetBase { protected: string name_; + const std::shared_ptr op_registry_; DISABLE_COPY_AND_ASSIGN(NetBase); }; class SimpleNet : public NetBase { public: - SimpleNet(const std::shared_ptr &net_def, + SimpleNet(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type, const NetMode mode = NetMode::NORMAL); @@ -47,14 +51,18 @@ class SimpleNet : public NetBase { DISABLE_COPY_AND_ASSIGN(SimpleNet); }; -unique_ptr CreateNet(const NetDef &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode = NetMode::NORMAL); -unique_ptr CreateNet(const std::shared_ptr &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode = 
NetMode::NORMAL); +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const NetDef &net_def, + Workspace *ws, + DeviceType type, + const NetMode mode = NetMode::NORMAL); +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const std::shared_ptr net_def, + Workspace *ws, + DeviceType type, + const NetMode mode = NetMode::NORMAL); } // namespace mace diff --git a/mace/core/operator.cc b/mace/core/operator.cc index 2026105439b96b27bc90bb2b867f7914a66e31dc..ae1770b135d54a20081af51577313c3a8eef3ff4 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -2,12 +2,19 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include + #include "mace/core/operator.h" namespace mace { +OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws) + : operator_ws_(ws), + operator_def_(std::make_shared(operator_def)) {} -OpKeyBuilder::OpKeyBuilder(const char *op_name): op_name_(op_name) {} +OpKeyBuilder::OpKeyBuilder(const char *op_name) : op_name_(op_name) {} + +OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { device_type_ = device; } OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, const DataType allowed) { @@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, const std::string OpKeyBuilder::Build() { static const std::vector type_order = {"T"}; - std::string key = op_name_; + std::stringstream ss; + ss << op_name_; + ss << device_type_; for (auto type : type_order) { - key += type + "_" + DataTypeToString(type_constraint_[type]); + ss << type << "_" << DataTypeToString(type_constraint_[type]); } - return key; -} -std::map *gDeviceTypeRegistry() { - static std::map g_device_type_registry; - return &g_device_type_registry; + return ss.str(); } -MACE_DEFINE_REGISTRY(CPUOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); -MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry); - -MACE_DEFINE_REGISTRY(NEONOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); -MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry); - -MACE_DEFINE_REGISTRY(OPENCLOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); -MACE_REGISTER_DEVICE_TYPE(DeviceType::OPENCL, OPENCLOperatorRegistry); - -unique_ptr CreateOperator(const OperatorDef &operator_def, - Workspace *ws, - DeviceType type, - const NetMode mode) { - OperatorRegistry *registry = gDeviceTypeRegistry()->at(type); - const int dtype = ArgumentHelper::GetSingleArgument(operator_def, - "T", - static_cast(DT_FLOAT)); - const int op_mode_i= ArgumentHelper::GetSingleArgument(operator_def, - "mode", - static_cast(NetMode::NORMAL)); +std::unique_ptr OperatorRegistry::CreateOperator( + const OperatorDef &operator_def, + Workspace *ws, + DeviceType type, + const NetMode mode) const { + const int dtype = ArgumentHelper::GetSingleArgument( + operator_def, "T", static_cast(DT_FLOAT)); + const int op_mode_i = ArgumentHelper::GetSingleArgument( + operator_def, "mode", static_cast(NetMode::NORMAL)); const NetMode op_mode = static_cast(op_mode_i); if (op_mode == mode) { - return registry->Create(OpKeyBuilder(operator_def.type().data()) - .TypeConstraint("T", static_cast(dtype)) - .Build(), - operator_def, - ws); + return registry_.Create( + OpKeyBuilder(operator_def.type().data()) + .Device(type) + .TypeConstraint("T", static_cast(dtype)) + .Build(), + operator_def, ws); } else { return nullptr; } } -OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws) - : operator_ws_(ws), - 
operator_def_(std::make_shared(operator_def)) {} +extern void Register_AddN(OperatorRegistry *op_registry); +extern void Register_BatchNorm(OperatorRegistry *op_registry); +extern void Register_BatchToSpaceND(OperatorRegistry *op_registry); +extern void Register_BiasAdd(OperatorRegistry *op_registry); +extern void Register_BufferToImage(OperatorRegistry *op_registry); +extern void Register_ChannelShuffle(OperatorRegistry *op_registry); +extern void Register_Concat(OperatorRegistry *op_registry); +extern void Register_Conv2D(OperatorRegistry *op_registry); +extern void Register_DepthwiseConv2d(OperatorRegistry *op_registry); +extern void Register_FusedConv2D(OperatorRegistry *op_registry); +extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry); +extern void Register_ImageToBuffer(OperatorRegistry *op_registry); +extern void Register_Pooling(OperatorRegistry *op_registry); +extern void Register_Relu(OperatorRegistry *op_registry); +extern void Register_ResizeBilinear(OperatorRegistry *op_registry); +extern void Register_SpaceToBatchND(OperatorRegistry *op_registry); + +OperatorRegistry::OperatorRegistry() { + Register_AddN(this); + Register_BatchNorm(this); + Register_BatchToSpaceND(this); + Register_BiasAdd(this); + Register_BufferToImage(this); + Register_ChannelShuffle(this); + Register_Concat(this); + Register_Conv2D(this); + Register_DepthwiseConv2d(this); + Register_FusedConv2D(this); + Register_GlobalAvgPooling(this); + Register_ImageToBuffer(this); + Register_Pooling(this); + Register_Relu(this); + Register_ResizeBilinear(this); + Register_SpaceToBatchND(this); +} } // namespace mace diff --git a/mace/core/operator.h b/mace/core/operator.h index 66e4701e8024c00e6b103734b01127ec9c8cd2e3..d673ca81b3a992a7ecaba38744d2c79fe9cf80cb 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -5,13 +5,13 @@ #ifndef MACE_CORE_OPERATOR_H #define MACE_CORE_OPERATOR_H -#include "mace/core/common.h" #include "mace/core/arg_helper.h" +#include "mace/core/common.h" #include "mace/core/future.h" +#include "mace/core/public/mace.h" #include "mace/core/registry.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" -#include "mace/core/public/mace.h" namespace mace { @@ -102,7 +102,7 @@ class Operator : public OperatorBase { } } } - virtual bool Run(StatsFuture *future) override = 0; + virtual bool Run(StatsFuture *future) override = 0; ~Operator() noexcept override {} }; @@ -122,29 +122,12 @@ class Operator : public OperatorBase { #define OP_OUTPUT_TAGS(first_input, ...) \ enum _OutputTags { first_input = 0, __VA_ARGS__ } -typedef Registry - OperatorRegistry; -typedef Registry *( - *RegistryFunction)(); -std::map *gDeviceTypeRegistry(); - -struct DeviceTypeRegisterer { - explicit DeviceTypeRegisterer(int32_t type, RegistryFunction func) { - if (gDeviceTypeRegistry()->count(type)) { - LOG(ERROR) << "Device type " << type - << "registered twice. This should not happen. Did you have " - "duplicated numbers assigned to different devices?"; - std::exit(1); - } - // Calling the registry function to get the actual registry pointer. 
- gDeviceTypeRegistry()->emplace(type, func()); - } -}; - class OpKeyBuilder { public: explicit OpKeyBuilder(const char *op_name); + OpKeyBuilder &Device(DeviceType device); + OpKeyBuilder &TypeConstraint(const char *attr_name, const DataType allowed); template @@ -154,6 +137,7 @@ class OpKeyBuilder { private: std::string op_name_; + DeviceType device_type_; std::map type_constraint_; }; @@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) { return this->TypeConstraint(attr_name, DataTypeToEnum::value); } +class OperatorRegistry { + public: + typedef Registry + RegistryType; + OperatorRegistry(); + ~OperatorRegistry() = default; + RegistryType *registry() { return ®istry_; }; + std::unique_ptr CreateOperator(const OperatorDef &operator_def, + Workspace *ws, + DeviceType type, + const NetMode mode) const; + private: + RegistryType registry_; + DISABLE_COPY_AND_ASSIGN(OperatorRegistry); +}; -#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \ - namespace { \ - static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \ - type, ®istry_function); \ - } - -MACE_DECLARE_REGISTRY(CPUOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); - -#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \ - MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_CPU_OPERATOR(name, ...) \ - MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__) - -MACE_DECLARE_REGISTRY(NEONOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); - -#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \ - MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_NEON_OPERATOR(name, ...) \ - MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__) - -MACE_DECLARE_REGISTRY(OPENCLOperatorRegistry, +MACE_DECLARE_REGISTRY(OpRegistry, OperatorBase, const OperatorDef &, Workspace *); -#define REGISTER_OPENCL_OPERATOR_CREATOR(key, ...) \ - MACE_REGISTER_CREATOR(OPENCLOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_OPENCL_OPERATOR(name, ...) \ - MACE_REGISTER_CLASS(OPENCLOperatorRegistry, name, __VA_ARGS__) - -unique_ptr CreateOperator(const OperatorDef &operator_def, - Workspace *ws, - DeviceType type, - const NetMode mode); +#define REGISTER_OPERATOR(op_registry, name, ...) 
\ + MACE_REGISTER_CLASS(OpRegistry, op_registry->registry(), name, __VA_ARGS__) } // namespace mace diff --git a/mace/core/public/mace.h b/mace/core/public/mace.h index 1b0d3ceb5cfae0ae6414d5f4686732e200088b7c..a363610f6f93d540d3235bcff332eeb92937fd39 100644 --- a/mace/core/public/mace.h +++ b/mace/core/public/mace.h @@ -302,10 +302,12 @@ class NetDef { class Workspace; class NetBase; +class OperatorRegistry; class MaceEngine { public: - explicit MaceEngine(const NetDef *net_def, DeviceType device_type); + explicit MaceEngine(const NetDef *net_def, + DeviceType device_type); ~MaceEngine(); bool Run(const float *input, const std::vector &input_shape, @@ -314,6 +316,7 @@ class MaceEngine { MaceEngine &operator=(const MaceEngine&) = delete; private: + std::shared_ptr op_registry_; DeviceType device_type_; std::unique_ptr ws_; std::unique_ptr net_; diff --git a/mace/core/registry.h b/mace/core/registry.h index c92ebb123f03c8410129aa7ade5057e4eabe5195..5c82ef2e4b60ce3645e6c5708a2b7442f9e8a85e 100644 --- a/mace/core/registry.h +++ b/mace/core/registry.h @@ -17,24 +17,27 @@ class Registry { Registry() : registry_() {} void Register(const SrcType &key, Creator creator) { + VLOG(2) << "Registering: " << key; std::lock_guard lock(register_mutex_); MACE_CHECK(registry_.count(key) == 0, "Key already registered."); registry_[key] = creator; } - inline bool Has(const SrcType &key) { return registry_.count(key) != 0; } + inline bool Has(const SrcType &key) const { + return registry_.count(key) != 0; + } - unique_ptr Create(const SrcType &key, Args... args) { + unique_ptr Create(const SrcType &key, Args... args) const { if (registry_.count(key) == 0) { LOG(FATAL) << "Key not registered: " << key; } - return registry_[key](args...); + return registry_.at(key)(args...); } /** * Returns the keys currently registered as a vector. */ - vector Keys() { + vector Keys() const { vector keys; for (const auto &it : registry_) { keys.push_back(it.first); @@ -77,39 +80,31 @@ class Registerer { typedef Registerer \ Registerer##RegistryName; +/* #define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \ Registry *RegistryName() { \ static Registry *registry = \ new Registry(); \ return registry; \ } +*/ #define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ ##__VA_ARGS__) +/* #define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \ MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ ##__VA_ARGS__) +*/ -#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \ - namespace { \ - static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \ - key, RegistryName(), __VA_ARGS__); - -#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \ - namespace { \ - static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \ - key, \ - RegistryName(), \ - Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \ - } - -#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \ - MACE_REGISTER_TYPED_CREATOR(RegistryName, key, __VA_ARGS__) +#define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \ + Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(l_##RegistryName)( \ + key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); -#define MACE_REGISTER_CLASS(RegistryName, key, ...) \ - MACE_REGISTER_TYPED_CLASS(RegistryName, key, __VA_ARGS__) +#define MACE_REGISTER_CLASS(RegistryName, registry, key, ...) 
\ + MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, __VA_ARGS__) } // namespace mace diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 4e14636e00d49b0ac023f1e319818258f2d23000..929b48186203e51bd4a778208a2f83176fa0ae96 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) { bool OpenCLAllocator::OnHost() { return false; } -MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); - } // namespace mace diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 2d00699ed863acac513e39e23b71b048b6d577b2..51dc311fb15a915ab17e381c33d8948671c48924 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -105,7 +105,8 @@ class Tensor { inline index_t dim_size() const { return shape_.size(); } inline index_t dim(unsigned int index) const { - MACE_CHECK(index < shape_.size(), "Exceeding ndim limit"); + MACE_CHECK(index < shape_.size(), "Dim out of range: ", + index, " >= ", shape_.size()); return shape_[index]; } diff --git a/mace/examples/BUILD b/mace/examples/BUILD index ffdb4f7265da572368f6a61cffa81c43c96a1218..9db67631e1cf877c60c96f151ee1ed8a193f4345 100644 --- a/mace/examples/BUILD +++ b/mace/examples/BUILD @@ -11,7 +11,6 @@ cc_binary( deps = [ "//mace/core", "//mace/ops", - "//mace/core:opencl_runtime", ], ) diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index da1ee8ec3e5314e79c5ef286626f39504f99ea42..a2ad37dbe93a1f00a7875cd244f2ecdf3d922f5e 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -26,7 +26,6 @@ cc_library( linkopts = if_android(["-lm"]), deps = [ "//mace/core", - "//mace/core:opencl_runtime", "//mace/utils:utils_hdrs", ], ) diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index d29944dec3237ffd7a72d15317353dc9ad59cd27..d9b514d4e3043c598e01d02aa3612c7ecac73abf 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); +void Register_AddN(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + AddNOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + AddNOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + AddNOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + AddNOp); +} } // namespace mace diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index 717be1ea886e933a29b151276f6c653c2177cb3c..2d5293188f6b67ab48c8b6e09c233c9300350fb9 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { OpsTestNet net; // Add input data for (int i = 0; i < inputs; ++i) { - net.AddRandomInput( - internal::MakeString("Input", i).c_str(), {n, h, w, c}); + net.AddRandomInput(internal::MakeString("Input", 
i).c_str(), + {n, h, w, c}); } if (D == DeviceType::OPENCL) { @@ -30,16 +30,16 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { op_def_builder.Input(internal::MakeString("InputImage", i).c_str()); } op_def_builder.Output("OutputImage") - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); } else { OpDefBuilder op_def_builder("AddN", "AddNBM"); for (int i = 0; i < inputs; ++i) { op_def_builder.Input(internal::MakeString("Input", i).c_str()); } op_def_builder.Output("Output") - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); } // Warm-up diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index 6136c814f4ff6f11bc5697295729b4b54ea8e299..ade5c7c7c5d107a15bdf55e68133adb227fd9f64 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); +void Register_BatchNorm(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + BatchNormOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + BatchNormOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchNormOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchNormOp); +} } // namespace mace diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 3f54e745c3d527d1cc786793008b9c97a6362214..976bc2419f621c8ddf5cc088ca01e4d44eff02e3 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -23,11 +23,16 @@ static void BatchNorm( net.AddRandomInput("Var", {channels}, true); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormBM") .Input("InputImage") .Input("ScaleImage") @@ -37,8 +42,7 @@ static void BatchNorm( .AddFloatArg("epsilon", 1e-3) .Output("Output") .Finalize(net.NewOperatorDef()); - } - else { + } else { OpDefBuilder("BatchNorm", 
"BatchNormBM") .Input("Input") .Input("Scale") @@ -50,7 +54,6 @@ static void BatchNorm( .Finalize(net.NewOperatorDef()); } - // tuning setenv("MACE_TUNING", "1", 1); net.RunOp(D); @@ -79,9 +82,8 @@ static void BatchNorm( } \ BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) -#define BM_BATCH_NORM(N, C, H, W, TYPE) \ - BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \ - BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);\ +#define BM_BATCH_NORM(N, C, H, W, TYPE) \ + BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \ BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL); BM_BATCH_NORM(1, 1, 512, 512, float); diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index 6cc6eea342f26d4ac124bed7d7de3d89ad877fe2..595635e7a6f7bfdbe119746b42a2799f6c07da2c 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -15,18 +15,23 @@ void Simple() { // Add input data net.AddInputFromArray("Input", {1, 6, 2, 1}, - {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); + {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); net.AddInputFromArray("Scale", {1}, {4.0f}); net.AddInputFromArray("Offset", {1}, {2.0}); net.AddInputFromArray("Mean", {1}, {10}); net.AddInputFromArray("Var", {1}, {11.67f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -41,7 +46,8 @@ void Simple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") @@ -64,9 +70,7 @@ void Simple() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-2); } -TEST_F(BatchNormOpTest, SimpleCPU) { - Simple(); -} +TEST_F(BatchNormOpTest, SimpleCPU) { Simple(); } /* TEST_F(BatchNormOpTest, SimpleNEON) { @@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) { } */ -TEST_F(BatchNormOpTest, SimpleOPENCL) { - Simple(); -} +TEST_F(BatchNormOpTest, SimpleOPENCL) { Simple(); } /* TEST_F(BatchNormOpTest, SimpleRandomNeon) { @@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", {batch, channels, height, +width}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", {batch, channels, height, +width}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ 
-184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { expected.Copy(*net.GetOutput("Output")); // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } @@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { expected.Copy(*net.GetOutput("Output")); // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); } @@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", 
{batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { Tensor expected; expected.Copy(*net.GetOutput("Output")); - // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } @@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { Tensor expected; expected.Copy(*net.GetOutput("Output")); - // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); } } diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 4e4ea6610e408889b8a4ad29a786fdd33853703a..ece90171d44f31c3656de7dff712e8d9de0ba5a1 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -6,13 +6,17 @@ namespace mace { -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") - .TypeConstraint("T") - .Build(), - BatchToSpaceNDOp); 
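// --- Editor's note (hedged sketch, not part of this commit) -----------------
// The hunks above replace the per-device REGISTER_CPU/NEON/OPENCL_OPERATOR
// macros with a single OperatorRegistry whose lookup key also encodes the
// device via OpKeyBuilder::Device(). A new operator would plug in roughly as
// below. "MyOp" is a hypothetical class name, assumed -- like the existing
// ops in this diff -- to be templated on DeviceType and data type; the exact
// template arguments are not legible in this extraction, so treat them as an
// assumption rather than the repository's actual signatures.
#include "mace/core/operator.h"

namespace mace {

void Register_MyOp(OperatorRegistry *op_registry) {
  // CPU float variant: the key is built from op name + device + "T" dtype.
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
                                     .Device(DeviceType::CPU)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    MyOp<DeviceType::CPU, float>);

  // OpenCL half variant, mirroring the float/half pairs registered above.
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
                                     .Device(DeviceType::OPENCL)
                                     .TypeConstraint<half>("T")
                                     .Build(),
                    MyOp<DeviceType::OPENCL, half>);
}

}  // namespace mace

// The hook must also be declared (extern void Register_MyOp(OperatorRegistry*))
// in mace/core/operator.cc and invoked from the OperatorRegistry constructor,
// mirroring Register_AddN, Register_BatchNorm, etc. in this diff.
// -----------------------------------------------------------------------------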
-REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") - .TypeConstraint("T") - .Build(), - BatchToSpaceNDOp); +void Register_BatchToSpaceND(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchToSpaceNDOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchToSpaceNDOp); +} } // namespace mace diff --git a/mace/ops/batch_to_space_benchmark.cc b/mace/ops/batch_to_space_benchmark.cc index a3a0e0b6ee508e0f1333e33b70dabf982c9cd903..93df21f931979ba75040f5d3d7002ee4b674ab03 100644 --- a/mace/ops/batch_to_space_benchmark.cc +++ b/mace/ops/batch_to_space_benchmark.cc @@ -14,7 +14,8 @@ static void BMBatchToSpace( OpsTestNet net; net.AddRandomInput("Input", {batch, height, width, channels}); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") .Input("InputImage") .Output("OutputImage") @@ -36,16 +37,17 @@ static void BMBatchToSpace( } #define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \ - static void BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - BMBatchToSpace(iters, N, C, H, W, ARG); \ - } \ + static void \ + BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + BMBatchToSpace(iters, N, C, H, W, ARG); \ + } \ BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE) -#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \ +#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \ BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL); BM_BATCH_TO_SPACE(128, 8, 8, 128, 2, float); diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index 5dc8a4436e2de706e94ed4d585088d39815280b8..01a7582dccba285633784dbaa3b2ae43b7ed366b 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -6,28 +6,34 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); +void Register_BiasAdd(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + BiasAddOp); -/* -#if __ARM_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); -#endif // __ARM_NEON -*/ + /* + #if __ARM_NEON + REGISTER_OPERATOR(op_registry,OpKeyBuilder("BiasAdd") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + BiasAddOp); + #endif // __ARM_NEON + */ -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BiasAddOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BiasAddOp); +} } // namespace mace diff --git a/mace/ops/bias_add_benchmark.cc 
b/mace/ops/bias_add_benchmark.cc index d277a15d8abb7513bc6a8c28878d4b38873aacad..917c28a15898d2d4e4cf8fafd427de4a03bcc378 100644 --- a/mace/ops/bias_add_benchmark.cc +++ b/mace/ops/bias_add_benchmark.cc @@ -9,8 +9,7 @@ namespace mace { template -static void BiasAdd( - int iters, int batch, int channels, int height, int width) { +static void BiasAdd(int iters, int batch, int channels, int height, int width) { mace::testing::StopTiming(); OpsTestNet net; @@ -20,15 +19,16 @@ static void BiasAdd( net.AddRandomInput("Bias", {channels}, true); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddBM") .Input("InputImage") .Input("BiasImage") .Output("Output") .Finalize(net.NewOperatorDef()); - } - else { + } else { OpDefBuilder("BiasAdd", "BiasAddBM") .Input("Input") .Input("Bias") @@ -51,12 +51,12 @@ static void BiasAdd( #define BM_BIAS_ADD_MACRO(N, C, H, W, TYPE, DEVICE) \ static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ BiasAdd(iters, N, C, H, W); \ - } \ + } \ BENCHMARK(BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) #define BM_BIAS_ADD(N, C, H, W, TYPE) \ diff --git a/mace/ops/bias_add_test.cc b/mace/ops/bias_add_test.cc index b9e347388a8af57df065ad76befe6aea1a25eb2f..ce83ebd72909d0d06bc5755b30537e2b3ddc3c93 100644 --- a/mace/ops/bias_add_test.cc +++ b/mace/ops/bias_add_test.cc @@ -15,12 +15,14 @@ void BiasAddSimple() { // Add input data net.AddInputFromArray("Input", {1, 6, 2, 1}, - {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); + {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); net.AddInputFromArray("Bias", {1}, {0.5f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -31,7 +33,8 @@ void BiasAddSimple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("BiasAdd", "BiasAddTest") .Input("Input") @@ -43,16 +46,14 @@ void BiasAddSimple() { } // Check - auto expected = - CreateTensor({1, 6, 2, 1}, {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, - 11.5, 13.5, 13.5, 15.5, 15.5}); + auto expected = CreateTensor( + {1, 6, 2, 1}, + {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, 11.5, 13.5, 13.5, 15.5, 15.5}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-2); } -TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { - BiasAddSimple(); -} +TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { BiasAddSimple(); } TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) { BiasAddSimple(); @@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add 
input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Bias", {channels}, true); // run cpu @@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { expected.Copy(*net.GetOutput("Output")); // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } @@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Bias", {channels}, true); // run cpu @@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { Tensor expected; expected.Copy(*net.GetOutput("Output")); - // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } - } diff --git a/mace/ops/buffer_to_image.cc b/mace/ops/buffer_to_image.cc index 56711794b7fef1546ec67e63d873289bea2ef1cc..c9118a19392b9f90fe3eb80bba2a1b9b8a17f4b3 100644 --- a/mace/ops/buffer_to_image.cc +++ b/mace/ops/buffer_to_image.cc @@ -6,14 +6,18 @@ namespace mace { -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage") - .TypeConstraint("T") - .Build(), - BufferToImageOp); +void Register_BufferToImage(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BufferToImageOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage") - .TypeConstraint("T") - .Build(), - BufferToImageOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BufferToImageOp); +} } // namespace mace diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index 7d36b1af13034ec0a1d51b451edf3df449f83752..b12d8beb826c961d2fa77a2c4eb5d618fc8188be 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -6,9 +6,12 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("ChannelShuffle") - .TypeConstraint("T") - .Build(), - ChannelShuffleOp); +void Register_ChannelShuffle(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, 
OpKeyBuilder("ChannelShuffle") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ChannelShuffleOp); +} } // namespace mace diff --git a/mace/ops/channel_shuffle_benchmark.cc b/mace/ops/channel_shuffle_benchmark.cc index d9f63f3acffaf8861a120bec381ed175db2963fb..ca75ce105a6a3587bba7b1e422a8e26b0600d85f 100644 --- a/mace/ops/channel_shuffle_benchmark.cc +++ b/mace/ops/channel_shuffle_benchmark.cc @@ -23,7 +23,8 @@ static void ChannelShuffle( .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", + {batch, channels, height, width}); // Warm-up for (int i = 0; i < 5; ++i) { diff --git a/mace/ops/channel_shuffle_test.cc b/mace/ops/channel_shuffle_test.cc index f42e3b1b6caf492058a1d00359fd99bdf587343c..8f5736d4bc586d4989ff193e2956a43c66252e03 100644 --- a/mace/ops/channel_shuffle_test.cc +++ b/mace/ops/channel_shuffle_test.cc @@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) { .AddIntArg("group", 4) .Finalize(net.NewOperatorDef()); - // Add input data net.AddInputFromArray( "Input", {1, 8, 1, 2}, diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index 929da85f2f36c8c8cff02608523475e14f5cbc47..71be2fc3eb1100c36a23907aeccbec41c2dba899 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -6,21 +6,28 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); -REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); +void Register_Concat(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ConcatOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ConcatOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ConcatOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ConcatOp); +} -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); } // namespace mace diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index 0e4425251426e8056a2973352ded79cd2b4f88ef..6a3dda02f7448b968456ba334a1167349c4ef6e4 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters, net.AddRandomInput("Input0", shape0); net.AddRandomInput("Input1", shape1); - BufferToImage(net, "Input0", "InputImage0", kernels::BufferType::IN_OUT); - BufferToImage(net, "Input1", "InputImage1", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input0", "InputImage0", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Input1", "InputImage1", + kernels::BufferType::IN_OUT); OpDefBuilder("Concat", "ConcatBM") .Input("InputImage0") .Input("InputImage1") @@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters, net.RunOp(DeviceType::OPENCL); } - const int64_t tot = static_cast(iters) * + const int64_t tot = + static_cast(iters) * (net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size()); mace::testing::ItemsProcessed(tot); testing::BytesProcessed(tot * sizeof(T)); diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index 
ca1c06d6197c2a16d8efd4b2ea8cfba13bafccdb..49d55d2a830e59b62e81ce9592b7327ad3a7a219 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) { for (int i = 0; i < num_inputs; ++i) { builder = builder.Input(("Input" + ToString(i)).c_str()); } - builder.AddIntArg("axis", axis).Output("Output").Finalize(net.NewOperatorDef()); + builder.AddIntArg("axis", axis) + .Output("Output") + .Finalize(net.NewOperatorDef()); std::vector shape_data; GenerateRandomIntTypeData({dim}, shape_data, 1, dim); @@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) { concat_axis_size += input_shapes[i][axis]; GenerateRandomRealTypeData(input_shapes[i], inputs[i]); input_ptrs[i] = inputs[i].data(); - net.AddInputFromArray(("Input" + ToString(i)).c_str(), - input_shapes[i], inputs[i]); + net.AddInputFromArray( + ("Input" + ToString(i)).c_str(), input_shapes[i], inputs[i]); } // Run @@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) { } } -template +template void OpenclRandomTest(const std::vector> &shapes, const int axis) { srand(time(nullptr)); @@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector> &shapes, const std::string input_name = ("Input" + ToString(i)).c_str(); const std::string image_name = ("InputImage" + ToString(i)).c_str(); concat_axis_size += shapes[i][axis]; - net.AddRandomInput(input_name, - shapes[i]); - BufferToImage(net, input_name, image_name, kernels::BufferType::IN_OUT); + net.AddRandomInput(input_name, shapes[i]); + BufferToImage(net, input_name, image_name, + kernels::BufferType::IN_OUT); } auto builder = OpDefBuilder("Concat", "ConcatTest"); @@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector> &shapes, // Run net.RunOp(DeviceType::OPENCL); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); // Check auto output = net.GetOutput("Output"); @@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector> &shapes, while (output_ptr != (output->data() + output->size())) { for (int i = 0; i < num_inputs; ++i) { index_t num_elements = - std::accumulate(shapes[i].begin() + axis, shapes[i].end(), - 1, std::multiplies()); + std::accumulate(shapes[i].begin() + axis, shapes[i].end(), 1, + std::multiplies()); const std::string input_name = ("Input" + ToString(i)).c_str(); const Tensor *input_tensor = net.GetTensor(input_name.data()); Tensor::MappingGuard input_guard(input_tensor); const float *input_ptr = input_tensor->data() + k * num_elements; for (int j = 0; j < num_elements; ++j) { - EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2) << "With index: " << i << ", " << j; + EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2) + << "With index: " << i << ", " << j; } } k++; @@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector> &shapes, } TEST_F(ConcatOpTest, OPENCLAligned) { - OpenclRandomTest({ - {3, 32, 32, 32}, - {3, 32, 32, 64} - }, - 3); + OpenclRandomTest({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3); } TEST_F(ConcatOpTest, OPENCLHalfAligned) { - OpenclRandomTest({ - {3, 32, 32, 32}, - {3, 32, 32, 64} - }, - 3); + OpenclRandomTest({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3); } TEST_F(ConcatOpTest, OPENCLUnAligned) { - OpenclRandomTest({ - {3, 32, 32, 13}, - {3, 32, 32, 17} - }, - 3); + OpenclRandomTest({{3, 32, 32, 13}, {3, 32, 32, 17}}, 3); } diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index ad771df67a4bb266be6e265e081fb54dcc9b9a2e..f7191c370b4bd713f94d825775c745560e234422 100644 --- a/mace/ops/conv_2d.cc +++ 
b/mace/ops/conv_2d.cc @@ -6,31 +6,38 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); - -REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); +void Register_Conv2D(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + Conv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + Conv2dOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + Conv2dOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); - -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + Conv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + Conv2dOp); +} } // namespace mace diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index 0870f2b829d20004e85b48606e4bc0b59c597969..83e264d8cd5e0ed96e94a9598dfa5249074066fc 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -33,9 +33,12 @@ static void Conv2d(int iters, net.AddRandomInput("Bias", {output_channels}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -89,7 +92,7 @@ static void Conv2d(int iters, BENCHMARK( \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) -#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ +#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL); // ICNet @@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half); BM_CONV_2D(1, 512, 64, 64, 1, 1, 1, SAME, 256, half); // Test RGB <-> YUV -//BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float); -//BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float); +// BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float); +// BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float); // -//BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments -//BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float); -//BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float); -//BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float); -//BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float); -//BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float); -//BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 
3, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float); -//BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float); -//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad +// alignments +// BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float); +// BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float); +// BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float); +// BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float); +// BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float); +// BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float); +// BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float); } // namespace mace diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index f137da6e911ff3ffc986393f19c2f8326077e03c..d4df0df8f11453d3462d6e56c0fa9e75ae8185ca 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -10,7 +10,7 @@ using namespace mace; class Conv2dOpTest : public OpsTestBase {}; -template +template void TestSimple3x3VALID() { OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -42,10 +42,9 @@ void TestSimple3x3VALID() { auto expected = CreateTensor({1, 1, 1, 1}, {18.1f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); - } -template +template void TestSimple3x3SAME() { OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) { } #endif -template +template void TestNHWCSimple3x3VALID() { OpsTestNet net; // Add input data @@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() { net.AddInputFromArray("Bias", {1}, {0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", 
kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2dTest") @@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.01); } -template +template void TestNHWCSimple3x3SAME() { OpsTestNet net; @@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() { net.AddInputFromArray("Bias", {1}, {0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2dTest") @@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) { TestNHWCSimple3x3SAME(); } -template +template void TestSimple3x3WithoutBias() { OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } - #ifdef __ARM_NEON TEST_F(Conv2dOpTest, NEONWithouBias) { TestSimple3x3WithoutBias(); } #endif -template +template void TestNHWCSimple3x3WithoutBias() { OpsTestNet net; @@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() { // Run net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2dTest") .Input("Input") @@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) { TestNHWCSimple3x3WithoutBias(); } -template +template static void TestCombined3x3() { // Construct graph OpsTestNet net; @@ -335,17 +344,13 @@ static void TestCombined3x3() { 4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); - } - #ifdef __ARM_NEON -TEST_F(Conv2dOpTest, NEONCombined) { - TestCombined3x3(); -} +TEST_F(Conv2dOpTest, NEONCombined) { TestCombined3x3(); } #endif -template +template static void TestNHWCCombined3x3() { // Construct graph OpsTestNet net; @@ -353,8 +358,8 @@ static void TestNHWCCombined3x3() { // Add input data net.AddInputFromArray( "Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); 
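// Editorial sketch, not part of the patch: every OPENCL branch in these tests
// performs the same buffer <-> image round trip: upload the host buffers as
// image objects, run the op against the "*Image" tensors, then download the
// result for comparison. RoundTripThroughImages below is a made-up wrapper,
// and the <D, T> template arguments on the helpers are assumed:
template <DeviceType D, typename T>
void RoundTripThroughImages(OpsTestNet &net) {
  BufferToImage<D, T>(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
  BufferToImage<D, T>(net, "Filter", "FilterImage",
                      kernels::BufferType::FILTER);
  BufferToImage<D, T>(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT);
  // ...build the OpDef against the *Image tensors, then run on the device...
  net.RunOp(D);
  ImageToBuffer<D, T>(net, "OutputImage", "Output",
                      kernels::BufferType::IN_OUT);
}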
net.AddInputFromArray( "Filter", {3, 3, 2, 2}, {1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, @@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() { net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") @@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() { // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2DTest") .Input("Input") @@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() { .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - } // Check auto expected = CreateTensor( - {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, - 12.1f, 6.2f, 18.1f, 9.2f, 12.1f, 6.2f, - 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}); + {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f, + 9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.01); - } TEST_F(Conv2dOpTest, CPUStride2) { @@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) { TestNHWCCombined3x3(); } -template +template void TestConv1x1() { // Construct graph OpsTestNet net; @@ -435,9 +441,12 @@ void TestConv1x1() { net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") @@ -451,7 +460,8 @@ void TestConv1x1() { // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2DTest") .Input("Input") @@ -479,15 +489,11 @@ void TestConv1x1() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -TEST_F(Conv2dOpTest, CPUConv1x1) { - TestConv1x1(); -} +TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } -TEST_F(Conv2dOpTest, OPENCLConv1x1) { - TestConv1x1(); -} +TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1(); } -template +template static void TestComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, @@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector &shape) { expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + 
BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector &shape) { // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, .Finalize(net.NewOperatorDef()); std::vector float_input_data; - GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data); + GenerateRandomRealTypeData({batch, height, width, input_channels}, + float_input_data); std::vector float_filter_data; - GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + GenerateRandomRealTypeData( + {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); std::vector float_bias_data; GenerateRandomRealTypeData({output_channels}, float_bias_data); // Add input data - net.AddInputFromArray("Input", {batch, height, width, input_channels}, float_input_data); net.AddInputFromArray( - "Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + "Input", {batch, height, width, input_channels}, float_input_data); + net.AddInputFromArray( + "Filter", {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); net.AddInputFromArray("Bias", {output_channels}, float_bias_data); // run on cpu @@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); }; diff --git a/mace/ops/core_test.cc b/mace/ops/core_test.cc index 8761d04fba494c3d39cdd196dfd05c01c692f687..206c4d6c5e1119b5e0594c65800f2904170f1768 100644 --- a/mace/ops/core_test.cc +++ b/mace/ops/core_test.cc @@ -7,7 +7,6 @@ namespace mace { TEST(CoreTest, INIT_MODE) { - std::vector op_defs; Workspace ws; @@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) { .Output("B2IOutput") .AddIntArg("buffer_type", kernels::BufferType::FILTER) .AddIntArg("mode", static_cast(NetMode::INIT)) - .Finalize(&op_defs[op_defs.size()-1]); + .Finalize(&op_defs[op_defs.size() - 1]); Tensor *input = - ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum::v()); + ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL), + 
DataTypeToEnum::v()); input->Resize({1, 3, 3, 3}); { Tensor::MappingGuard input_mapper(input); @@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) { .Input("B2IOutput") .Output("Output") .AddIntArg("buffer_type", kernels::BufferType::FILTER) - .Finalize(&op_defs[op_defs.size()-1]); + .Finalize(&op_defs[op_defs.size() - 1]); NetDef net_def; for (auto &op_def : op_defs) { net_def.add_op()->CopyFrom(op_def); } - auto net = CreateNet(net_def, &ws, DeviceType::OPENCL, NetMode::INIT); + std::shared_ptr op_registry(new OperatorRegistry()); + auto net = + CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL, NetMode::INIT); net->Run(); EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr); EXPECT_TRUE(ws.GetTensor("Output") == nullptr); - net = CreateNet(net_def, &ws, DeviceType::OPENCL); + net = CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL); net->Run(); EXPECT_TRUE(ws.GetTensor("Output") != nullptr); - ExpectTensorNear(*ws.GetTensor("Input"), *ws.GetTensor("Output"), 1e-5); + ExpectTensorNear(*ws.GetTensor("Input"), *ws.GetTensor("Output"), + 1e-5); } -} // namespace mace +} // namespace mace diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index ed13d0994336a398156e2aa056ec354fc35f8d72..4e99a378e64da0230b40d6343dbaa1036ffc7337 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -6,21 +6,26 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d") - .TypeConstraint("T") - .Build(), - DepthwiseConv2dOp); +void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + DepthwiseConv2dOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d") - .TypeConstraint("T") - .Build(), - DepthwiseConv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + DepthwiseConv2dOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d") - .TypeConstraint("T") - .Build(), - DepthwiseConv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + DepthwiseConv2dOp); +} } // namespace mace diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index b3fbdeb21bd12082437f4144d30c00298397842c..e2f2872dffcfb348278555f8c29ed1c00f5c70f5 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -26,7 +26,7 @@ void SimpleValidTest() { // Add input data net.AddInputFromArray("Input", {1, 2, 2, 3}, - {1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12}); + {1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12}); net.AddInputFromArray( "Filter", {2, 2, 2, 2}, {1.0f, 5.0f, 9.0f, 13.0f, 2.0f, 6.0f, 10.0f, 14.0f, 3.0f, 7.0f, 11.0f, @@ -41,12 +41,9 @@ void SimpleValidTest() { {196.1f, 252.1f, 216.2f, 280.2f, 272.3f, 344.3f, 296.4f, 376.4f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); - } -TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { - SimpleValidTest(); -} +TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { SimpleValidTest(); } template void TestNxNS12(const index_t height, const index_t width) { @@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, input_channels, height, width}); - net.AddRandomInput("Filter", {multiplier, input_channels, kernel_h, kernel_w}); + 
net.AddRandomInput("Input", + {batch, input_channels, height, width}); + net.AddRandomInput( + "Filter", {multiplier, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {multiplier * input_channels}); // Run on device net.RunOp(D); @@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) { func(kernel_size, kernel_size, stride, stride, SAME); } } - } #if __ARM_NEON diff --git a/mace/ops/depthwise_conv_2d_benchmark.cc b/mace/ops/depthwise_conv_2d_benchmark.cc index b8b277c088f6fd88dbae36f3863ead840286dc43..8b4a57760ae3f8fee150c13b8e1223b9cfcc10ed 100644 --- a/mace/ops/depthwise_conv_2d_benchmark.cc +++ b/mace/ops/depthwise_conv_2d_benchmark.cc @@ -38,8 +38,8 @@ static void DepthwiseConv2d(int iters, // Add input data net.AddRandomInput("Input", {batch, channels, height, width}); net.AddRandomInput("Filter", - {output_channels, channels, kernel_h, kernel_w}); - net.AddRandomInput("Bias", {output_channels*channels}); + {output_channels, channels, kernel_h, kernel_w}); + net.AddRandomInput("Bias", {output_channels * channels}); // Warm-up for (int i = 0; i < 5; ++i) { @@ -54,23 +54,22 @@ static void DepthwiseConv2d(int iters, net.Sync(); } -#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \ - DEVICE) \ - static void \ +#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \ + DEVICE) \ + static void \ BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, \ - mace::Padding::P, OC); \ - } \ - BENCHMARK( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, \ + mace::Padding::P, OC); \ + } \ + BENCHMARK( \ BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) -#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ - BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \ - BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);\ +#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ + BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \ BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL); BM_DEPTHWISE_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 2, float); diff --git a/mace/ops/fused_conv_2d.cc b/mace/ops/fused_conv_2d.cc index 6e6b0172f9e04cd2d0a098cd701431506856f7f9..fd17a12a1a3e72df988fe68b967d71b4a8c50640 100644 --- a/mace/ops/fused_conv_2d.cc +++ b/mace/ops/fused_conv_2d.cc @@ -6,25 +6,30 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); - -REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); - - -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); - -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); +void Register_FusedConv2D(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); + + 
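// Editorial sketch, not part of the patch: each op file in this change exposes
// a Register_<Op>(OperatorRegistry *) entry point and issues one
// REGISTER_OPERATOR call per (device, dtype) pair, with the device now carried
// by OpKeyBuilder::Device() instead of per-device macros. "MyOp" below is a
// hypothetical operator used only to show the shape of the pattern:
void Register_MyOp(OperatorRegistry *op_registry) {
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
                                     .Device(DeviceType::CPU)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    MyOp<DeviceType::CPU, float>);
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
                                     .Device(DeviceType::OPENCL)
                                     .TypeConstraint<half>("T")
                                     .Build(),
                    MyOp<DeviceType::OPENCL, half>);
}
// Keeping registration behind an explicit registry object (rather than global
// static macros) is what lets tests and MaceEngine build their own registries.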
REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); +} } // namespace mace diff --git a/mace/ops/fused_conv_2d_test.cc b/mace/ops/fused_conv_2d_test.cc index f1effb3ee99cb9dd6353c1beae5f581515a87125..eef3b1a730b172a9406c0b1adec72349d473feb0 100644 --- a/mace/ops/fused_conv_2d_test.cc +++ b/mace/ops/fused_conv_2d_test.cc @@ -9,7 +9,7 @@ using namespace mace; class FusedConv2dOpTest : public OpsTestBase {}; -template +template void TestNHWCSimple3x3VALID() { OpsTestNet net; // Add input data @@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() { net.AddInputFromArray("Bias", {1}, {-0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") .Input("FilterImage") @@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") @@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.01); } -template +template void TestNHWCSimple3x3SAME() { OpsTestNet net; @@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() { net.AddInputFromArray("Bias", {1}, {-0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") .Input("FilterImage") @@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") @@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() { } auto expected = CreateTensor( - {1, 3, 3, 1}, - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); + {1, 3, 3, 1}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.01); } @@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) { TestNHWCSimple3x3SAME(); } -template +template void TestNHWCSimple3x3WithoutBias() { OpsTestNet net; @@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); if (D == 
DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() { // Run net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("Input") @@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) { TestNHWCSimple3x3WithoutBias(); } -template +template void TestConv1x1() { // Construct graph OpsTestNet net; @@ -207,9 +217,12 @@ void TestConv1x1() { net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -223,7 +236,8 @@ void TestConv1x1() { // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("Input") @@ -251,15 +265,11 @@ void TestConv1x1() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -TEST_F(FusedConv2dOpTest, CPUConv1x1) { - TestConv1x1(); -} +TEST_F(FusedConv2dOpTest, CPUConv1x1) { TestConv1x1(); } -TEST_F(FusedConv2dOpTest, OPENCLConv1x1) { - TestConv1x1(); -} +TEST_F(FusedConv2dOpTest, OPENCLConv1x1) { TestConv1x1(); } -template +template static void TestComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, @@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector &shape) { expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector &shape) { // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) { TestComplexConvNxNS12({107, 113, 5, 7}); } -template +template static void 
TestHalfComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, @@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { .Finalize(net.NewOperatorDef()); std::vector float_input_data; - GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data); + GenerateRandomRealTypeData({batch, height, width, input_channels}, + float_input_data); std::vector float_filter_data; - GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + GenerateRandomRealTypeData( + {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); std::vector float_bias_data; GenerateRandomRealTypeData({output_channels}, float_bias_data); // Add input data - net.AddInputFromArray("Input", {batch, height, width, input_channels}, float_input_data); net.AddInputFromArray( - "Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + "Input", {batch, height, width, input_channels}, float_input_data); + net.AddInputFromArray( + "Filter", {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); net.AddInputFromArray("Bias", {output_channels}, float_bias_data); // run on cpu @@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.2); }; @@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) { TestHalfComplexConvNxNS12({32, 32, 32, 64}); } -template +template static void TestGeneralConvNxNS12(const std::vector &image_shape, const std::vector &filter_shape) { testing::internal::LogToStderr(); @@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector &image_shape, expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector &image_shape, // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); 
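// Editorial sketch, not part of the patch: the randomized tests in this file
// validate the OPENCL path by diffing against a CPU run of the same graph, and
// the tolerance is relaxed for half precision (0.2 in this file, 0.5 in
// conv_2d_test) versus 0.001 for float. CheckOpenCLAgainstCpu is a made-up
// name; the helpers are the ones this file already uses:
void CheckOpenCLAgainstCpu(OpsTestNet &net) {
  net.RunOp(DeviceType::CPU);  // reference result
  Tensor expected;
  expected.Copy(*net.GetOutput("Output"));
  // ...rebuild the same op on the image tensors, then run on the device...
  net.RunOp(DeviceType::OPENCL);
  ImageToBuffer<DeviceType::OPENCL, float>(net, "OutputImage", "OPENCLOutput",
                                           kernels::BufferType::IN_OUT);
  ExpectTensorNear<float>(expected, *net.GetOutput("OPENCLOutput"), 0.001);
}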
ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector &image_shape, } TEST_F(FusedConv2dOpTest, OPENCL7X7ConvNxNS12) { - TestGeneralConvNxNS12({32, 32}, - {7, 7, 3, 64}); + TestGeneralConvNxNS12({32, 32}, {7, 7, 3, 64}); } TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) { - TestGeneralConvNxNS12({40, 40}, - {15, 1, 32, 64}); + TestGeneralConvNxNS12({40, 40}, {15, 1, 32, 64}); } template diff --git a/mace/ops/global_avg_pooling.cc b/mace/ops/global_avg_pooling.cc index f495c71246b9763940d8169eba5b24bb984aada0..65fd7f43b8051971fac29818aeace86db6a3a98f 100644 --- a/mace/ops/global_avg_pooling.cc +++ b/mace/ops/global_avg_pooling.cc @@ -6,16 +6,20 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling") - .TypeConstraint("T") - .Build(), - GlobalAvgPoolingOp); +void Register_GlobalAvgPooling(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + GlobalAvgPoolingOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling") - .TypeConstraint("T") - .Build(), - GlobalAvgPoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + GlobalAvgPoolingOp); #endif // MACE_ENABLE_NEON +} } // namespace mace diff --git a/mace/ops/global_avg_pooling_benchmark.cc b/mace/ops/global_avg_pooling_benchmark.cc index 3638243fd067c55d3d36147c28187d2ec635410d..523ea924d692ead03169a633c119afbc5291f63f 100644 --- a/mace/ops/global_avg_pooling_benchmark.cc +++ b/mace/ops/global_avg_pooling_benchmark.cc @@ -22,7 +22,8 @@ static void GlobalAvgPooling( .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", + {batch, channels, height, width}); // Warm-up for (int i = 0; i < 5; ++i) { diff --git a/mace/ops/image_to_buffer.cc b/mace/ops/image_to_buffer.cc index bcf8b997b2b6da5620bdb340c785e47f37915b37..88835145333d5eb9933b3b23af5cb0ad61a22943 100644 --- a/mace/ops/image_to_buffer.cc +++ b/mace/ops/image_to_buffer.cc @@ -6,14 +6,18 @@ namespace mace { -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer") - .TypeConstraint("T") - .Build(), - ImageToBufferOp); +void Register_ImageToBuffer(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ImageToBufferOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer") - .TypeConstraint("T") - .Build(), - ImageToBufferOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ImageToBufferOp); +} } // namespace mace diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index e9f2fd048598de6edf4921ebee562d46036e73bb..b9fd14c9104938831e71ad293cf607248c9ffe41 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -10,9 +10,9 @@ #include "gtest/gtest.h" #include "mace/core/common.h" #include "mace/core/net.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" -#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/opencl/helper.h" #include "mace/utils/utils.h" @@ -56,7 +56,8 @@ class OpDefBuilder { return *this; } - OpDefBuilder AddIntsArg(const std::string &name, const std::vector 
&values) { + OpDefBuilder AddIntsArg(const std::string &name, + const std::vector &values) { auto arg = op_def_.add_arg(); arg->set_name(name); for (auto value : values) { @@ -65,7 +66,8 @@ class OpDefBuilder { return *this; } - OpDefBuilder AddFloatsArg(const std::string &name, const std::vector &values) { + OpDefBuilder AddFloatsArg(const std::string &name, + const std::vector &values) { auto arg = op_def_.add_arg(); arg->set_name(name); for (auto value : values) { @@ -75,7 +77,7 @@ class OpDefBuilder { } OpDefBuilder AddStringsArg(const std::string &name, - const std::vector &values) { + const std::vector &values) { auto arg = op_def_.add_arg(); arg->set_name(name); for (auto value : values) { @@ -94,7 +96,7 @@ class OpDefBuilder { class OpsTestNet { public: - OpsTestNet() {} + OpsTestNet() : op_registry_(new OperatorRegistry()) {}; template void AddInputFromArray(const std::string &name, @@ -135,10 +137,11 @@ class OpsTestNet { std::mt19937 gen(rd()); std::normal_distribution nd(0, 1); if (DataTypeToEnum::value == DT_HALF) { - std::generate(input_data, input_data + input->size(), - [&gen, &nd, positive] { - return half_float::half_cast(positive ? std::abs(nd(gen)) : nd(gen)); - }); + std::generate( + input_data, input_data + input->size(), [&gen, &nd, positive] { + return half_float::half_cast(positive ? std::abs(nd(gen)) + : nd(gen)); + }); } else { std::generate(input_data, input_data + input->size(), [&gen, &nd, positive] { @@ -160,7 +163,7 @@ class OpsTestNet { for (auto &op_def_ : op_defs_) { net_def.add_op()->CopyFrom(op_def_); } - net_ = CreateNet(net_def, &ws_, device); + net_ = CreateNet(op_registry_, net_def, &ws_, device); device_ = device; return net_->Run(); } @@ -182,6 +185,7 @@ class OpsTestNet { } public: + std::shared_ptr op_registry_; Workspace ws_; std::vector op_defs_; std::unique_ptr net_; @@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector &shape, res.resize(size); if (DataTypeToEnum::value == DT_HALF) { - std::generate(res.begin(), res.end(), [&gen, &nd] { return half_float::half_cast(nd(gen)); }); + std::generate(res.begin(), res.end(), + [&gen, &nd] { return half_float::half_cast(nd(gen)); }); } else { std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); }); } @@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector &shape, template unique_ptr CreateTensor(const std::vector &shape, const std::vector &data) { - unique_ptr res(new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum::v())); + unique_ptr res( + new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum::v())); res->Resize(shape); T *input_data = res->mutable_data(); memcpy(input_data, data.data(), data.size() * sizeof(T)); @@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) { template struct is_floating_point_type { - static const bool value = - std::is_same::value || std::is_same::value - || std::is_same::value; + static const bool value = std::is_same::value || + std::is_same::value || + std::is_same::value; }; template @@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) { << "y.shape [ " << ShapeToString(y) << "]"; } -template ::value> +template ::value> struct Expector; // Partial specialization for float and double. 
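// Editorial sketch, not part of the patch: after this change nothing resolves
// operators through a global table; callers construct an OperatorRegistry,
// hold it in a shared_ptr, and hand it to CreateNet, once for an INIT pass and
// once for the normal run, as OpsTestNet::RunOp above and core_test.cc earlier
// in this patch both do. A condensed version of that call sequence (net_def
// and ws are assumed to exist already):
std::shared_ptr<OperatorRegistry> op_registry(new OperatorRegistry());
auto init_net =
    CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL, NetMode::INIT);
init_net->Run();  // executes only the ops flagged with mode == NetMode::INIT
auto net = CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL);
net->Run();       // executes the remaining ops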
@@ -343,7 +351,6 @@ struct Expector { } } } - }; template @@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { template void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { - static_assert(is_floating_point_type::value - && is_floating_point_type::value, + static_assert(is_floating_point_type::value && + is_floating_point_type::value, "T is not a floating point type"); Expector::Near(x, y, abs_err); } diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index 3a467d1237c8508fade69d53162e2630fb48d83f..d372f242e74deda8df82335667c096d73d6f7228 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -6,29 +6,36 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); -REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); +void Register_Pooling(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + PoolingOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + PoolingOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + PoolingOp); +} } // namespace mace diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc index 479563ece6c82bd47f5a22bdbe3f801c5553582c..2a6580d82a0c20764438bc38d206377a7cd8cf1c 100644 --- a/mace/ops/pooling_benchmark.cc +++ b/mace/ops/pooling_benchmark.cc @@ -35,7 +35,8 @@ static void Pooling(int iters, .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", + {batch, channels, height, width}); // Warm-up for (int i = 0; i < 5; ++i) { diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index 1e1a992ab620cc6ea377191d3fe3a215d822b943..c802c126667027513ef0337e339c5dc007fd4282 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -29,7 +29,7 @@ TEST_F(PoolingOpTest, MAX_VALID) { // Add input data net.AddInputFromArray( "Input", {1, 4, 4, 2}, - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); // Run @@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } - TEST_F(PoolingOpTest, MAX_SAME) { // Construct graph OpsTestNet net; @@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -template +template static void SimpleMaxPooling3S2() { // Construct graph OpsTestNet net; @@ -130,11 +129,12 @@ static void SimpleMaxPooling3S2() { // Add input data net.AddInputFromArray( 
"Input", {1, 3, 9, 1}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() { .AddIntsArg("dilations", {1, 1}) .Finalize(net.NewOperatorDef()); net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { // Run OpDefBuilder("Pooling", "PoolingTest") @@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { - SimpleMaxPooling3S2(); -} +TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { SimpleMaxPooling3S2(); } TEST_F(PoolingOpTest, OPENCLSimpleMaxPooling3S2) { SimpleMaxPooling3S2(); } -template +template static void MaxPooling3S2(const std::vector &input_shape, const std::vector strides, Padding padding) { @@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector &input_shape, .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); } // TODO(chenghui) : there is a bug. -//TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) { +// TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) { // AlignedMaxPooling3S2(Padding::VALID); // AlignedMaxPooling3S2(Padding::SAME); //} @@ -259,7 +259,7 @@ TEST_F(PoolingOpTest, AVG_VALID) { // Add input data net.AddInputFromArray( "Input", {1, 4, 4, 2}, - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); // Run @@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -template +template static void SimpleAvgPoolingTest() { // Construct graph OpsTestNet net; @@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() { "Input", {1, 2, 8, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() { .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); // Check auto expected = CreateTensor({1, 1, 4, 1}, {4.5, 6.5, 8.5, 10.5}); @@ -306,11 +308,11 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) { SimpleAvgPoolingTest(); } -template +template static void AvgPoolingTest(const std::vector &shape, - const std::vector &kernels, - const std::vector &strides, - Padding padding) { + const std::vector &kernels, + const std::vector &strides, + Padding padding) { // Construct graph OpsTestNet net; OpDefBuilder("Pooling", "PoolingTest") @@ -343,38 +345,49 
@@ static void AvgPoolingTest(const std::vector &shape, .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.01); } TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) { - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, + Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) { - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, + Padding::VALID); AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) { - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID); - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::VALID); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) { - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID); - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::VALID); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLUnAlignedAvgPooling) { - AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::VALID); - AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::SAME); + AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, + Padding::VALID); + AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) { - AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::VALID); - AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::SAME); + AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, + Padding::VALID); + AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, + Padding::SAME); } - diff --git a/mace/ops/relu.cc b/mace/ops/relu.cc index 0197e65cf6297f8addd0dc3acb5bf07425b6a1c7..f9f7b3be9b080fc739af959ef0bc469a0f17cc45 100644 --- a/mace/ops/relu.cc +++ b/mace/ops/relu.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); +void Register_Relu(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ReluOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + ReluOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ReluOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + 
ReluOp); +} } // namespace mace diff --git a/mace/ops/relu_benchmark.cc b/mace/ops/relu_benchmark.cc index c68009c9cb1c6bd29806078c69ab47ff005689fa..1b5d36245a027b3a9c4fe84f1c7cbc85a2050415 100644 --- a/mace/ops/relu_benchmark.cc +++ b/mace/ops/relu_benchmark.cc @@ -19,7 +19,8 @@ static void ReluBenchmark( net.AddRandomInput("Input", {batch, height, width, channels}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluBM") .Input("InputImage") @@ -54,9 +55,9 @@ static void ReluBenchmark( } \ BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE) -#define BM_RELU(N, C, H, W, TYPE) \ - BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \ - BM_RELU_MACRO(N, C, H, W, TYPE, NEON);\ +#define BM_RELU(N, C, H, W, TYPE) \ + BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \ + BM_RELU_MACRO(N, C, H, W, TYPE, NEON); \ BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL); BM_RELU(1, 1, 512, 512, float); diff --git a/mace/ops/relu_test.cc b/mace/ops/relu_test.cc index e74b927ef98a814c6bf0548909b345dae273acc2..e2a59a231472bdd872d6d3dabdd254e51717db7f 100644 --- a/mace/ops/relu_test.cc +++ b/mace/ops/relu_test.cc @@ -14,13 +14,13 @@ void TestSimple() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {2, 2, 2, 2}, - {-7, 7, -6, 6, -5, 5, -4, 4, - -3, 3, -2, 2, -1, 1, 0, 0}); + net.AddInputFromArray( + "Input", {2, 2, 2, 2}, + {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -31,7 +31,8 @@ void TestSimple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -42,38 +43,30 @@ void TestSimple() { net.RunOp(D); } - auto expected = CreateTensor({2, 2, 2, 2}, - {0, 7, 0, 6, 0, 5, 0, 4, - 0, 3, 0, 2, 0, 1, 0, 0}); + auto expected = CreateTensor( + {2, 2, 2, 2}, {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } -TEST_F(ReluOpTest, CPUSimple) { - TestSimple(); -} +TEST_F(ReluOpTest, CPUSimple) { TestSimple(); } #if __ARM_NEON -TEST_F(ReluOpTest, NEONSimple) { - TestSimple(); -} +TEST_F(ReluOpTest, NEONSimple) { TestSimple(); } #endif -TEST_F(ReluOpTest, OPENCLSimple) { - TestSimple(); -} +TEST_F(ReluOpTest, OPENCLSimple) { TestSimple(); } template void TestUnalignedSimple() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {1, 3, 2, 1}, - {-7, 7, -6, 6, -5, 5}); + net.AddInputFromArray("Input", {1, 3, 2, 1}, {-7, 7, -6, 6, -5, 5}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -84,7 +77,8 @@ void TestUnalignedSimple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -95,8 +89,7 @@ void TestUnalignedSimple() { net.RunOp(D); } - auto expected = CreateTensor({1, 3, 2, 1}, - {0, 
7, 0, 6, 0, 5}); + auto expected = CreateTensor({1, 3, 2, 1}, {0, 7, 0, 6, 0, 5}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } @@ -120,13 +113,13 @@ void TestSimpleReluX() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {2, 2, 2, 2}, - {-7, 7, -6, 6, -5, 5, -4, 4, - -3, 3, -2, 2, -1, 1, 0, 0}); + net.AddInputFromArray( + "Input", {2, 2, 2, 2}, + {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -138,7 +131,8 @@ void TestSimpleReluX() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -150,38 +144,31 @@ void TestSimpleReluX() { net.RunOp(D); } - auto expected = CreateTensor({2, 2, 2, 2}, - {0, 6, 0, 6, 0, 5, 0, 4, - 0, 3, 0, 2, 0, 1, 0, 0}); + auto expected = CreateTensor( + {2, 2, 2, 2}, {0, 6, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } -TEST_F(ReluOpTest, CPUSimpleReluX) { - TestSimpleReluX(); -} +TEST_F(ReluOpTest, CPUSimpleReluX) { TestSimpleReluX(); } #if __ARM_NEON -TEST_F(ReluOpTest, NEONSimpleReluX) { - TestSimpleReluX(); -} +TEST_F(ReluOpTest, NEONSimpleReluX) { TestSimpleReluX(); } #endif -TEST_F(ReluOpTest, OPENCLSimpleReluX) { - TestSimpleReluX(); -} +TEST_F(ReluOpTest, OPENCLSimpleReluX) { TestSimpleReluX(); } template void TestUnalignedSimpleReluX() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {1, 1, 7, 1}, + net.AddInputFromArray("Input", {1, 1, 7, 1}, {-7, 7, -6, 6, -5, 5, -4}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() { net.RunOp(D); } - auto expected = CreateTensor({1, 1, 7, 1}, - {0, 6, 0, 6, 0, 5, 0}); + auto expected = CreateTensor({1, 1, 7, 1}, {0, 6, 0, 6, 0, 5, 0}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 89f460fec98adda501aca49388badc4b67da3db7..b44f462b1aaae066b84280de038a0a33a95c9970 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear") - .TypeConstraint("T") - .Build(), - ResizeBilinearOp); +void Register_ResizeBilinear(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ResizeBilinearOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear") - .TypeConstraint("T") - .Build(), - ResizeBilinearOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + 
                    ResizeBilinearOp);
 #endif  // MACE_ENABLE_NEON
 
-REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
-                             .TypeConstraint("T")
-                             .Build(),
-                         ResizeBilinearOp);
+  REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
+                                     .Device(DeviceType::OPENCL)
+                                     .TypeConstraint("T")
+                                     .Build(),
+                    ResizeBilinearOp);
 
-REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
-                             .TypeConstraint("T")
-                             .Build(),
-                         ResizeBilinearOp);
+  REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear")
+                                     .Device(DeviceType::OPENCL)
+                                     .TypeConstraint("T")
+                                     .Build(),
+                    ResizeBilinearOp);
+}
 
 }  // namespace mace
diff --git a/mace/ops/resize_bilinear_benchmark.cc b/mace/ops/resize_bilinear_benchmark.cc
index 9a51b03caefe335a3cf1f98735076313f86e6060..46b9612394c9269aae98e402a41546d51cdc582e 100644
--- a/mace/ops/resize_bilinear_benchmark.cc
+++ b/mace/ops/resize_bilinear_benchmark.cc
@@ -26,22 +26,23 @@ static void ResizeBilinearBenchmark(int iters,
   net.AddInputFromArray("OutSize", {2}, {output_height, output_width});
 
   if (D == DeviceType::OPENCL) {
-    BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
+    BufferToImage(net, "Input", "InputImage",
+                  kernels::BufferType::IN_OUT);
     OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark")
-      .Input("InputImage")
-      .Input("OutSize")
-      .Output("OutputImage")
-      .AddIntsArg("size", {output_height, output_width})
-      .AddIntArg("T", static_cast(DataTypeToEnum::value))
-      .Finalize(net.NewOperatorDef());
+        .Input("InputImage")
+        .Input("OutSize")
+        .Output("OutputImage")
+        .AddIntsArg("size", {output_height, output_width})
+        .AddIntArg("T", static_cast(DataTypeToEnum::value))
+        .Finalize(net.NewOperatorDef());
   } else {
     OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark")
-      .Input("Input")
-      .Input("OutSize")
-      .Output("Output")
-      .AddIntsArg("size", {output_height, output_width})
-      .AddIntArg("T", static_cast(DataTypeToEnum::value))
-      .Finalize(net.NewOperatorDef());
+        .Input("Input")
+        .Input("OutSize")
+        .Output("Output")
+        .AddIntsArg("size", {output_height, output_width})
+        .AddIntArg("T", static_cast(DataTypeToEnum::value))
+        .Finalize(net.NewOperatorDef());
   }
 
   // Warm-up
@@ -68,8 +69,8 @@ static void ResizeBilinearBenchmark(int iters,
   BENCHMARK(                                                                  \
       BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE)
 
-#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE)        \
-  BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU);  \
+#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE)       \
+  BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \
   BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL);
 
 // SNPE 835 GPU: 6870us
diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc
index 8d7f2d5579e24c8cf097efd46ee2c1b493f1b147..06b715a0b8ff581a17f664f8e780668b63efdc56 100644
--- a/mace/ops/resize_bilinear_test.cc
+++ b/mace/ops/resize_bilinear_test.cc
@@ -80,29 +80,31 @@ void TestRandomResizeBilinear() {
                            {batch, in_height, in_width, channels});
 
     OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
-      .Input("Input")
-      .Output("Output")
-      .AddIntArg("align_corners", align_corners)
-      .AddIntsArg("size", {height, width})
-      .Finalize(net.NewOperatorDef());
+        .Input("Input")
+        .Output("Output")
+        .AddIntArg("align_corners", align_corners)
+        .AddIntsArg("size", {height, width})
+        .Finalize(net.NewOperatorDef());
 
     // Run on CPU
     net.RunOp(DeviceType::CPU);
 
     Tensor expected;
    expected.Copy(*net.GetOutput("Output"));
 
     if (D == DeviceType::OPENCL) {
-      BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
+      BufferToImage(net, "Input", "InputImage",
+                    kernels::BufferType::IN_OUT);
       OpDefBuilder("ResizeBilinear", "ResizeBilinearTest")
-        .Input("InputImage")
-        .Output("OutputImage")
-        .AddIntArg("align_corners", align_corners)
-        .AddIntsArg("size", {height, width})
-        .Finalize(net.NewOperatorDef());
+          .Input("InputImage")
+          .Output("OutputImage")
+          .AddIntArg("align_corners", align_corners)
+          .AddIntsArg("size", {height, width})
+          .Finalize(net.NewOperatorDef());
       // Run
       net.RunOp(D);
 
-      ImageToBuffer(net, "OutputImage", "DeviceOutput", kernels::BufferType::IN_OUT);
+      ImageToBuffer(net, "OutputImage", "DeviceOutput",
+                    kernels::BufferType::IN_OUT);
     } else {
       // TODO support NEON
     }
diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc
index 9b24f591d7e812262c4bfbb6ee9fac07cb2b4b3c..0e5c293dd8cf7cd9a032341c173107e38d26fc5e 100644
--- a/mace/ops/space_to_batch.cc
+++ b/mace/ops/space_to_batch.cc
@@ -6,13 +6,17 @@
 
 namespace mace {
 
-REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND")
-                             .TypeConstraint("T")
-                             .Build(),
-                         SpaceToBatchNDOp);
-REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND")
-                             .TypeConstraint("T")
-                             .Build(),
-                         SpaceToBatchNDOp);
+void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
+  REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
+                                     .Device(DeviceType::OPENCL)
+                                     .TypeConstraint("T")
+                                     .Build(),
+                    SpaceToBatchNDOp);
+  REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND")
+                                     .Device(DeviceType::OPENCL)
+                                     .TypeConstraint("T")
+                                     .Build(),
+                    SpaceToBatchNDOp);
+}
 
 }  // namespace mace
diff --git a/mace/ops/space_to_batch_benchmark.cc b/mace/ops/space_to_batch_benchmark.cc
index 9afa88b98e2fc22a66f8779e980122c69f3d0f20..a2fea8dc9fd9c87eee7cabc4b4c332284e85c466 100644
--- a/mace/ops/space_to_batch_benchmark.cc
+++ b/mace/ops/space_to_batch_benchmark.cc
@@ -15,7 +15,8 @@ static void BMSpaceToBatch(
   OpsTestNet net;
   net.AddRandomInput("Input", {batch, height, width, channels});
 
-  BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
+  BufferToImage(net, "Input", "InputImage",
+                kernels::BufferType::IN_OUT);
   OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
       .Input("InputImage")
       .Output("OutputImage")
@@ -36,17 +37,19 @@ static void BMSpaceToBatch(
   net.Sync();
 }
 
-#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE)                     \
-  static void BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
-      int iters) {                                                                   \
-    const int64_t tot = static_cast(iters) * N * C * H * W;                          \
-    mace::testing::ItemsProcessed(tot);                                              \
-    mace::testing::BytesProcessed(tot *(sizeof(TYPE)));                              \
-    BMSpaceToBatch(iters, N, H, W, C, SHAPE);                                        \
-  }                                                                                  \
-  BENCHMARK(BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
-
-#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
+#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE)             \
+  static void                                                                \
+      BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
+          int iters) {                                                       \
+    const int64_t tot = static_cast(iters) * N * C * H * W;                  \
+    mace::testing::ItemsProcessed(tot);                                      \
+    mace::testing::BytesProcessed(tot *(sizeof(TYPE)));                      \
+    BMSpaceToBatch(iters, N, H, W, C, SHAPE);                                \
+  }                                                                          \
+  BENCHMARK(                                                                 \
+      BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
+
+#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
   BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL);
 
 BM_SPACE_TO_BATCH(128, 16, 16, 128, 2, float);
diff --git a/mace/ops/space_to_batch_test.cc b/mace/ops/space_to_batch_test.cc
index 4c1dbbdcd5a60c9d17a131888755d6fb12a1630f..bebbafeff042a85c2ae3d1d2581cdd584544df6d 100644
--- a/mace/ops/space_to_batch_test.cc
+++ b/mace/ops/space_to_batch_test.cc
@@ -2,23 +2,23 @@
 // Copyright (c) 2017 XiaoMi All rights reserved.
 //
 
+#include 
 #include "gtest/gtest.h"
 #include "mace/ops/ops_test_util.h"
-#include 
 
 using namespace mace;
 
-template 
+template 
 void RunSpaceToBatch(const std::vector &input_shape,
                      const std::vector &input_data,
                      const std::vector &block_shape_data,
                      const std::vector &padding_data,
                      const Tensor *expected) {
   OpsTestNet net;
-  net.AddInputFromArray(
-      "Input", input_shape, input_data);
+  net.AddInputFromArray("Input", input_shape, input_data);
 
-  BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
+  BufferToImage(net, "Input", "InputImage",
+                kernels::BufferType::IN_OUT);
   OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest")
       .Input("InputImage")
       .Output("OutputImage")
@@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector &input_shape,
 
   // Run
   net.RunOp(D);
 
-  ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
+  ImageToBuffer(net, "OutputImage", "Output",
+                kernels::BufferType::IN_OUT);
 
   // Check
   ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-8);
 }
 
-template 
+template 
 void RunBatchToSpace(const std::vector &input_shape,
                      const std::vector &input_data,
                      const std::vector &block_shape_data,
@@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector &input_shape,
                      const Tensor *expected) {
   OpsTestNet net;
   // Add input data
-  net.AddInputFromArray(
-      "Input", input_shape, input_data);
+  net.AddInputFromArray("Input", input_shape, input_data);
 
-  BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT);
+  BufferToImage(net, "Input", "InputImage",
+                kernels::BufferType::IN_OUT);
   OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest")
       .Input("InputImage")
       .Output("OutputImage")
@@ -56,33 +57,33 @@ void RunBatchToSpace(const std::vector &input_shape,
 
   // Run
   net.RunOp(D);
 
-  ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT);
+  ImageToBuffer(net, "OutputImage", "Output",
+                kernels::BufferType::IN_OUT);
 
   // Check
   ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-8);
 }
 
-template 
+template 
 void TestBidirectionalTransform(const std::vector &space_shape,
                                 const std::vector &space_data,
                                 const std::vector &block_data,
                                 const std::vector &padding_data,
                                 const std::vector &batch_shape,
                                 const std::vector &batch_data) {
-
-  auto space_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
-                                            DataTypeToEnum::v()));
+  auto space_tensor = unique_ptr(new Tensor(
+      GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum::v()));
   space_tensor->Resize(space_shape);
   {
     Tensor::MappingGuard space_mapper(space_tensor.get());
     T *space_ptr = space_tensor->mutable_data();
     MACE_CHECK(static_cast(space_tensor->size()) == space_data.size())
-      << "Space tensor size:" << space_tensor->size()
-      << ", space data size:" << space_data.size();
+        << "Space tensor size:" << space_tensor->size()
+        << ", space data size:" << space_data.size();
     memcpy(space_ptr, space_data.data(), space_data.size() * sizeof(T));
   }
 
-  auto batch_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
-                                            DataTypeToEnum::v()));
+  auto batch_tensor = unique_ptr(new Tensor(
+      GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum::v()));
   batch_tensor->Resize(batch_shape);
   {
     Tensor::MappingGuard batch_mapper(batch_tensor.get());
@@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector &space_shape,
     memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(T));
   }
 
-  RunSpaceToBatch(space_shape, space_data,
-                  block_data,
-                  padding_data,
-                  batch_tensor.get());
+  RunSpaceToBatch(space_shape, space_data, block_data,
+                  padding_data, batch_tensor.get());
 
-  RunBatchToSpace(batch_shape, batch_data,
-                  block_data,
-                  padding_data,
-                  space_tensor.get());
+  RunBatchToSpace(batch_shape, batch_data, block_data,
+                  padding_data, space_tensor.get());
 }
 
 TEST(SpaceToBatchTest, SmallData) {
-  TestBidirectionalTransform({1, 2, 2, 1},
-                             {1, 2, 3, 4},
-                             {2, 2},
-                             {0, 0, 0, 0},
-                             {4, 1, 1, 1},
-                             {1, 2, 3, 4}
-  );
+  TestBidirectionalTransform({1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2},
+                             {0, 0, 0, 0}, {4, 1, 1, 1}, {1, 2, 3, 4});
 }
 
 TEST(SpaceToBatchTest, SmallDataWithOnePadding) {
-  TestBidirectionalTransform({1, 2, 2, 1},
-                             {1, 2, 3, 4},
-                             {3, 3},
-                             {1, 0, 1, 0},
-                             {9, 1, 1, 1},
-                             {0, 0, 0, 0, 1, 2, 0, 3, 4}
-  );
+  TestBidirectionalTransform({1, 2, 2, 1}, {1, 2, 3, 4}, {3, 3},
+                             {1, 0, 1, 0}, {9, 1, 1, 1},
+                             {0, 0, 0, 0, 1, 2, 0, 3, 4});
 }
 
 TEST(SpaceToBatchTest, SmallDataWithTwoPadding) {
-  TestBidirectionalTransform({1, 2, 2, 1},
-                             {1, 2, 3, 4},
-                             {2, 2},
-                             {1, 1, 1, 1},
-                             {4, 2, 2, 1},
-                             {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0}
-  );
+  TestBidirectionalTransform(
+      {1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2}, {1, 1, 1, 1}, {4, 2, 2, 1},
+      {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0});
 }
 
 TEST(SpaceToBatchTest, SmallDataWithLargeImage) {
-  TestBidirectionalTransform({1, 2, 10, 1},
-                             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
-                              11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
-                             {2, 2},
-                             {0, 0, 0, 0},
-                             {4, 1, 5, 1},
-                             {1, 3, 5, 7, 9,
-                              2, 4, 6, 8, 10,
-                              11, 13, 15, 17, 19,
-                              12, 14, 16, 18, 20}
-  );
+  TestBidirectionalTransform(
+      {1, 2, 10, 1},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20},
+      {2, 2}, {0, 0, 0, 0}, {4, 1, 5, 1},
+      {1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 11, 13, 15, 17, 19, 12, 14, 16, 18, 20});
 }
 
 TEST(SpaceToBatchTest, MultiChannelData) {
-  TestBidirectionalTransform({1, 2, 2, 3},
-                             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12},
-                             {2, 2},
-                             {0, 0, 0, 0},
-                             {4, 1, 1, 3},
-                             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
-  );
+  TestBidirectionalTransform(
+      {1, 2, 2, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {2, 2},
+      {0, 0, 0, 0}, {4, 1, 1, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12});
 }
 
 TEST(SpaceToBatchTest, LargerMultiChannelData) {
-  TestBidirectionalTransform({1, 4, 4, 1},
-                             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
-                             {2, 2},
-                             {0, 0, 0, 0},
-                             {4, 2, 2, 1},
-                             {1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16}
-  );
+  TestBidirectionalTransform(
+      {1, 4, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      {2, 2}, {0, 0, 0, 0}, {4, 2, 2, 1},
+      {1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16});
 }
 
 TEST(SpaceToBatchTest, MultiBatchData) {
-  TestBidirectionalTransform({2, 2, 4, 1},
-                             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
-                             {2, 2},
-                             {0, 0, 0, 0},
-                             {8, 1, 2, 1},
-                             {1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16}
-  );
+  TestBidirectionalTransform(
+      {2, 2, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16},
+      {2, 2}, {0, 0, 0, 0}, {8, 1, 2, 1},
+      {1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16});
 }
 
 TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
-  TestBidirectionalTransform({2, 2, 4, 2},
-                             {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
-                              17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
-                             {2, 2},
-                             {0, 0, 0, 0},
-                             {8, 1, 2, 2},
-                             {1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16,
-                              17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32}
-  );
+  TestBidirectionalTransform(
+      {2, 2, 4, 2},
+      {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32},
+      {2, 2}, {0, 0, 0, 0}, {8, 1, 2, 2},
+      {1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16,
+       17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32});
 }
 
-//TEST(SpaceTobatchTest, CompareTF) {
+// TEST(SpaceTobatchTest, CompareTF) {
 //
 //  const std::string space_file = "/data/local/tmp/test/input";
 //  const std::string batch_file = "/data/local/tmp/test/output";
 //  const std::vector space_shape = {1, 256, 256, 32};
-//  const int space_size = std::accumulate(space_shape.begin(), space_shape.end(), 1, std::multiplies());
+//  const int space_size = std::accumulate(space_shape.begin(),
+//  space_shape.end(), 1, std::multiplies());
 //  const std::vector batch_shape = {4, 130, 130, 32};
-//  const int batch_size = std::accumulate(batch_shape.begin(), batch_shape.end(), 1, std::multiplies());
+//  const int batch_size = std::accumulate(batch_shape.begin(),
+//  batch_shape.end(), 1, std::multiplies());
 //
-//  auto space_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
+//  auto space_tensor = unique_ptr(new
+//  Tensor(GetDeviceAllocator(DeviceType::OPENCL),
 //                                            DataTypeToEnum::v()));
 //  space_tensor->Resize(space_shape);
 //  std::vector space_data(space_size, 0.0);
@@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
 //    VLOG(0) << "open space file failed";
 //  }
 //
-//  auto batch_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
+//  auto batch_tensor = unique_ptr(new
+//  Tensor(GetDeviceAllocator(DeviceType::OPENCL),
 //                                            DataTypeToEnum::v()));
 //  std::vector batch_data(batch_size, 0.0);
 //  batch_tensor->Resize(batch_shape);
@@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
 //  }
 //  Tensor::MappingGuard batch_mapper(batch_tensor.get());
 //  float *batch_ptr = batch_tensor->mutable_data();
-//  MACE_CHECK(static_cast(batch_tensor->size()) == batch_data.size());
+//  MACE_CHECK(static_cast(batch_tensor->size()) ==
+//             batch_data.size());
 //  memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float));
 //  }
 //
@@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
 //                           {2, 2, 2, 2},
 //                           space_tensor.get());
 //}
-
diff --git a/mace/python/tools/model.template b/mace/python/tools/model.template
index 6588d9a2c4492e962b3310cf33fad7bef7a8513a..5989b86fcf5960200fa50449a06d923ade1bb093 100644
--- a/mace/python/tools/model.template
+++ b/mace/python/tools/model.template
@@ -27,12 +27,12 @@ void Create{{tensor.name}}(std::vector &tensors) {
 #include "mace/core/public/mace.h"
 
 namespace {
-static void UpdateOp(mace::OperatorDef &op,
-                     const std::string &name,
-                     const std::string &type,
-                     const std::vector &inputs,
-                     const std::vector &outputs,
-                     const std::vector &output_types) {
+void UpdateOp(mace::OperatorDef &op,
+              const std::string &name,
+              const std::string &type,
+              const std::vector &inputs,
+              const std::vector &outputs,
+              const std::vector &output_types) {
   op.set_name(name);
   op.set_type(type);
   op.set_input(inputs);
diff --git a/tools/bazel-adb-run.sh b/tools/bazel-adb-run.sh
index 116e64dbfa1dd41faa5222b829604cc17a50f2ed..91347fb3e521f044a4617e93c1804cd73c1458d7 100755
--- a/tools/bazel-adb-run.sh
+++ b/tools/bazel-adb-run.sh
@@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//}
 BAZEL_BIN_PATH=bazel-bin/$BAZEL_BIN_PATH
 BIN_NAME=`echo $BAZEL_TARGET | cut -d: -f2`
 
-ANDROID_ABI=armeabi-v7a
 ANDROID_ABI=arm64-v8a
-STRIP=""
+ANDROID_ABI=armeabi-v7a
 STRIP="--strip always"
 VLOG_LEVEL=0
 PROFILINE="--define profiling=true"
@@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
     --crosstool_top=//external:android/crosstool \
     --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \
     --cpu=$ANDROID_ABI \
-    --define neon=true
+    --define neon=false
 
 if [ $? -ne 0 ]; then
     exit 1
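
Note on the registration pattern above: each op file now defines a Register_<Op>(OperatorRegistry *op_registry) function (for example Register_SpaceToBatchND) that calls REGISTER_OPERATOR with an explicit .Device(DeviceType::OPENCL) key, replacing the old REGISTER_OPENCL_OPERATOR static registrations. The sketch below shows one way such functions could be collected into a registry; it is illustrative only and assumes an OperatorRegistry constructor and a Register_ResizeBilinear name (by analogy with Register_SpaceToBatchND), neither of which is shown in this patch.

// Sketch only -- assumes mace/core/operator.h declares OperatorRegistry and
// that the per-op Register_* functions introduced by this patch exist.
#include "mace/core/operator.h"

namespace mace {

// Registration functions defined in the individual op .cc files.
// Register_ResizeBilinear is an assumed name for illustration.
extern void Register_ResizeBilinear(OperatorRegistry *op_registry);
extern void Register_SpaceToBatchND(OperatorRegistry *op_registry);

// Hypothetical constructor: each op contributes one explicit Register_* call,
// instead of relying on REGISTER_OPENCL_OPERATOR static initializers.
OperatorRegistry::OperatorRegistry() {
  Register_ResizeBilinear(this);
  Register_SpaceToBatchND(this);
}

}  // namespace mace

Collecting the Register_* calls in one place keeps the set of available ops explicit at registry construction time and avoids depending on static-initialization order across translation units.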