From baf2dcd13f4baf8f374cc7c9c889f904b93e7836 Mon Sep 17 00:00:00 2001 From: Liangliang He Date: Thu, 28 Dec 2017 10:45:21 +0800 Subject: [PATCH] Resolve operator and allocator registering static variable issue --- mace/core/BUILD | 23 +-- mace/core/allocator.cc | 2 + mace/core/mace.cc | 9 +- mace/core/net.cc | 43 ++--- mace/core/net.h | 28 ++-- mace/core/operator.cc | 108 +++++++------ mace/core/operator.h | 82 +++------- mace/core/public/mace.h | 5 +- mace/core/registry.h | 37 ++--- mace/core/runtime/opencl/opencl_allocator.cc | 2 - mace/core/tensor.h | 3 +- mace/examples/BUILD | 1 - mace/kernels/BUILD | 1 - mace/ops/addn.cc | 38 +++-- mace/ops/addn_benchmark.cc | 12 +- mace/ops/batch_norm.cc | 38 +++-- mace/ops/batch_norm_benchmark.cc | 24 +-- mace/ops/batch_norm_test.cc | 120 ++++++++------ mace/ops/batch_to_space.cc | 20 ++- mace/ops/batch_to_space_benchmark.cc | 20 +-- mace/ops/bias_add.cc | 46 +++--- mace/ops/bias_add_benchmark.cc | 22 +-- mace/ops/bias_add_test.cc | 47 +++--- mace/ops/buffer_to_image.cc | 20 ++- mace/ops/channel_shuffle.cc | 11 +- mace/ops/channel_shuffle_benchmark.cc | 3 +- mace/ops/channel_shuffle_test.cc | 1 - mace/ops/concat.cc | 39 +++-- mace/ops/concat_benchmark.cc | 9 +- mace/ops/concat_test.cc | 44 +++-- mace/ops/conv_2d.cc | 51 +++--- mace/ops/conv_2d_benchmark.cc | 58 +++---- mace/ops/conv_2d_test.cc | 141 +++++++++------- mace/ops/core_test.cc | 19 ++- mace/ops/depthwise_conv2d.cc | 29 ++-- mace/ops/depthwise_conv2d_test.cc | 14 +- mace/ops/depthwise_conv_2d_benchmark.cc | 31 ++-- mace/ops/fused_conv_2d.cc | 45 +++--- mace/ops/fused_conv_2d_test.cc | 125 ++++++++------ mace/ops/global_avg_pooling.cc | 20 ++- mace/ops/global_avg_pooling_benchmark.cc | 3 +- mace/ops/image_to_buffer.cc | 20 ++- mace/ops/ops_test_util.h | 45 +++--- mace/ops/pooling.cc | 47 +++--- mace/ops/pooling_benchmark.cc | 3 +- mace/ops/pooling_test.cc | 79 +++++---- mace/ops/relu.cc | 38 +++-- mace/ops/relu_benchmark.cc | 9 +- mace/ops/relu_test.cc | 85 +++++----- mace/ops/resize_bilinear.cc | 38 +++-- mace/ops/resize_bilinear_benchmark.cc | 31 ++-- mace/ops/resize_bilinear_test.cc | 26 +-- mace/ops/space_to_batch.cc | 20 ++- mace/ops/space_to_batch_benchmark.cc | 27 ++-- mace/ops/space_to_batch_test.cc | 162 ++++++++----------- mace/python/tools/model.template | 12 +- tools/bazel-adb-run.sh | 5 +- 57 files changed, 1090 insertions(+), 951 deletions(-) diff --git a/mace/core/BUILD b/mace/core/BUILD index e1334527..252982f0 100644 --- a/mace/core/BUILD +++ b/mace/core/BUILD @@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0 load("//mace:mace.bzl", "if_android", "if_profiling_enabled", "if_embed_binary_program") cc_library( - name = "opencl_runtime", + name = "core", srcs = glob([ + "*.cc", "runtime/opencl/*.cc", ]), hdrs = glob([ + "*.h", + "public/*.h", "runtime/opencl/cl2.hpp", "runtime/opencl/*.h", ]), copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"] + if_profiling_enabled(["-DMACE_OPENCL_PROFILING"]) + if_embed_binary_program(["-DMACE_EMBED_BINARY_PROGRAM"]), - linkopts = ["-ldl"], + linkopts = if_android(["-pie", "-ldl"]), deps = [ - ":core", + "//mace/utils:utils_hdrs", "//mace/utils:logging", "//mace/utils:tuner", "@opencl_headers//:opencl20_headers", ], - alwayslink = 1, -) - - -cc_library( - name = "core", - srcs = glob(["*.cc"]), - hdrs = glob(["*.h", "public/*.h"]), - copts = ["-std=c++11", "-D_GLIBCXX_USE_C99_MATH_TR1"], - linkopts = if_android(["-pie"]), - deps = [ - "//mace/utils:utils_hdrs", - "//mace/utils:logging", - ], ) cc_library( diff --git 
a/mace/core/allocator.cc b/mace/core/allocator.cc index d05c45b3..fcf3ef6a 100644 --- a/mace/core/allocator.cc +++ b/mace/core/allocator.cc @@ -3,6 +3,7 @@ // #include "mace/core/allocator.h" +#include "mace/core/runtime/opencl/opencl_allocator.h" namespace mace { @@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) { MACE_REGISTER_ALLOCATOR(DeviceType::CPU, new CPUAllocator()); MACE_REGISTER_ALLOCATOR(DeviceType::NEON, new CPUAllocator()); +MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); } // namespace mace diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 9d0d0c76..c25deea3 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -5,6 +5,7 @@ #include "mace/core/public/mace.h" #include "mace/core/types.h" #include "mace/core/net.h" +#include "mace/core/operator.h" #include "mace/core/workspace.h" #include "mace/utils/logging.h" @@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const { // Mace Engine MaceEngine::MaceEngine(const NetDef *net_def, DeviceType device_type): - device_type_(device_type), ws_(new Workspace()), net_(nullptr) { + op_registry_(new OperatorRegistry()), device_type_(device_type), + ws_(new Workspace()), net_(nullptr) { ws_->LoadModelTensor(*net_def, device_type); // Init model - auto net = CreateNet(*net_def, ws_.get(), device_type, NetMode::INIT); + auto net = CreateNet(op_registry_, *net_def, ws_.get(), + device_type, NetMode::INIT); if(!net->Run()) { LOG(FATAL) << "Net init run failed"; } ws_->CreateTensor("mace_input_node:0", GetDeviceAllocator(device_type_), DT_FLOAT); - net_ = std::move(CreateNet(*net_def, ws_.get(), device_type)); + net_ = std::move(CreateNet(op_registry_, *net_def, ws_.get(), device_type)); } MaceEngine::~MaceEngine() = default; bool MaceEngine::Run(const float *input, diff --git a/mace/core/net.cc b/mace/core/net.cc index 3a65de85..515d0e98 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -3,22 +3,24 @@ // #include "mace/core/net.h" -#include "mace/core/operator.h" #include "mace/core/workspace.h" #include "mace/utils/utils.h" namespace mace { -NetBase::NetBase(const std::shared_ptr &net_def, +NetBase::NetBase(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type) - : name_(net_def->name()) {} + : op_registry_(op_registry), name_(net_def->name()) {} -SimpleNet::SimpleNet(const std::shared_ptr &net_def, +SimpleNet::SimpleNet(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type, const NetMode mode) - : NetBase(net_def, ws, type), device_type_(type){ + : NetBase(op_registry, net_def, ws, type), + device_type_(type) { VLOG(1) << "Constructing SimpleNet " << net_def->name(); for (int idx = 0; idx < net_def->op_size(); ++idx) { const auto &operator_def = net_def->op(idx); @@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr &net_def, << operator_def.type(); std::unique_ptr op{nullptr}; OperatorDef temp_def(operator_def); - op = CreateOperator(temp_def, ws, type, mode); + op = op_registry->CreateOperator(temp_def, ws, type, mode); if (op) { operators_.emplace_back(std::move(op)); } @@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { } if (run_metadata != nullptr) { - OperatorStats op_stats = { op->debug_def().name(), - op->debug_def().type(), - call_stats }; + OperatorStats op_stats = {op->debug_def().name(), op->debug_def().type(), + call_stats}; run_metadata->op_stats.emplace_back(op_stats); } @@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) { return 
true; } -unique_ptr CreateNet(const NetDef &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode) { +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const NetDef &net_def, + Workspace *ws, + DeviceType type, + const NetMode mode) { std::shared_ptr tmp_net_def(new NetDef(net_def)); - return CreateNet(tmp_net_def, ws, type, mode); + return CreateNet(op_registry, tmp_net_def, ws, type, mode); } -unique_ptr CreateNet(const std::shared_ptr &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode) { - unique_ptr net(new SimpleNet(net_def, ws, type, mode)); +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const std::shared_ptr net_def, + Workspace *ws, + DeviceType type, + const NetMode mode) { + unique_ptr net(new SimpleNet(op_registry, net_def, ws, type, mode)); return net; } diff --git a/mace/core/net.h b/mace/core/net.h index 8619bcb8..cb8116a9 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -6,6 +6,7 @@ #define MACE_CORE_NET_H_ #include "mace/core/common.h" +#include "mace/core/operator.h" #include "mace/core/public/mace.h" namespace mace { @@ -16,7 +17,8 @@ class Workspace; class NetBase { public: - NetBase(const std::shared_ptr &net_def, + NetBase(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type); virtual ~NetBase() noexcept {} @@ -27,13 +29,15 @@ class NetBase { protected: string name_; + const std::shared_ptr op_registry_; DISABLE_COPY_AND_ASSIGN(NetBase); }; class SimpleNet : public NetBase { public: - SimpleNet(const std::shared_ptr &net_def, + SimpleNet(const std::shared_ptr op_registry, + const std::shared_ptr net_def, Workspace *ws, DeviceType type, const NetMode mode = NetMode::NORMAL); @@ -47,14 +51,18 @@ class SimpleNet : public NetBase { DISABLE_COPY_AND_ASSIGN(SimpleNet); }; -unique_ptr CreateNet(const NetDef &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode = NetMode::NORMAL); -unique_ptr CreateNet(const std::shared_ptr &net_def, - Workspace *ws, - DeviceType type, - const NetMode mode = NetMode::NORMAL); +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const NetDef &net_def, + Workspace *ws, + DeviceType type, + const NetMode mode = NetMode::NORMAL); +std::unique_ptr CreateNet( + const std::shared_ptr op_registry, + const std::shared_ptr net_def, + Workspace *ws, + DeviceType type, + const NetMode mode = NetMode::NORMAL); } // namespace mace diff --git a/mace/core/operator.cc b/mace/core/operator.cc index 20261054..ae1770b1 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -2,12 +2,19 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// +#include + #include "mace/core/operator.h" namespace mace { +OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws) + : operator_ws_(ws), + operator_def_(std::make_shared(operator_def)) {} -OpKeyBuilder::OpKeyBuilder(const char *op_name): op_name_(op_name) {} +OpKeyBuilder::OpKeyBuilder(const char *op_name) : op_name_(op_name) {} + +OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) { device_type_ = device; } OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, const DataType allowed) { @@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name, const std::string OpKeyBuilder::Build() { static const std::vector type_order = {"T"}; - std::string key = op_name_; + std::stringstream ss; + ss << op_name_; + ss << device_type_; for (auto type : type_order) { - key += type + "_" + DataTypeToString(type_constraint_[type]); + ss << type << "_" << DataTypeToString(type_constraint_[type]); } - return key; -} -std::map *gDeviceTypeRegistry() { - static std::map g_device_type_registry; - return &g_device_type_registry; + return ss.str(); } -MACE_DEFINE_REGISTRY(CPUOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); -MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry); - -MACE_DEFINE_REGISTRY(NEONOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); -MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry); - -MACE_DEFINE_REGISTRY(OPENCLOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); -MACE_REGISTER_DEVICE_TYPE(DeviceType::OPENCL, OPENCLOperatorRegistry); - -unique_ptr CreateOperator(const OperatorDef &operator_def, - Workspace *ws, - DeviceType type, - const NetMode mode) { - OperatorRegistry *registry = gDeviceTypeRegistry()->at(type); - const int dtype = ArgumentHelper::GetSingleArgument(operator_def, - "T", - static_cast(DT_FLOAT)); - const int op_mode_i= ArgumentHelper::GetSingleArgument(operator_def, - "mode", - static_cast(NetMode::NORMAL)); +std::unique_ptr OperatorRegistry::CreateOperator( + const OperatorDef &operator_def, + Workspace *ws, + DeviceType type, + const NetMode mode) const { + const int dtype = ArgumentHelper::GetSingleArgument( + operator_def, "T", static_cast(DT_FLOAT)); + const int op_mode_i = ArgumentHelper::GetSingleArgument( + operator_def, "mode", static_cast(NetMode::NORMAL)); const NetMode op_mode = static_cast(op_mode_i); if (op_mode == mode) { - return registry->Create(OpKeyBuilder(operator_def.type().data()) - .TypeConstraint("T", static_cast(dtype)) - .Build(), - operator_def, - ws); + return registry_.Create( + OpKeyBuilder(operator_def.type().data()) + .Device(type) + .TypeConstraint("T", static_cast(dtype)) + .Build(), + operator_def, ws); } else { return nullptr; } } -OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws) - : operator_ws_(ws), - operator_def_(std::make_shared(operator_def)) {} +extern void Register_AddN(OperatorRegistry *op_registry); +extern void Register_BatchNorm(OperatorRegistry *op_registry); +extern void Register_BatchToSpaceND(OperatorRegistry *op_registry); +extern void Register_BiasAdd(OperatorRegistry *op_registry); +extern void Register_BufferToImage(OperatorRegistry *op_registry); +extern void Register_ChannelShuffle(OperatorRegistry *op_registry); +extern void Register_Concat(OperatorRegistry *op_registry); +extern void Register_Conv2D(OperatorRegistry *op_registry); +extern void Register_DepthwiseConv2d(OperatorRegistry *op_registry); +extern void 
Register_FusedConv2D(OperatorRegistry *op_registry); +extern void Register_GlobalAvgPooling(OperatorRegistry *op_registry); +extern void Register_ImageToBuffer(OperatorRegistry *op_registry); +extern void Register_Pooling(OperatorRegistry *op_registry); +extern void Register_Relu(OperatorRegistry *op_registry); +extern void Register_ResizeBilinear(OperatorRegistry *op_registry); +extern void Register_SpaceToBatchND(OperatorRegistry *op_registry); + +OperatorRegistry::OperatorRegistry() { + Register_AddN(this); + Register_BatchNorm(this); + Register_BatchToSpaceND(this); + Register_BiasAdd(this); + Register_BufferToImage(this); + Register_ChannelShuffle(this); + Register_Concat(this); + Register_Conv2D(this); + Register_DepthwiseConv2d(this); + Register_FusedConv2D(this); + Register_GlobalAvgPooling(this); + Register_ImageToBuffer(this); + Register_Pooling(this); + Register_Relu(this); + Register_ResizeBilinear(this); + Register_SpaceToBatchND(this); +} } // namespace mace diff --git a/mace/core/operator.h b/mace/core/operator.h index 66e4701e..d673ca81 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -5,13 +5,13 @@ #ifndef MACE_CORE_OPERATOR_H #define MACE_CORE_OPERATOR_H -#include "mace/core/common.h" #include "mace/core/arg_helper.h" +#include "mace/core/common.h" #include "mace/core/future.h" +#include "mace/core/public/mace.h" #include "mace/core/registry.h" #include "mace/core/tensor.h" #include "mace/core/workspace.h" -#include "mace/core/public/mace.h" namespace mace { @@ -102,7 +102,7 @@ class Operator : public OperatorBase { } } } - virtual bool Run(StatsFuture *future) override = 0; + virtual bool Run(StatsFuture *future) override = 0; ~Operator() noexcept override {} }; @@ -122,29 +122,12 @@ class Operator : public OperatorBase { #define OP_OUTPUT_TAGS(first_input, ...) \ enum _OutputTags { first_input = 0, __VA_ARGS__ } -typedef Registry - OperatorRegistry; -typedef Registry *( - *RegistryFunction)(); -std::map *gDeviceTypeRegistry(); - -struct DeviceTypeRegisterer { - explicit DeviceTypeRegisterer(int32_t type, RegistryFunction func) { - if (gDeviceTypeRegistry()->count(type)) { - LOG(ERROR) << "Device type " << type - << "registered twice. This should not happen. Did you have " - "duplicated numbers assigned to different devices?"; - std::exit(1); - } - // Calling the registry function to get the actual registry pointer. 
- gDeviceTypeRegistry()->emplace(type, func()); - } -}; - class OpKeyBuilder { public: explicit OpKeyBuilder(const char *op_name); + OpKeyBuilder &Device(DeviceType device); + OpKeyBuilder &TypeConstraint(const char *attr_name, const DataType allowed); template @@ -154,6 +137,7 @@ class OpKeyBuilder { private: std::string op_name_; + DeviceType device_type_; std::map type_constraint_; }; @@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) { return this->TypeConstraint(attr_name, DataTypeToEnum::value); } +class OperatorRegistry { + public: + typedef Registry + RegistryType; + OperatorRegistry(); + ~OperatorRegistry() = default; + RegistryType *registry() { return ®istry_; }; + std::unique_ptr CreateOperator(const OperatorDef &operator_def, + Workspace *ws, + DeviceType type, + const NetMode mode) const; + private: + RegistryType registry_; + DISABLE_COPY_AND_ASSIGN(OperatorRegistry); +}; -#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \ - namespace { \ - static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \ - type, ®istry_function); \ - } - -MACE_DECLARE_REGISTRY(CPUOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); - -#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \ - MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_CPU_OPERATOR(name, ...) \ - MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__) - -MACE_DECLARE_REGISTRY(NEONOperatorRegistry, - OperatorBase, - const OperatorDef &, - Workspace *); - -#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \ - MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_NEON_OPERATOR(name, ...) \ - MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__) - -MACE_DECLARE_REGISTRY(OPENCLOperatorRegistry, +MACE_DECLARE_REGISTRY(OpRegistry, OperatorBase, const OperatorDef &, Workspace *); -#define REGISTER_OPENCL_OPERATOR_CREATOR(key, ...) \ - MACE_REGISTER_CREATOR(OPENCLOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_OPENCL_OPERATOR(name, ...) \ - MACE_REGISTER_CLASS(OPENCLOperatorRegistry, name, __VA_ARGS__) - -unique_ptr CreateOperator(const OperatorDef &operator_def, - Workspace *ws, - DeviceType type, - const NetMode mode); +#define REGISTER_OPERATOR(op_registry, name, ...) 
\ + MACE_REGISTER_CLASS(OpRegistry, op_registry->registry(), name, __VA_ARGS__) } // namespace mace diff --git a/mace/core/public/mace.h b/mace/core/public/mace.h index 1b0d3ceb..a363610f 100644 --- a/mace/core/public/mace.h +++ b/mace/core/public/mace.h @@ -302,10 +302,12 @@ class NetDef { class Workspace; class NetBase; +class OperatorRegistry; class MaceEngine { public: - explicit MaceEngine(const NetDef *net_def, DeviceType device_type); + explicit MaceEngine(const NetDef *net_def, + DeviceType device_type); ~MaceEngine(); bool Run(const float *input, const std::vector &input_shape, @@ -314,6 +316,7 @@ class MaceEngine { MaceEngine &operator=(const MaceEngine&) = delete; private: + std::shared_ptr op_registry_; DeviceType device_type_; std::unique_ptr ws_; std::unique_ptr net_; diff --git a/mace/core/registry.h b/mace/core/registry.h index c92ebb12..5c82ef2e 100644 --- a/mace/core/registry.h +++ b/mace/core/registry.h @@ -17,24 +17,27 @@ class Registry { Registry() : registry_() {} void Register(const SrcType &key, Creator creator) { + VLOG(2) << "Registering: " << key; std::lock_guard lock(register_mutex_); MACE_CHECK(registry_.count(key) == 0, "Key already registered."); registry_[key] = creator; } - inline bool Has(const SrcType &key) { return registry_.count(key) != 0; } + inline bool Has(const SrcType &key) const { + return registry_.count(key) != 0; + } - unique_ptr Create(const SrcType &key, Args... args) { + unique_ptr Create(const SrcType &key, Args... args) const { if (registry_.count(key) == 0) { LOG(FATAL) << "Key not registered: " << key; } - return registry_[key](args...); + return registry_.at(key)(args...); } /** * Returns the keys currently registered as a vector. */ - vector Keys() { + vector Keys() const { vector keys; for (const auto &it : registry_) { keys.push_back(it.first); @@ -77,39 +80,31 @@ class Registerer { typedef Registerer \ Registerer##RegistryName; +/* #define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \ Registry *RegistryName() { \ static Registry *registry = \ new Registry(); \ return registry; \ } +*/ #define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ ##__VA_ARGS__) +/* #define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \ MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ ##__VA_ARGS__) +*/ -#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \ - namespace { \ - static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \ - key, RegistryName(), __VA_ARGS__); - -#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \ - namespace { \ - static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \ - key, \ - RegistryName(), \ - Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \ - } - -#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \ - MACE_REGISTER_TYPED_CREATOR(RegistryName, key, __VA_ARGS__) +#define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \ + Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(l_##RegistryName)( \ + key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); -#define MACE_REGISTER_CLASS(RegistryName, key, ...) \ - MACE_REGISTER_TYPED_CLASS(RegistryName, key, __VA_ARGS__) +#define MACE_REGISTER_CLASS(RegistryName, registry, key, ...) 
\ + MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, __VA_ARGS__) } // namespace mace diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 4e14636e..929b4818 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) { bool OpenCLAllocator::OnHost() { return false; } -MACE_REGISTER_ALLOCATOR(DeviceType::OPENCL, new OpenCLAllocator()); - } // namespace mace diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 2d00699e..51dc311f 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -105,7 +105,8 @@ class Tensor { inline index_t dim_size() const { return shape_.size(); } inline index_t dim(unsigned int index) const { - MACE_CHECK(index < shape_.size(), "Exceeding ndim limit"); + MACE_CHECK(index < shape_.size(), "Dim out of range: ", + index, " >= ", shape_.size()); return shape_[index]; } diff --git a/mace/examples/BUILD b/mace/examples/BUILD index ffdb4f72..9db67631 100644 --- a/mace/examples/BUILD +++ b/mace/examples/BUILD @@ -11,7 +11,6 @@ cc_binary( deps = [ "//mace/core", "//mace/ops", - "//mace/core:opencl_runtime", ], ) diff --git a/mace/kernels/BUILD b/mace/kernels/BUILD index da1ee8ec..a2ad37db 100644 --- a/mace/kernels/BUILD +++ b/mace/kernels/BUILD @@ -26,7 +26,6 @@ cc_library( linkopts = if_android(["-lm"]), deps = [ "//mace/core", - "//mace/core:opencl_runtime", "//mace/utils:utils_hdrs", ], ) diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index d29944de..d9b514d4 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); +void Register_AddN(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + AddNOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + AddNOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + AddNOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("AddN") - .TypeConstraint("T") - .Build(), - AddNOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + AddNOp); +} } // namespace mace diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index 717be1ea..2d529318 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { OpsTestNet net; // Add input data for (int i = 0; i < inputs; ++i) { - net.AddRandomInput( - internal::MakeString("Input", i).c_str(), {n, h, w, c}); + net.AddRandomInput(internal::MakeString("Input", i).c_str(), + {n, h, w, c}); } if (D == DeviceType::OPENCL) { @@ -30,16 +30,16 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { op_def_builder.Input(internal::MakeString("InputImage", i).c_str()); } op_def_builder.Output("OutputImage") - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .AddIntArg("T", 
static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); } else { OpDefBuilder op_def_builder("AddN", "AddNBM"); for (int i = 0; i < inputs; ++i) { op_def_builder.Input(internal::MakeString("Input", i).c_str()); } op_def_builder.Output("Output") - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); } // Warm-up diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index 6136c814..ade5c7c7 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); +void Register_BatchNorm(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + BatchNormOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + BatchNormOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchNormOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchNorm") - .TypeConstraint("T") - .Build(), - BatchNormOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchNormOp); +} } // namespace mace diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 3f54e745..976bc241 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -23,11 +23,16 @@ static void BatchNorm( net.AddRandomInput("Var", {channels}, true); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormBM") .Input("InputImage") .Input("ScaleImage") @@ -37,8 +42,7 @@ static void BatchNorm( .AddFloatArg("epsilon", 1e-3) .Output("Output") .Finalize(net.NewOperatorDef()); - } - else { + } else { OpDefBuilder("BatchNorm", "BatchNormBM") .Input("Input") .Input("Scale") @@ -50,7 +54,6 @@ static void BatchNorm( .Finalize(net.NewOperatorDef()); } - // tuning setenv("MACE_TUNING", "1", 1); net.RunOp(D); @@ -79,9 +82,8 @@ static void BatchNorm( } \ BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) -#define BM_BATCH_NORM(N, C, H, W, TYPE) \ - BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \ - BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);\ +#define BM_BATCH_NORM(N, C, H, W, TYPE) \ + BM_BATCH_NORM_MACRO(N, C, 
H, W, TYPE, CPU); \ BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL); BM_BATCH_NORM(1, 1, 512, 512, float); diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index 6cc6eea3..595635e7 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -15,18 +15,23 @@ void Simple() { // Add input data net.AddInputFromArray("Input", {1, 6, 2, 1}, - {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); + {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); net.AddInputFromArray("Scale", {1}, {4.0f}); net.AddInputFromArray("Offset", {1}, {2.0}); net.AddInputFromArray("Mean", {1}, {10}); net.AddInputFromArray("Var", {1}, {11.67f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -41,7 +46,8 @@ void Simple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("BatchNorm", "BatchNormTest") .Input("Input") @@ -64,9 +70,7 @@ void Simple() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-2); } -TEST_F(BatchNormOpTest, SimpleCPU) { - Simple(); -} +TEST_F(BatchNormOpTest, SimpleCPU) { Simple(); } /* TEST_F(BatchNormOpTest, SimpleNEON) { @@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) { } */ -TEST_F(BatchNormOpTest, SimpleOPENCL) { - Simple(); -} +TEST_F(BatchNormOpTest, SimpleOPENCL) { Simple(); } /* TEST_F(BatchNormOpTest, SimpleRandomNeon) { @@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", {batch, channels, height, +width}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", {batch, channels, height, +width}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { expected.Copy(*net.GetOutput("Output")); // Run on opencl - BufferToImage(net, "Input", "InputImage", 
kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } @@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { expected.Copy(*net.GetOutput("Output")); // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); } @@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { Tensor expected; expected.Copy(*net.GetOutput("Output")); - // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - 
BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } @@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); @@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { Tensor expected; expected.Copy(*net.GetOutput("Output")); - // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Scale", "ScaleImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Offset", "OffsetImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Mean", "MeanImage", kernels::BufferType::ARGUMENT); - BufferToImage(net, "Var", "VarImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Scale", "ScaleImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Offset", "OffsetImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Mean", "MeanImage", + kernels::BufferType::ARGUMENT); + BufferToImage(net, "Var", "VarImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") .Input("InputImage") @@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); } } diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 4e4ea661..ece90171 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -6,13 +6,17 @@ namespace mace { -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") - .TypeConstraint("T") - .Build(), - BatchToSpaceNDOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BatchToSpaceND") - .TypeConstraint("T") - .Build(), - BatchToSpaceNDOp); +void Register_BatchToSpaceND(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchToSpaceNDOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BatchToSpaceNDOp); +} } // namespace mace diff --git a/mace/ops/batch_to_space_benchmark.cc b/mace/ops/batch_to_space_benchmark.cc index 
a3a0e0b6..93df21f9 100644 --- a/mace/ops/batch_to_space_benchmark.cc +++ b/mace/ops/batch_to_space_benchmark.cc @@ -14,7 +14,8 @@ static void BMBatchToSpace( OpsTestNet net; net.AddRandomInput("Input", {batch, height, width, channels}); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") .Input("InputImage") .Output("OutputImage") @@ -36,16 +37,17 @@ static void BMBatchToSpace( } #define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \ - static void BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - BMBatchToSpace(iters, N, C, H, W, ARG); \ - } \ + static void \ + BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + BMBatchToSpace(iters, N, C, H, W, ARG); \ + } \ BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE) -#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \ +#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \ BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL); BM_BATCH_TO_SPACE(128, 8, 8, 128, 2, float); diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index 5dc8a443..01a7582d 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -6,28 +6,34 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); +void Register_BiasAdd(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + BiasAddOp); -/* -#if __ARM_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); -#endif // __ARM_NEON -*/ + /* + #if __ARM_NEON + REGISTER_OPERATOR(op_registry,OpKeyBuilder("BiasAdd") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + BiasAddOp); + #endif // __ARM_NEON + */ -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BiasAddOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BiasAdd") - .TypeConstraint("T") - .Build(), - BiasAddOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BiasAddOp); +} } // namespace mace diff --git a/mace/ops/bias_add_benchmark.cc b/mace/ops/bias_add_benchmark.cc index d277a15d..917c28a1 100644 --- a/mace/ops/bias_add_benchmark.cc +++ b/mace/ops/bias_add_benchmark.cc @@ -9,8 +9,7 @@ namespace mace { template -static void BiasAdd( - int iters, int batch, int channels, int height, int width) { +static void BiasAdd(int iters, int batch, int channels, int height, int width) { mace::testing::StopTiming(); OpsTestNet net; @@ -20,15 +19,16 @@ static void BiasAdd( net.AddRandomInput("Bias", {channels}, true); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + 
kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddBM") .Input("InputImage") .Input("BiasImage") .Output("Output") .Finalize(net.NewOperatorDef()); - } - else { + } else { OpDefBuilder("BiasAdd", "BiasAddBM") .Input("Input") .Input("Bias") @@ -51,12 +51,12 @@ static void BiasAdd( #define BM_BIAS_ADD_MACRO(N, C, H, W, TYPE, DEVICE) \ static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ BiasAdd(iters, N, C, H, W); \ - } \ + } \ BENCHMARK(BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) #define BM_BIAS_ADD(N, C, H, W, TYPE) \ diff --git a/mace/ops/bias_add_test.cc b/mace/ops/bias_add_test.cc index b9e34738..ce83ebd7 100644 --- a/mace/ops/bias_add_test.cc +++ b/mace/ops/bias_add_test.cc @@ -15,12 +15,14 @@ void BiasAddSimple() { // Add input data net.AddInputFromArray("Input", {1, 6, 2, 1}, - {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); + {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); net.AddInputFromArray("Bias", {1}, {0.5f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -31,7 +33,8 @@ void BiasAddSimple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("BiasAdd", "BiasAddTest") .Input("Input") @@ -43,16 +46,14 @@ void BiasAddSimple() { } // Check - auto expected = - CreateTensor({1, 6, 2, 1}, {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, - 11.5, 13.5, 13.5, 15.5, 15.5}); + auto expected = CreateTensor( + {1, 6, 2, 1}, + {5.5, 5.5, 7.5, 7.5, 9.5, 9.5, 11.5, 11.5, 13.5, 13.5, 15.5, 15.5}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-2); } -TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { - BiasAddSimple(); -} +TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { BiasAddSimple(); } TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) { BiasAddSimple(); @@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Bias", {channels}, true); // run cpu @@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { expected.Copy(*net.GetOutput("Output")); // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", 
"OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } @@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, height, width, channels}); + net.AddRandomInput( + "Input", {batch, height, width, channels}); net.AddRandomInput("Bias", {channels}, true); // run cpu @@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { Tensor expected; expected.Copy(*net.GetOutput("Output")); - // Run on opencl - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { net.RunOp(DeviceType::OPENCL); net.Sync(); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } - } diff --git a/mace/ops/buffer_to_image.cc b/mace/ops/buffer_to_image.cc index 56711794..c9118a19 100644 --- a/mace/ops/buffer_to_image.cc +++ b/mace/ops/buffer_to_image.cc @@ -6,14 +6,18 @@ namespace mace { -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage") - .TypeConstraint("T") - .Build(), - BufferToImageOp); +void Register_BufferToImage(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BufferToImageOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("BufferToImage") - .TypeConstraint("T") - .Build(), - BufferToImageOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + BufferToImageOp); +} } // namespace mace diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index 7d36b1af..b12d8beb 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -6,9 +6,12 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("ChannelShuffle") - .TypeConstraint("T") - .Build(), - ChannelShuffleOp); +void Register_ChannelShuffle(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ChannelShuffleOp); +} } // namespace mace diff --git a/mace/ops/channel_shuffle_benchmark.cc b/mace/ops/channel_shuffle_benchmark.cc index d9f63f3a..ca75ce10 100644 --- a/mace/ops/channel_shuffle_benchmark.cc +++ b/mace/ops/channel_shuffle_benchmark.cc @@ -23,7 +23,8 @@ static void ChannelShuffle( .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", + {batch, channels, height, width}); // Warm-up for (int i = 0; i < 5; ++i) { diff --git a/mace/ops/channel_shuffle_test.cc b/mace/ops/channel_shuffle_test.cc index f42e3b1b..8f5736d4 100644 --- a/mace/ops/channel_shuffle_test.cc +++ b/mace/ops/channel_shuffle_test.cc @@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) { .AddIntArg("group", 4) .Finalize(net.NewOperatorDef()); - // Add input data net.AddInputFromArray( "Input", {1, 8, 
1, 2}, diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index 929da85f..71be2fc3 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -6,21 +6,28 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); -REGISTER_CPU_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); +void Register_Concat(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ConcatOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ConcatOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ConcatOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ConcatOp); +} -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Concat") - .TypeConstraint("T") - .Build(), - ConcatOp); } // namespace mace diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index 0e442525..6a3dda02 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters, net.AddRandomInput("Input0", shape0); net.AddRandomInput("Input1", shape1); - BufferToImage(net, "Input0", "InputImage0", kernels::BufferType::IN_OUT); - BufferToImage(net, "Input1", "InputImage1", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input0", "InputImage0", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Input1", "InputImage1", + kernels::BufferType::IN_OUT); OpDefBuilder("Concat", "ConcatBM") .Input("InputImage0") .Input("InputImage1") @@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters, net.RunOp(DeviceType::OPENCL); } - const int64_t tot = static_cast(iters) * + const int64_t tot = + static_cast(iters) * (net.GetTensor("Input0")->size() + net.GetTensor("Input1")->size()); mace::testing::ItemsProcessed(tot); testing::BytesProcessed(tot * sizeof(T)); diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index ca1c06d6..49d55d2a 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) { for (int i = 0; i < num_inputs; ++i) { builder = builder.Input(("Input" + ToString(i)).c_str()); } - builder.AddIntArg("axis", axis).Output("Output").Finalize(net.NewOperatorDef()); + builder.AddIntArg("axis", axis) + .Output("Output") + .Finalize(net.NewOperatorDef()); std::vector shape_data; GenerateRandomIntTypeData({dim}, shape_data, 1, dim); @@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) { concat_axis_size += input_shapes[i][axis]; GenerateRandomRealTypeData(input_shapes[i], inputs[i]); input_ptrs[i] = inputs[i].data(); - net.AddInputFromArray(("Input" + ToString(i)).c_str(), - input_shapes[i], inputs[i]); + net.AddInputFromArray( + ("Input" + ToString(i)).c_str(), input_shapes[i], inputs[i]); } // Run @@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) { } } -template +template void OpenclRandomTest(const std::vector> &shapes, const int axis) { srand(time(nullptr)); @@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector> &shapes, const std::string input_name = ("Input" + ToString(i)).c_str(); const std::string image_name = ("InputImage" + ToString(i)).c_str(); concat_axis_size += shapes[i][axis]; - 
net.AddRandomInput(input_name, - shapes[i]); - BufferToImage(net, input_name, image_name, kernels::BufferType::IN_OUT); + net.AddRandomInput(input_name, shapes[i]); + BufferToImage(net, input_name, image_name, + kernels::BufferType::IN_OUT); } auto builder = OpDefBuilder("Concat", "ConcatTest"); @@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector> &shapes, // Run net.RunOp(DeviceType::OPENCL); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); // Check auto output = net.GetOutput("Output"); @@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector> &shapes, while (output_ptr != (output->data() + output->size())) { for (int i = 0; i < num_inputs; ++i) { index_t num_elements = - std::accumulate(shapes[i].begin() + axis, shapes[i].end(), - 1, std::multiplies()); + std::accumulate(shapes[i].begin() + axis, shapes[i].end(), 1, + std::multiplies()); const std::string input_name = ("Input" + ToString(i)).c_str(); const Tensor *input_tensor = net.GetTensor(input_name.data()); Tensor::MappingGuard input_guard(input_tensor); const float *input_ptr = input_tensor->data() + k * num_elements; for (int j = 0; j < num_elements; ++j) { - EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2) << "With index: " << i << ", " << j; + EXPECT_NEAR(*(input_ptr + j), *output_ptr++, 1e-2) + << "With index: " << i << ", " << j; } } k++; @@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector> &shapes, } TEST_F(ConcatOpTest, OPENCLAligned) { - OpenclRandomTest({ - {3, 32, 32, 32}, - {3, 32, 32, 64} - }, - 3); + OpenclRandomTest({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3); } TEST_F(ConcatOpTest, OPENCLHalfAligned) { - OpenclRandomTest({ - {3, 32, 32, 32}, - {3, 32, 32, 64} - }, - 3); + OpenclRandomTest({{3, 32, 32, 32}, {3, 32, 32, 64}}, 3); } TEST_F(ConcatOpTest, OPENCLUnAligned) { - OpenclRandomTest({ - {3, 32, 32, 13}, - {3, 32, 32, 17} - }, - 3); + OpenclRandomTest({{3, 32, 32, 13}, {3, 32, 32, 17}}, 3); } diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index ad771df6..f7191c37 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -6,31 +6,38 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); - -REGISTER_CPU_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); +void Register_Conv2D(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + Conv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + Conv2dOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + Conv2dOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); - -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Conv2D") - .TypeConstraint("T") - .Build(), - Conv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + Conv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + Conv2dOp); +} } // namespace mace diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc 
index 0870f2b8..83e264d8 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -33,9 +33,12 @@ static void Conv2d(int iters, net.AddRandomInput("Bias", {output_channels}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -89,7 +92,7 @@ static void Conv2d(int iters, BENCHMARK( \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) -#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ +#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL); // ICNet @@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half); BM_CONV_2D(1, 512, 64, 64, 1, 1, 1, SAME, 256, half); // Test RGB <-> YUV -//BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float); -//BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float); +// BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float); +// BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float); // -//BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments -//BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float); -//BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float); -//BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float); -//BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float); -//BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float); -//BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float); -//BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float); -//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float); -//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float); -//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad +// alignments +// BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float); +// BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float); +// BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float); +// BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float); +// BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float); +// BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float); +// BM_CONV_2D(1, 3, 512, 512, 3, 
3, 2, VALID, 3, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float); +// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float); +// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float); +// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float); } // namespace mace diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index f137da6e..d4df0df8 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -10,7 +10,7 @@ using namespace mace; class Conv2dOpTest : public OpsTestBase {}; -template +template void TestSimple3x3VALID() { OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -42,10 +42,9 @@ void TestSimple3x3VALID() { auto expected = CreateTensor({1, 1, 1, 1}, {18.1f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); - } -template +template void TestSimple3x3SAME() { OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) { } #endif -template +template void TestNHWCSimple3x3VALID() { OpsTestNet net; // Add input data @@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() { net.AddInputFromArray("Bias", {1}, {0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2dTest") @@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.01); } -template +template void TestNHWCSimple3x3SAME() { OpsTestNet net; @@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() { net.AddInputFromArray("Bias", {1}, {0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") .Input("FilterImage") @@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2dTest") @@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) { TestNHWCSimple3x3SAME(); } -template +template void TestSimple3x3WithoutBias() { OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() { 
ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } - #ifdef __ARM_NEON TEST_F(Conv2dOpTest, NEONWithouBias) { TestSimple3x3WithoutBias(); } #endif -template +template void TestNHWCSimple3x3WithoutBias() { OpsTestNet net; @@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() { // Run net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2dTest") .Input("Input") @@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) { TestNHWCSimple3x3WithoutBias(); } -template +template static void TestCombined3x3() { // Construct graph OpsTestNet net; @@ -335,17 +344,13 @@ static void TestCombined3x3() { 4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); - } - #ifdef __ARM_NEON -TEST_F(Conv2dOpTest, NEONCombined) { - TestCombined3x3(); -} +TEST_F(Conv2dOpTest, NEONCombined) { TestCombined3x3(); } #endif -template +template static void TestNHWCCombined3x3() { // Construct graph OpsTestNet net; @@ -353,8 +358,8 @@ static void TestNHWCCombined3x3() { // Add input data net.AddInputFromArray( "Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); net.AddInputFromArray( "Filter", {3, 3, 2, 2}, {1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, 1.0f, 0.5f, @@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() { net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") @@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() { // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2DTest") .Input("Input") @@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() { .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - } // Check auto expected = CreateTensor( - {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, - 12.1f, 6.2f, 18.1f, 9.2f, 12.1f, 6.2f, - 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}); + {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f, + 9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}); ExpectTensorNear(*expected, 
*net.GetOutput("Output"), 0.01); - } TEST_F(Conv2dOpTest, CPUStride2) { @@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) { TestNHWCCombined3x3(); } -template +template void TestConv1x1() { // Construct graph OpsTestNet net; @@ -435,9 +441,12 @@ void TestConv1x1() { net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputImage") @@ -451,7 +460,8 @@ void TestConv1x1() { // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Conv2D", "Conv2DTest") .Input("Input") @@ -479,15 +489,11 @@ void TestConv1x1() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -TEST_F(Conv2dOpTest, CPUConv1x1) { - TestConv1x1(); -} +TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } -TEST_F(Conv2dOpTest, OPENCLConv1x1) { - TestConv1x1(); -} +TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1(); } -template +template static void TestComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, @@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector &shape) { expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector &shape) { // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, .Finalize(net.NewOperatorDef()); std::vector float_input_data; - GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data); + GenerateRandomRealTypeData({batch, height, width, input_channels}, + float_input_data); std::vector float_filter_data; - GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + GenerateRandomRealTypeData( + {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); std::vector float_bias_data; GenerateRandomRealTypeData({output_channels}, float_bias_data); // Add input data - net.AddInputFromArray("Input", {batch, height, width, input_channels}, float_input_data); net.AddInputFromArray( - "Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + 
"Input", {batch, height, width, input_channels}, float_input_data); + net.AddInputFromArray( + "Filter", {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); net.AddInputFromArray("Bias", {output_channels}, float_bias_data); // run on cpu @@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputImage") @@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector &input_shape, // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); }; diff --git a/mace/ops/core_test.cc b/mace/ops/core_test.cc index 8761d04f..206c4d6c 100644 --- a/mace/ops/core_test.cc +++ b/mace/ops/core_test.cc @@ -7,7 +7,6 @@ namespace mace { TEST(CoreTest, INIT_MODE) { - std::vector op_defs; Workspace ws; @@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) { .Output("B2IOutput") .AddIntArg("buffer_type", kernels::BufferType::FILTER) .AddIntArg("mode", static_cast(NetMode::INIT)) - .Finalize(&op_defs[op_defs.size()-1]); + .Finalize(&op_defs[op_defs.size() - 1]); Tensor *input = - ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum::v()); + ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::OPENCL), + DataTypeToEnum::v()); input->Resize({1, 3, 3, 3}); { Tensor::MappingGuard input_mapper(input); @@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) { .Input("B2IOutput") .Output("Output") .AddIntArg("buffer_type", kernels::BufferType::FILTER) - .Finalize(&op_defs[op_defs.size()-1]); + .Finalize(&op_defs[op_defs.size() - 1]); NetDef net_def; for (auto &op_def : op_defs) { net_def.add_op()->CopyFrom(op_def); } - auto net = CreateNet(net_def, &ws, DeviceType::OPENCL, NetMode::INIT); + std::shared_ptr op_registry(new OperatorRegistry()); + auto net = + CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL, NetMode::INIT); net->Run(); EXPECT_TRUE(ws.GetTensor("B2IOutput") != nullptr); EXPECT_TRUE(ws.GetTensor("Output") == nullptr); - net = CreateNet(net_def, &ws, DeviceType::OPENCL); + net = CreateNet(op_registry, net_def, &ws, DeviceType::OPENCL); net->Run(); EXPECT_TRUE(ws.GetTensor("Output") != nullptr); - ExpectTensorNear(*ws.GetTensor("Input"), *ws.GetTensor("Output"), 1e-5); + ExpectTensorNear(*ws.GetTensor("Input"), *ws.GetTensor("Output"), + 1e-5); } -} // namespace mace +} // namespace mace diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index ed13d099..4e99a378 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -6,21 +6,26 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("DepthwiseConv2d") - .TypeConstraint("T") - .Build(), - DepthwiseConv2dOp); +void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") + .Device(DeviceType::CPU) + 
.TypeConstraint("T") + .Build(), + DepthwiseConv2dOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("DepthwiseConv2d") - .TypeConstraint("T") - .Build(), - DepthwiseConv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + DepthwiseConv2dOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("DepthwiseConv2d") - .TypeConstraint("T") - .Build(), - DepthwiseConv2dOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + DepthwiseConv2dOp); +} } // namespace mace diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index b3fbdeb2..e2f2872d 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -26,7 +26,7 @@ void SimpleValidTest() { // Add input data net.AddInputFromArray("Input", {1, 2, 2, 3}, - {1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12}); + {1, 3, 5, 7, 9, 11, 2, 4, 6, 8, 10, 12}); net.AddInputFromArray( "Filter", {2, 2, 2, 2}, {1.0f, 5.0f, 9.0f, 13.0f, 2.0f, 6.0f, 10.0f, 14.0f, 3.0f, 7.0f, 11.0f, @@ -41,12 +41,9 @@ void SimpleValidTest() { {196.1f, 252.1f, 216.2f, 280.2f, 272.3f, 344.3f, 296.4f, 376.4f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); - } -TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { - SimpleValidTest(); -} +TEST_F(DepthwiseConv2dOpTest, SimpleCPU) { SimpleValidTest(); } template void TestNxNS12(const index_t height, const index_t width) { @@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) { .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, input_channels, height, width}); - net.AddRandomInput("Filter", {multiplier, input_channels, kernel_h, kernel_w}); + net.AddRandomInput("Input", + {batch, input_channels, height, width}); + net.AddRandomInput( + "Filter", {multiplier, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {multiplier * input_channels}); // Run on device net.RunOp(D); @@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) { func(kernel_size, kernel_size, stride, stride, SAME); } } - } #if __ARM_NEON diff --git a/mace/ops/depthwise_conv_2d_benchmark.cc b/mace/ops/depthwise_conv_2d_benchmark.cc index b8b277c0..8b4a5776 100644 --- a/mace/ops/depthwise_conv_2d_benchmark.cc +++ b/mace/ops/depthwise_conv_2d_benchmark.cc @@ -38,8 +38,8 @@ static void DepthwiseConv2d(int iters, // Add input data net.AddRandomInput("Input", {batch, channels, height, width}); net.AddRandomInput("Filter", - {output_channels, channels, kernel_h, kernel_w}); - net.AddRandomInput("Bias", {output_channels*channels}); + {output_channels, channels, kernel_h, kernel_w}); + net.AddRandomInput("Bias", {output_channels * channels}); // Warm-up for (int i = 0; i < 5; ++i) { @@ -54,23 +54,22 @@ static void DepthwiseConv2d(int iters, net.Sync(); } -#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \ - DEVICE) \ - static void \ +#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \ + DEVICE) \ + static void \ BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, \ - mace::Padding::P, OC); \ - } \ - BENCHMARK( \ + int iters) 
{ \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, \ + mace::Padding::P, OC); \ + } \ + BENCHMARK( \ BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) -#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ - BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \ - BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);\ +#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ + BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \ BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL); BM_DEPTHWISE_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 2, float); diff --git a/mace/ops/fused_conv_2d.cc b/mace/ops/fused_conv_2d.cc index 6e6b0172..fd17a12a 100644 --- a/mace/ops/fused_conv_2d.cc +++ b/mace/ops/fused_conv_2d.cc @@ -6,25 +6,30 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); - -REGISTER_CPU_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); - - -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); - -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("FusedConv2D") - .TypeConstraint("T") - .Build(), - FusedConv2dOp); +void Register_FusedConv2D(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); + + REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + FusedConv2dOp); +} } // namespace mace diff --git a/mace/ops/fused_conv_2d_test.cc b/mace/ops/fused_conv_2d_test.cc index f1effb3e..eef3b1a7 100644 --- a/mace/ops/fused_conv_2d_test.cc +++ b/mace/ops/fused_conv_2d_test.cc @@ -9,7 +9,7 @@ using namespace mace; class FusedConv2dOpTest : public OpsTestBase {}; -template +template void TestNHWCSimple3x3VALID() { OpsTestNet net; // Add input data @@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() { net.AddInputFromArray("Bias", {1}, {-0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") .Input("FilterImage") @@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") @@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() { ExpectTensorNear(*expected, 
*net.GetOutput("Output"), 0.01); } -template +template void TestNHWCSimple3x3SAME() { OpsTestNet net; @@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() { net.AddInputFromArray("Bias", {1}, {-0.1f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") .Input("FilterImage") @@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") @@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() { } auto expected = CreateTensor( - {1, 3, 3, 1}, - {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); + {1, 3, 3, 1}, {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.01); } @@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) { TestNHWCSimple3x3SAME(); } -template +template void TestNHWCSimple3x3WithoutBias() { OpsTestNet net; @@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() { 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() { // Run net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("Input") @@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) { TestNHWCSimple3x3WithoutBias(); } -template +template void TestConv1x1() { // Construct graph OpsTestNet net; @@ -207,9 +217,12 @@ void TestConv1x1() { net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -223,7 +236,8 @@ void TestConv1x1() { // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("Input") @@ -251,15 +265,11 @@ void TestConv1x1() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 
0.001); } -TEST_F(FusedConv2dOpTest, CPUConv1x1) { - TestConv1x1(); -} +TEST_F(FusedConv2dOpTest, CPUConv1x1) { TestConv1x1(); } -TEST_F(FusedConv2dOpTest, OPENCLConv1x1) { - TestConv1x1(); -} +TEST_F(FusedConv2dOpTest, OPENCLConv1x1) { TestConv1x1(); } -template +template static void TestComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, @@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector &shape) { expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector &shape) { // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) { TestComplexConvNxNS12({107, 113, 5, 7}); } -template +template static void TestHalfComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, @@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { .Finalize(net.NewOperatorDef()); std::vector float_input_data; - GenerateRandomRealTypeData({batch, height, width, input_channels}, float_input_data); + GenerateRandomRealTypeData({batch, height, width, input_channels}, + float_input_data); std::vector float_filter_data; - GenerateRandomRealTypeData({kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + GenerateRandomRealTypeData( + {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); std::vector float_bias_data; GenerateRandomRealTypeData({output_channels}, float_bias_data); // Add input data - net.AddInputFromArray("Input", {batch, height, width, input_channels}, float_input_data); net.AddInputFromArray( - "Filter", {kernel_h, kernel_w, input_channels, output_channels}, float_filter_data); + "Input", {batch, height, width, input_channels}, float_input_data); + net.AddInputFromArray( + "Filter", {kernel_h, kernel_w, input_channels, output_channels}, + float_filter_data); net.AddInputFromArray("Bias", {output_channels}, float_bias_data); // run on cpu @@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") 
.Input("InputImage") @@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.2); }; @@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) { TestHalfComplexConvNxNS12({32, 32, 32, 64}); } -template +template static void TestGeneralConvNxNS12(const std::vector &image_shape, const std::vector &filter_shape) { testing::internal::LogToStderr(); @@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector &image_shape, expected.Copy(*net.GetOutput("Output")); // run on gpu - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); - BufferToImage(net, "Filter", "FilterImage", kernels::BufferType::FILTER); - BufferToImage(net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); + BufferToImage(net, "Filter", "FilterImage", + kernels::BufferType::FILTER); + BufferToImage(net, "Bias", "BiasImage", + kernels::BufferType::ARGUMENT); OpDefBuilder("FusedConv2D", "FusedConv2dTest") .Input("InputImage") @@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector &image_shape, // Run on device net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); }; @@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector &image_shape, } TEST_F(FusedConv2dOpTest, OPENCL7X7ConvNxNS12) { - TestGeneralConvNxNS12({32, 32}, - {7, 7, 3, 64}); + TestGeneralConvNxNS12({32, 32}, {7, 7, 3, 64}); } TEST_F(FusedConv2dOpTest, OPENCL15X1ConvNxNS12) { - TestGeneralConvNxNS12({40, 40}, - {15, 1, 32, 64}); + TestGeneralConvNxNS12({40, 40}, {15, 1, 32, 64}); } template diff --git a/mace/ops/global_avg_pooling.cc b/mace/ops/global_avg_pooling.cc index f495c712..65fd7f43 100644 --- a/mace/ops/global_avg_pooling.cc +++ b/mace/ops/global_avg_pooling.cc @@ -6,16 +6,20 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("GlobalAvgPooling") - .TypeConstraint("T") - .Build(), - GlobalAvgPoolingOp); +void Register_GlobalAvgPooling(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + GlobalAvgPoolingOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("GlobalAvgPooling") - .TypeConstraint("T") - .Build(), - GlobalAvgPoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + GlobalAvgPoolingOp); #endif // MACE_ENABLE_NEON +} } // namespace mace diff --git a/mace/ops/global_avg_pooling_benchmark.cc b/mace/ops/global_avg_pooling_benchmark.cc index 3638243f..523ea924 100644 --- a/mace/ops/global_avg_pooling_benchmark.cc +++ b/mace/ops/global_avg_pooling_benchmark.cc @@ -22,7 +22,8 @@ static void GlobalAvgPooling( .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", + {batch, channels, height, width}); // Warm-up for (int i = 0; i < 5; ++i) { diff --git a/mace/ops/image_to_buffer.cc b/mace/ops/image_to_buffer.cc index bcf8b997..88835145 
100644
--- a/mace/ops/image_to_buffer.cc
+++ b/mace/ops/image_to_buffer.cc
@@ -6,14 +6,18 @@
 namespace mace {

-REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer")
-                             .TypeConstraint<float>("T")
-                             .Build(),
-                         ImageToBufferOp<DeviceType::OPENCL, float>);
+void Register_ImageToBuffer(OperatorRegistry *op_registry) {
+  REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer")
+                                     .Device(DeviceType::OPENCL)
+                                     .TypeConstraint<float>("T")
+                                     .Build(),
+                    ImageToBufferOp<DeviceType::OPENCL, float>);

-REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ImageToBuffer")
-                             .TypeConstraint<half>("T")
-                             .Build(),
-                         ImageToBufferOp<DeviceType::OPENCL, half>);
+  REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer")
+                                     .Device(DeviceType::OPENCL)
+                                     .TypeConstraint<half>("T")
+                                     .Build(),
+                    ImageToBufferOp<DeviceType::OPENCL, half>);
+}

 }  // namespace mace
diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h
index e9f2fd04..b9fd14c9 100644
--- a/mace/ops/ops_test_util.h
+++ b/mace/ops/ops_test_util.h
@@ -10,9 +10,9 @@
 #include "gtest/gtest.h"
 #include "mace/core/common.h"
 #include "mace/core/net.h"
+#include "mace/core/runtime/opencl/opencl_runtime.h"
 #include "mace/core/tensor.h"
 #include "mace/core/workspace.h"
-#include "mace/core/runtime/opencl/opencl_runtime.h"
 #include "mace/kernels/opencl/helper.h"
 #include "mace/utils/utils.h"
@@ -56,7 +56,8 @@ class OpDefBuilder {
     return *this;
   }
-  OpDefBuilder AddIntsArg(const std::string &name, const std::vector<int> &values) {
+  OpDefBuilder AddIntsArg(const std::string &name,
+                          const std::vector<int> &values) {
     auto arg = op_def_.add_arg();
     arg->set_name(name);
     for (auto value : values) {
@@ -65,7 +66,8 @@ class OpDefBuilder {
     return *this;
   }
-  OpDefBuilder AddFloatsArg(const std::string &name, const std::vector<float> &values) {
+  OpDefBuilder AddFloatsArg(const std::string &name,
+                            const std::vector<float> &values) {
     auto arg = op_def_.add_arg();
     arg->set_name(name);
     for (auto value : values) {
@@ -75,7 +77,7 @@
   }
   OpDefBuilder AddStringsArg(const std::string &name,
-                            const std::vector<std::string> &values) {
+                             const std::vector<std::string> &values) {
     auto arg = op_def_.add_arg();
     arg->set_name(name);
     for (auto value : values) {
@@ -94,7 +96,7 @@
 class OpsTestNet {
  public:
-  OpsTestNet() {}
+  OpsTestNet() : op_registry_(new OperatorRegistry()) {};
  template <DeviceType D, typename T>
  void AddInputFromArray(const std::string &name,
@@ -135,10 +137,11 @@ class OpsTestNet {
     std::mt19937 gen(rd());
     std::normal_distribution<float> nd(0, 1);
     if (DataTypeToEnum<T>::value == DT_HALF) {
-      std::generate(input_data, input_data + input->size(),
-                    [&gen, &nd, positive] {
-        return half_float::half_cast<half>(positive ? std::abs(nd(gen)) : nd(gen));
-      });
+      std::generate(
+          input_data, input_data + input->size(), [&gen, &nd, positive] {
+            return half_float::half_cast<half>(positive ?
std::abs(nd(gen)) + : nd(gen)); + }); } else { std::generate(input_data, input_data + input->size(), [&gen, &nd, positive] { @@ -160,7 +163,7 @@ class OpsTestNet { for (auto &op_def_ : op_defs_) { net_def.add_op()->CopyFrom(op_def_); } - net_ = CreateNet(net_def, &ws_, device); + net_ = CreateNet(op_registry_, net_def, &ws_, device); device_ = device; return net_->Run(); } @@ -182,6 +185,7 @@ class OpsTestNet { } public: + std::shared_ptr op_registry_; Workspace ws_; std::vector op_defs_; std::unique_ptr net_; @@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector &shape, res.resize(size); if (DataTypeToEnum::value == DT_HALF) { - std::generate(res.begin(), res.end(), [&gen, &nd] { return half_float::half_cast(nd(gen)); }); + std::generate(res.begin(), res.end(), + [&gen, &nd] { return half_float::half_cast(nd(gen)); }); } else { std::generate(res.begin(), res.end(), [&gen, &nd] { return nd(gen); }); } @@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector &shape, template unique_ptr CreateTensor(const std::vector &shape, const std::vector &data) { - unique_ptr res(new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum::v())); + unique_ptr res( + new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum::v())); res->Resize(shape); T *input_data = res->mutable_data(); memcpy(input_data, data.data(), data.size() * sizeof(T)); @@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) { template struct is_floating_point_type { - static const bool value = - std::is_same::value || std::is_same::value - || std::is_same::value; + static const bool value = std::is_same::value || + std::is_same::value || + std::is_same::value; }; template @@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) { << "y.shape [ " << ShapeToString(y) << "]"; } -template ::value> +template ::value> struct Expector; // Partial specialization for float and double. 
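// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): the ops_test_util.h hunks above
// replace static operator registration with an explicit OperatorRegistry that
// OpsTestNet now owns and hands to CreateNet. A minimal sketch of the new
// wiring, assuming the OperatorRegistry, OpKeyBuilder, REGISTER_OPERATOR and
// CreateNet interfaces that appear elsewhere in this patch; "MyOp" and
// Register_MyOp are hypothetical names used only for illustration, and how the
// per-op Register_* hooks are actually invoked is defined in
// mace/core/operator.cc, which is not shown in this excerpt.
//
//   // Per-op registration hook: populate the registry passed in by the caller
//   // instead of relying on a static-initializer side effect.
//   void Register_MyOp(OperatorRegistry *op_registry) {
//     REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
//                                        .Device(DeviceType::CPU)
//                                        .TypeConstraint<float>("T")
//                                        .Build(),
//                       MyOp<DeviceType::CPU, float>);
//   }
//
//   // Caller side: build a registry, register the op, and pass the registry
//   // to CreateNet (mirrors the core_test.cc and OpsTestNet changes above).
//   std::shared_ptr<OperatorRegistry> op_registry(new OperatorRegistry());
//   Register_MyOp(op_registry.get());
//   auto net = CreateNet(op_registry, net_def, &ws, DeviceType::CPU);
//   net->Run();
// ---------------------------------------------------------------------------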
@@ -343,7 +351,6 @@ struct Expector { } } } - }; template @@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { template void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { - static_assert(is_floating_point_type::value - && is_floating_point_type::value, + static_assert(is_floating_point_type::value && + is_floating_point_type::value, "T is not a floating point type"); Expector::Near(x, y, abs_err); } diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index 3a467d12..d372f242 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -6,29 +6,36 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); -REGISTER_CPU_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); +void Register_Pooling(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + PoolingOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + PoolingOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Pooling") - .TypeConstraint("T") - .Build(), - PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + PoolingOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + PoolingOp); +} } // namespace mace diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc index 479563ec..2a6580d8 100644 --- a/mace/ops/pooling_benchmark.cc +++ b/mace/ops/pooling_benchmark.cc @@ -35,7 +35,8 @@ static void Pooling(int iters, .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("Input", {batch, channels, height, width}); + net.AddRandomInput("Input", + {batch, channels, height, width}); // Warm-up for (int i = 0; i < 5; ++i) { diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index 1e1a992a..c802c126 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -29,7 +29,7 @@ TEST_F(PoolingOpTest, MAX_VALID) { // Add input data net.AddInputFromArray( "Input", {1, 4, 4, 2}, - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); // Run @@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } - TEST_F(PoolingOpTest, MAX_SAME) { // Construct graph OpsTestNet net; @@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -template +template static void SimpleMaxPooling3S2() { // Construct graph OpsTestNet net; @@ -130,11 +129,12 @@ static void SimpleMaxPooling3S2() { // Add input data net.AddInputFromArray( "Input", {1, 3, 9, 1}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}); if (D == 
DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() { .AddIntsArg("dilations", {1, 1}) .Finalize(net.NewOperatorDef()); net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { // Run OpDefBuilder("Pooling", "PoolingTest") @@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { - SimpleMaxPooling3S2(); -} +TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { SimpleMaxPooling3S2(); } TEST_F(PoolingOpTest, OPENCLSimpleMaxPooling3S2) { SimpleMaxPooling3S2(); } -template +template static void MaxPooling3S2(const std::vector &input_shape, const std::vector strides, Padding padding) { @@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector &input_shape, .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); net.RunOp(D); - ImageToBuffer(net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.001); } // TODO(chenghui) : there is a bug. -//TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) { +// TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) { // AlignedMaxPooling3S2(Padding::VALID); // AlignedMaxPooling3S2(Padding::SAME); //} @@ -259,7 +259,7 @@ TEST_F(PoolingOpTest, AVG_VALID) { // Add input data net.AddInputFromArray( "Input", {1, 4, 4, 2}, - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); // Run @@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } -template +template static void SimpleAvgPoolingTest() { // Construct graph OpsTestNet net; @@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() { "Input", {1, 2, 8, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Pooling", "PoolingTest") .Input("InputImage") .Output("OutputImage") @@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() { .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); // Check auto expected = CreateTensor({1, 1, 4, 1}, {4.5, 6.5, 8.5, 10.5}); @@ -306,11 +308,11 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) { SimpleAvgPoolingTest(); } -template +template static void AvgPoolingTest(const std::vector &shape, - const std::vector &kernels, - const std::vector &strides, - Padding padding) { + const std::vector &kernels, + const std::vector &strides, + Padding padding) { // Construct graph OpsTestNet net; OpDefBuilder("Pooling", "PoolingTest") @@ -343,38 +345,49 @@ static void AvgPoolingTest(const std::vector &shape, .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); net.RunOp(D); - ImageToBuffer(net, 
"OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "OPENCLOutput", + kernels::BufferType::IN_OUT); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.01); } TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) { - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, + Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) { - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, + Padding::VALID); AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) { - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID); - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::VALID); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) { - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::VALID); - AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, Padding::SAME); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::VALID); + AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLUnAlignedAvgPooling) { - AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::VALID); - AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::SAME); + AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, + Padding::VALID); + AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) { - AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::VALID); - AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::SAME); + AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, + Padding::VALID); + AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, + Padding::SAME); } - diff --git a/mace/ops/relu.cc b/mace/ops/relu.cc index 0197e65c..f9f7b3be 100644 --- a/mace/ops/relu.cc +++ b/mace/ops/relu.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); +void Register_Relu(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ReluOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + ReluOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ReluOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("Relu") - .TypeConstraint("T") - .Build(), - ReluOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("Relu") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ReluOp); +} } // namespace mace diff --git a/mace/ops/relu_benchmark.cc b/mace/ops/relu_benchmark.cc index c68009c9..1b5d3624 100644 --- a/mace/ops/relu_benchmark.cc +++ b/mace/ops/relu_benchmark.cc @@ -19,7 +19,8 @@ static void ReluBenchmark( 
net.AddRandomInput("Input", {batch, height, width, channels}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluBM") .Input("InputImage") @@ -54,9 +55,9 @@ static void ReluBenchmark( } \ BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE) -#define BM_RELU(N, C, H, W, TYPE) \ - BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \ - BM_RELU_MACRO(N, C, H, W, TYPE, NEON);\ +#define BM_RELU(N, C, H, W, TYPE) \ + BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \ + BM_RELU_MACRO(N, C, H, W, TYPE, NEON); \ BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL); BM_RELU(1, 1, 512, 512, float); diff --git a/mace/ops/relu_test.cc b/mace/ops/relu_test.cc index e74b927e..e2a59a23 100644 --- a/mace/ops/relu_test.cc +++ b/mace/ops/relu_test.cc @@ -14,13 +14,13 @@ void TestSimple() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {2, 2, 2, 2}, - {-7, 7, -6, 6, -5, 5, -4, 4, - -3, 3, -2, 2, -1, 1, 0, 0}); + net.AddInputFromArray( + "Input", {2, 2, 2, 2}, + {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -31,7 +31,8 @@ void TestSimple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -42,38 +43,30 @@ void TestSimple() { net.RunOp(D); } - auto expected = CreateTensor({2, 2, 2, 2}, - {0, 7, 0, 6, 0, 5, 0, 4, - 0, 3, 0, 2, 0, 1, 0, 0}); + auto expected = CreateTensor( + {2, 2, 2, 2}, {0, 7, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } -TEST_F(ReluOpTest, CPUSimple) { - TestSimple(); -} +TEST_F(ReluOpTest, CPUSimple) { TestSimple(); } #if __ARM_NEON -TEST_F(ReluOpTest, NEONSimple) { - TestSimple(); -} +TEST_F(ReluOpTest, NEONSimple) { TestSimple(); } #endif -TEST_F(ReluOpTest, OPENCLSimple) { - TestSimple(); -} +TEST_F(ReluOpTest, OPENCLSimple) { TestSimple(); } template void TestUnalignedSimple() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {1, 3, 2, 1}, - {-7, 7, -6, 6, -5, 5}); + net.AddInputFromArray("Input", {1, 3, 2, 1}, {-7, 7, -6, 6, -5, 5}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -84,7 +77,8 @@ void TestUnalignedSimple() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -95,8 +89,7 @@ void TestUnalignedSimple() { net.RunOp(D); } - auto expected = CreateTensor({1, 3, 2, 1}, - {0, 7, 0, 6, 0, 5}); + auto expected = CreateTensor({1, 3, 2, 1}, {0, 7, 0, 6, 0, 5}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } @@ -120,13 +113,13 @@ void TestSimpleReluX() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {2, 2, 2, 2}, - {-7, 7, -6, 6, -5, 5, -4, 4, - -3, 3, -2, 2, -1, 1, 0, 0}); + net.AddInputFromArray( + 
"Input", {2, 2, 2, 2}, + {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -138,7 +131,8 @@ void TestSimpleReluX() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -150,38 +144,31 @@ void TestSimpleReluX() { net.RunOp(D); } - auto expected = CreateTensor({2, 2, 2, 2}, - {0, 6, 0, 6, 0, 5, 0, 4, - 0, 3, 0, 2, 0, 1, 0, 0}); + auto expected = CreateTensor( + {2, 2, 2, 2}, {0, 6, 0, 6, 0, 5, 0, 4, 0, 3, 0, 2, 0, 1, 0, 0}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } -TEST_F(ReluOpTest, CPUSimpleReluX) { - TestSimpleReluX(); -} +TEST_F(ReluOpTest, CPUSimpleReluX) { TestSimpleReluX(); } #if __ARM_NEON -TEST_F(ReluOpTest, NEONSimpleReluX) { - TestSimpleReluX(); -} +TEST_F(ReluOpTest, NEONSimpleReluX) { TestSimpleReluX(); } #endif -TEST_F(ReluOpTest, OPENCLSimpleReluX) { - TestSimpleReluX(); -} +TEST_F(ReluOpTest, OPENCLSimpleReluX) { TestSimpleReluX(); } template void TestUnalignedSimpleReluX() { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", - {1, 1, 7, 1}, + net.AddInputFromArray("Input", {1, 1, 7, 1}, {-7, 7, -6, 6, -5, 5, -4}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("Relu", "ReluTest") .Input("InputImage") @@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() { net.RunOp(D); // Transfer output - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); } else { OpDefBuilder("Relu", "ReluTest") .Input("Input") @@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() { net.RunOp(D); } - auto expected = CreateTensor({1, 1, 7, 1}, - {0, 6, 0, 6, 0, 5, 0}); + auto expected = CreateTensor({1, 1, 7, 1}, {0, 6, 0, 6, 0, 5, 0}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 89f460fe..b44f462b 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -6,26 +6,32 @@ namespace mace { -REGISTER_CPU_OPERATOR(OpKeyBuilder("ResizeBilinear") - .TypeConstraint("T") - .Build(), - ResizeBilinearOp); +void Register_ResizeBilinear(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), + ResizeBilinearOp); #if MACE_ENABLE_NEON -REGISTER_NEON_OPERATOR(OpKeyBuilder("ResizeBilinear") - .TypeConstraint("T") - .Build(), - ResizeBilinearOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") + .Device(DeviceType::NEON) + .TypeConstraint("T") + .Build(), + ResizeBilinearOp); #endif // MACE_ENABLE_NEON -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear") - .TypeConstraint("T") - .Build(), - ResizeBilinearOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ResizeBilinearOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear") - .TypeConstraint("T") - .Build(), - ResizeBilinearOp); + 
REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + ResizeBilinearOp); +} } // namespace mace diff --git a/mace/ops/resize_bilinear_benchmark.cc b/mace/ops/resize_bilinear_benchmark.cc index 9a51b03c..46b96123 100644 --- a/mace/ops/resize_bilinear_benchmark.cc +++ b/mace/ops/resize_bilinear_benchmark.cc @@ -26,22 +26,23 @@ static void ResizeBilinearBenchmark(int iters, net.AddInputFromArray("OutSize", {2}, {output_height, output_width}); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark") - .Input("InputImage") - .Input("OutSize") - .Output("OutputImage") - .AddIntsArg("size", {output_height, output_width}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("OutSize") + .Output("OutputImage") + .AddIntsArg("size", {output_height, output_width}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); } else { OpDefBuilder("ResizeBilinear", "ResizeBilinearBenchmark") - .Input("Input") - .Input("OutSize") - .Output("Output") - .AddIntsArg("size", {output_height, output_width}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Input("OutSize") + .Output("Output") + .AddIntsArg("size", {output_height, output_width}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); } // Warm-up @@ -68,8 +69,8 @@ static void ResizeBilinearBenchmark(int iters, BENCHMARK( \ BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE) -#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE) \ - BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \ +#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE) \ + BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \ BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL); // SNPE 835 GPU: 6870us diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc index 8d7f2d55..06b715a0 100644 --- a/mace/ops/resize_bilinear_test.cc +++ b/mace/ops/resize_bilinear_test.cc @@ -80,29 +80,31 @@ void TestRandomResizeBilinear() { {batch, in_height, in_width, channels}); OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") - .Input("Input") - .Output("Output") - .AddIntArg("align_corners", align_corners) - .AddIntsArg("size", {height, width}) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Output("Output") + .AddIntArg("align_corners", align_corners) + .AddIntsArg("size", {height, width}) + .Finalize(net.NewOperatorDef()); // Run on CPU net.RunOp(DeviceType::CPU); Tensor expected; expected.Copy(*net.GetOutput("Output")); if (D == DeviceType::OPENCL) { - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntArg("align_corners", align_corners) - .AddIntsArg("size", {height, width}) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntArg("align_corners", align_corners) + .AddIntsArg("size", {height, width}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "DeviceOutput", 
kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "DeviceOutput", + kernels::BufferType::IN_OUT); } else { // TODO support NEON } diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index 9b24f591..0e5c293d 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -6,13 +6,17 @@ namespace mace { -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND") - .TypeConstraint("T") - .Build(), - SpaceToBatchNDOp); -REGISTER_OPENCL_OPERATOR(OpKeyBuilder("SpaceToBatchND") - .TypeConstraint("T") - .Build(), - SpaceToBatchNDOp); +void Register_SpaceToBatchND(OperatorRegistry *op_registry) { + REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + SpaceToBatchNDOp); + REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND") + .Device(DeviceType::OPENCL) + .TypeConstraint("T") + .Build(), + SpaceToBatchNDOp); +} } // namespace mace diff --git a/mace/ops/space_to_batch_benchmark.cc b/mace/ops/space_to_batch_benchmark.cc index 9afa88b9..a2fea8dc 100644 --- a/mace/ops/space_to_batch_benchmark.cc +++ b/mace/ops/space_to_batch_benchmark.cc @@ -15,7 +15,8 @@ static void BMSpaceToBatch( OpsTestNet net; net.AddRandomInput("Input", {batch, height, width, channels}); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") .Input("InputImage") .Output("OutputImage") @@ -36,17 +37,19 @@ static void BMSpaceToBatch( net.Sync(); } -#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \ - static void BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - BMSpaceToBatch(iters, N, H, W, C, SHAPE); \ - } \ - BENCHMARK(BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE) - -#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \ +#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \ + static void \ + BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + BMSpaceToBatch(iters, N, H, W, C, SHAPE); \ + } \ + BENCHMARK( \ + BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE) + +#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \ BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL); BM_SPACE_TO_BATCH(128, 16, 16, 128, 2, float); diff --git a/mace/ops/space_to_batch_test.cc b/mace/ops/space_to_batch_test.cc index 4c1dbbdc..bebbafef 100644 --- a/mace/ops/space_to_batch_test.cc +++ b/mace/ops/space_to_batch_test.cc @@ -2,23 +2,23 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// +#include #include "gtest/gtest.h" #include "mace/ops/ops_test_util.h" -#include using namespace mace; -template +template void RunSpaceToBatch(const std::vector &input_shape, const std::vector &input_data, const std::vector &block_shape_data, const std::vector &padding_data, const Tensor *expected) { OpsTestNet net; - net.AddInputFromArray( - "Input", input_shape, input_data); + net.AddInputFromArray("Input", input_shape, input_data); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") .Input("InputImage") .Output("OutputImage") @@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector &input_shape, // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); // Check ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-8); } -template +template void RunBatchToSpace(const std::vector &input_shape, const std::vector &input_data, const std::vector &block_shape_data, @@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector &input_shape, const Tensor *expected) { OpsTestNet net; // Add input data - net.AddInputFromArray( - "Input", input_shape, input_data); + net.AddInputFromArray("Input", input_shape, input_data); - BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); + BufferToImage(net, "Input", "InputImage", + kernels::BufferType::IN_OUT); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") .Input("InputImage") .Output("OutputImage") @@ -56,33 +57,33 @@ void RunBatchToSpace(const std::vector &input_shape, // Run net.RunOp(D); - ImageToBuffer(net, "OutputImage", "Output", kernels::BufferType::IN_OUT); + ImageToBuffer(net, "OutputImage", "Output", + kernels::BufferType::IN_OUT); // Check ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-8); } -template +template void TestBidirectionalTransform(const std::vector &space_shape, const std::vector &space_data, const std::vector &block_data, const std::vector &padding_data, const std::vector &batch_shape, const std::vector &batch_data) { - - auto space_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), - DataTypeToEnum::v())); + auto space_tensor = unique_ptr(new Tensor( + GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum::v())); space_tensor->Resize(space_shape); { Tensor::MappingGuard space_mapper(space_tensor.get()); T *space_ptr = space_tensor->mutable_data(); MACE_CHECK(static_cast(space_tensor->size()) == space_data.size()) - << "Space tensor size:" << space_tensor->size() - << ", space data size:" << space_data.size(); + << "Space tensor size:" << space_tensor->size() + << ", space data size:" << space_data.size(); memcpy(space_ptr, space_data.data(), space_data.size() * sizeof(T)); } - auto batch_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), - DataTypeToEnum::v())); + auto batch_tensor = unique_ptr(new Tensor( + GetDeviceAllocator(DeviceType::OPENCL), DataTypeToEnum::v())); batch_tensor->Resize(batch_shape); { Tensor::MappingGuard batch_mapper(batch_tensor.get()); @@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector &space_shape, memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(T)); } - RunSpaceToBatch(space_shape, space_data, - block_data, - padding_data, - batch_tensor.get()); + RunSpaceToBatch(space_shape, space_data, block_data, + padding_data, 
batch_tensor.get()); - RunBatchToSpace(batch_shape, batch_data, - block_data, - padding_data, - space_tensor.get()); + RunBatchToSpace(batch_shape, batch_data, block_data, + padding_data, space_tensor.get()); } TEST(SpaceToBatchTest, SmallData) { - TestBidirectionalTransform({1, 2, 2, 1}, - {1, 2, 3, 4}, - {2, 2}, - {0, 0, 0, 0}, - {4, 1, 1, 1}, - {1, 2, 3, 4} - ); + TestBidirectionalTransform({1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2}, + {0, 0, 0, 0}, {4, 1, 1, 1}, {1, 2, 3, 4}); } TEST(SpaceToBatchTest, SmallDataWithOnePadding) { - TestBidirectionalTransform({1, 2, 2, 1}, - {1, 2, 3, 4}, - {3, 3}, - {1, 0, 1, 0}, - {9, 1, 1, 1}, - {0, 0, 0, 0, 1, 2, 0, 3, 4} - ); + TestBidirectionalTransform({1, 2, 2, 1}, {1, 2, 3, 4}, {3, 3}, + {1, 0, 1, 0}, {9, 1, 1, 1}, + {0, 0, 0, 0, 1, 2, 0, 3, 4}); } TEST(SpaceToBatchTest, SmallDataWithTwoPadding) { - TestBidirectionalTransform({1, 2, 2, 1}, - {1, 2, 3, 4}, - {2, 2}, - {1, 1, 1, 1}, - {4, 2, 2, 1}, - {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0} - ); + TestBidirectionalTransform( + {1, 2, 2, 1}, {1, 2, 3, 4}, {2, 2}, {1, 1, 1, 1}, {4, 2, 2, 1}, + {0, 0, 0, 4, 0, 0, 3, 0, 0, 2, 0, 0, 1, 0, 0, 0}); } TEST(SpaceToBatchTest, SmallDataWithLargeImage) { - TestBidirectionalTransform({1, 2, 10, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, - {2, 2}, - {0, 0, 0, 0}, - {4, 1, 5, 1}, - {1, 3, 5, 7, 9, - 2, 4, 6, 8, 10, - 11, 13, 15, 17, 19, - 12, 14, 16, 18, 20} - ); + TestBidirectionalTransform( + {1, 2, 10, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, + {2, 2}, {0, 0, 0, 0}, {4, 1, 5, 1}, + {1, 3, 5, 7, 9, 2, 4, 6, 8, 10, 11, 13, 15, 17, 19, 12, 14, 16, 18, 20}); } TEST(SpaceToBatchTest, MultiChannelData) { - TestBidirectionalTransform({1, 2, 2, 3}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, - {2, 2}, - {0, 0, 0, 0}, - {4, 1, 1, 3}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12} - ); + TestBidirectionalTransform( + {1, 2, 2, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, {2, 2}, + {0, 0, 0, 0}, {4, 1, 1, 3}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}); } TEST(SpaceToBatchTest, LargerMultiChannelData) { - TestBidirectionalTransform({1, 4, 4, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - {2, 2}, - {0, 0, 0, 0}, - {4, 2, 2, 1}, - {1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16} - ); + TestBidirectionalTransform( + {1, 4, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {2, 2}, {0, 0, 0, 0}, {4, 2, 2, 1}, + {1, 3, 9, 11, 2, 4, 10, 12, 5, 7, 13, 15, 6, 8, 14, 16}); } TEST(SpaceToBatchTest, MultiBatchData) { - TestBidirectionalTransform({2, 2, 4, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, - {2, 2}, - {0, 0, 0, 0}, - {8, 1, 2, 1}, - {1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16} - ); + TestBidirectionalTransform( + {2, 2, 4, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {2, 2}, {0, 0, 0, 0}, {8, 1, 2, 1}, + {1, 3, 2, 4, 5, 7, 6, 8, 9, 11, 10, 12, 13, 15, 14, 16}); } TEST(SpaceToBatchTest, MultiBatchAndChannelData) { - TestBidirectionalTransform({2, 2, 4, 2}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, - 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, - {2, 2}, - {0, 0, 0, 0}, - {8, 1, 2, 2}, - {1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16, - 17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32} - ); + TestBidirectionalTransform( + {2, 2, 4, 2}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, + {2, 
2}, {0, 0, 0, 0}, {8, 1, 2, 2}, + {1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16, + 17, 18, 21, 22, 19, 20, 23, 24, 25, 26, 29, 30, 27, 28, 31, 32}); } -//TEST(SpaceTobatchTest, CompareTF) { +// TEST(SpaceTobatchTest, CompareTF) { // // const std::string space_file = "/data/local/tmp/test/input"; // const std::string batch_file = "/data/local/tmp/test/output"; // const std::vector space_shape = {1, 256, 256, 32}; -// const int space_size = std::accumulate(space_shape.begin(), space_shape.end(), 1, std::multiplies()); +// const int space_size = std::accumulate(space_shape.begin(), +// space_shape.end(), 1, std::multiplies()); // const std::vector batch_shape = {4, 130, 130, 32}; -// const int batch_size = std::accumulate(batch_shape.begin(), batch_shape.end(), 1, std::multiplies()); +// const int batch_size = std::accumulate(batch_shape.begin(), +// batch_shape.end(), 1, std::multiplies()); // -// auto space_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), +// auto space_tensor = unique_ptr(new +// Tensor(GetDeviceAllocator(DeviceType::OPENCL), // DataTypeToEnum::v())); // space_tensor->Resize(space_shape); // std::vector space_data(space_size, 0.0); @@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { // VLOG(0) << "open space file failed"; // } // -// auto batch_tensor = unique_ptr(new Tensor(GetDeviceAllocator(DeviceType::OPENCL), +// auto batch_tensor = unique_ptr(new +// Tensor(GetDeviceAllocator(DeviceType::OPENCL), // DataTypeToEnum::v())); // std::vector batch_data(batch_size, 0.0); // batch_tensor->Resize(batch_shape); @@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { // } // Tensor::MappingGuard batch_mapper(batch_tensor.get()); // float *batch_ptr = batch_tensor->mutable_data(); -// MACE_CHECK(static_cast(batch_tensor->size()) == batch_data.size()); +// MACE_CHECK(static_cast(batch_tensor->size()) == +// batch_data.size()); // memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float)); // } // @@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { // {2, 2, 2, 2}, // space_tensor.get()); //} - diff --git a/mace/python/tools/model.template b/mace/python/tools/model.template index 6588d9a2..5989b86f 100644 --- a/mace/python/tools/model.template +++ b/mace/python/tools/model.template @@ -27,12 +27,12 @@ void Create{{tensor.name}}(std::vector &tensors) { #include "mace/core/public/mace.h" namespace { -static void UpdateOp(mace::OperatorDef &op, - const std::string &name, - const std::string &type, - const std::vector &inputs, - const std::vector &outputs, - const std::vector &output_types) { +void UpdateOp(mace::OperatorDef &op, + const std::string &name, + const std::string &type, + const std::vector &inputs, + const std::vector &outputs, + const std::vector &output_types) { op.set_name(name); op.set_type(type); op.set_input(inputs); diff --git a/tools/bazel-adb-run.sh b/tools/bazel-adb-run.sh index 116e64db..91347fb3 100755 --- a/tools/bazel-adb-run.sh +++ b/tools/bazel-adb-run.sh @@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//} BAZEL_BIN_PATH=bazel-bin/$BAZEL_BIN_PATH BIN_NAME=`echo $BAZEL_TARGET | cut -d: -f2` -ANDROID_ABI=armeabi-v7a ANDROID_ABI=arm64-v8a -STRIP="" +ANDROID_ABI=armeabi-v7a STRIP="--strip always" VLOG_LEVEL=0 PROFILINE="--define profiling=true" @@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \ --crosstool_top=//external:android/crosstool \ --host_crosstool_top=@bazel_tools//tools/cpp:toolchain \ --cpu=$ANDROID_ABI \ - --define 
neon=true + --define neon=false if [ $? -ne 0 ]; then exit 1 -- GitLab
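
For reference, the operator registration pattern introduced by the hunks above (Register_ResizeBilinear, Register_SpaceToBatchND) generalizes as in the following sketch. "MyOp", "Register_MyOp", and "mace/ops/my_op.h" are hypothetical placeholders, and the explicit template arguments are assumptions modeled on the OpKeyBuilder / REGISTER_OPERATOR usage visible in this patch rather than code copied from it:

// A minimal sketch of registering a hypothetical operator through the
// explicit OperatorRegistry, mirroring Register_ResizeBilinear and
// Register_SpaceToBatchND above. Nothing here is part of the patch.
#include "mace/core/operator.h"  // assumed to declare OperatorRegistry,
                                 // OpKeyBuilder and REGISTER_OPERATOR
#include "mace/ops/my_op.h"      // hypothetical header defining MyOp<D, T>

namespace mace {

void Register_MyOp(OperatorRegistry *op_registry) {
  // CPU kernel for float inputs.
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
                                     .Device(DeviceType::CPU)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    MyOp<DeviceType::CPU, float>);

  // OpenCL kernel for float inputs; additional type constraints would be
  // registered with further REGISTER_OPERATOR calls in the same function.
  REGISTER_OPERATOR(op_registry, OpKeyBuilder("MyOp")
                                     .Device(DeviceType::OPENCL)
                                     .TypeConstraint<float>("T")
                                     .Build(),
                    MyOp<DeviceType::OPENCL, float>);
}

}  // namespace mace

Registration is thus explicit through the OperatorRegistry pointer passed to each Register_* function, replacing the per-device REGISTER_CPU_OPERATOR / REGISTER_NEON_OPERATOR / REGISTER_OPENCL_OPERATOR macros removed in these hunks; the Register_* functions are presumably invoked once when the concrete registry instance is constructed.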