From 8ae8f575d65c39932ecce6e82de62224d8ee68b3 Mon Sep 17 00:00:00 2001 From: Liangliang He Date: Fri, 15 Sep 2017 16:46:02 +0800 Subject: [PATCH] Fix google coding style --- mace/core/allocator.cc | 10 +- mace/core/allocator.h | 10 +- mace/core/common.h | 14 +- mace/core/logging.cc | 5 +- mace/core/logging.h | 39 +++-- mace/core/macros.h | 3 +- mace/core/net.cc | 27 ++-- mace/core/net.h | 29 ++-- mace/core/operator.cc | 29 ++-- mace/core/operator.h | 83 ++++------ mace/core/proto_utils.cc | 161 +++++++++---------- mace/core/proto_utils.h | 93 +++++------ mace/core/registry.h | 43 +++--- mace/core/serializer.cc | 29 ++-- mace/core/serializer.h | 8 +- mace/core/tensor.h | 55 +++---- mace/core/testing/test_benchmark.cc | 9 +- mace/core/testing/test_benchmark.h | 6 +- mace/core/testing/test_benchmark_main.cc | 1 - mace/core/types.h | 31 ++-- mace/core/workspace.cc | 17 +- mace/core/workspace.h | 8 +- mace/examples/benchmark_example.cc | 5 +- mace/kernels/addn.h | 16 +- mace/kernels/batch_norm.h | 39 ++--- mace/kernels/conv_2d.h | 179 ++++++++++------------ mace/kernels/conv_pool_2d_util.cc | 38 ++--- mace/kernels/conv_pool_2d_util.h | 26 ++-- mace/kernels/neon/addn_neon.cc | 13 +- mace/kernels/neon/batch_norm_neon.cc | 38 ++--- mace/kernels/neon/conv_2d_neon.cc | 82 ++++------ mace/kernels/neon/conv_2d_neon_1x1.cc | 61 ++++---- mace/kernels/neon/conv_2d_neon_3x3.cc | 139 +++++++++-------- mace/kernels/neon/conv_2d_neon_5x5.cc | 32 ++-- mace/kernels/neon/max_pooling_neon_2x2.cc | 19 +-- mace/kernels/neon/max_pooling_neon_3x3.cc | 23 ++- mace/kernels/neon/pooling_neon.cc | 54 +++---- mace/kernels/neon/relu_neon.cc | 13 +- mace/kernels/pooling.h | 64 ++++---- mace/kernels/relu.h | 8 +- mace/kernels/resize_bilinear.h | 59 ++++--- mace/ops/addn.cc | 4 +- mace/ops/addn.h | 8 +- mace/ops/addn_benchmark.cc | 27 ++-- mace/ops/addn_test.cc | 2 +- mace/ops/batch_norm.cc | 4 +- mace/ops/batch_norm.h | 89 ++++++----- mace/ops/batch_norm_benchmark.cc | 42 ++--- mace/ops/batch_norm_test.cc | 35 ++--- mace/ops/conv_2d.cc | 4 +- mace/ops/conv_2d.h | 29 ++-- mace/ops/conv_2d_benchmark.cc | 48 +++--- mace/ops/conv_2d_test.cc | 176 +++++++++------------ mace/ops/conv_pool_2d_base.h | 17 +- mace/ops/ops_test_util.h | 85 +++++----- mace/ops/pooling.cc | 5 +- mace/ops/pooling.h | 45 +++--- mace/ops/pooling_benchmark.cc | 36 +++-- mace/ops/pooling_test.cc | 117 ++++++-------- mace/ops/relu.cc | 4 +- mace/ops/relu.h | 8 +- mace/ops/relu_benchmark.cc | 24 ++- mace/ops/relu_test.cc | 2 +- mace/ops/resize_bilinear.cc | 7 +- mace/ops/resize_bilinear.h | 19 +-- mace/ops/resize_bilinear_test.cc | 2 +- 66 files changed, 1096 insertions(+), 1361 deletions(-) diff --git a/mace/core/allocator.cc b/mace/core/allocator.cc index 371bd593..61d5ee2a 100644 --- a/mace/core/allocator.cc +++ b/mace/core/allocator.cc @@ -7,13 +7,9 @@ namespace mace { static std::unique_ptr g_cpu_allocator(new CPUAllocator()); -CPUAllocator* cpu_allocator() { - return g_cpu_allocator.get(); -} +CPUAllocator* cpu_allocator() { return g_cpu_allocator.get(); } -void SetCPUAllocator(CPUAllocator* alloc) { - g_cpu_allocator.reset(alloc); -} +void SetCPUAllocator(CPUAllocator* alloc) { g_cpu_allocator.reset(alloc); } Allocator* GetDeviceAllocator(DeviceType type) { switch (type) { @@ -26,4 +22,4 @@ Allocator* GetDeviceAllocator(DeviceType type) { return nullptr; } -} // namespace mace +} // namespace mace diff --git a/mace/core/allocator.h b/mace/core/allocator.h index 0cde9c61..bfce30e5 100644 --- a/mace/core/allocator.h +++ b/mace/core/allocator.h @@ -39,7 +39,7 @@ class Allocator { } }; -class CPUAllocator: public Allocator { +class CPUAllocator : public Allocator { public: ~CPUAllocator() override {} void* New(size_t nbytes) override { @@ -55,9 +55,7 @@ class CPUAllocator: public Allocator { return data; } - void Delete(void* data) override { - free(data); - } + void Delete(void* data) override { free(data); } void CopyBytes(void* dst, const void* src, size_t size) override { memcpy(dst, src, size); @@ -85,6 +83,6 @@ struct DeviceContext { Allocator* GetDeviceAllocator(DeviceType type); -} // namespace mace +} // namespace mace -#endif // MACE_CORE_ALLOCATOR_H_ +#endif // MACE_CORE_ALLOCATOR_H_ diff --git a/mace/core/common.h b/mace/core/common.h index df22eacd..b52526f7 100644 --- a/mace/core/common.h +++ b/mace/core/common.h @@ -5,12 +5,12 @@ #ifndef MACE_CORE_COMMON_H_ #define MACE_CORE_COMMON_H_ -#include +#include #include -#include #include +#include +#include #include -#include #include "mace/core/logging.h" @@ -24,9 +24,9 @@ typedef int64_t index_t; // Disable the copy and assignment operator for a class. #ifndef DISABLE_COPY_AND_ASSIGN -#define DISABLE_COPY_AND_ASSIGN(classname) \ -private: \ - classname(const classname&) = delete; \ +#define DISABLE_COPY_AND_ASSIGN(classname) \ + private: \ + classname(const classname&) = delete; \ classname& operator=(const classname&) = delete #endif @@ -35,4 +35,4 @@ private: \ // TODO: need to fine tune this #define kCostPerGroup 1024000000 -#endif // MACE_CORE_COMMON_H_ +#endif // MACE_CORE_COMMON_H_ diff --git a/mace/core/logging.cc b/mace/core/logging.cc index f01d0980..ca479176 100644 --- a/mace/core/logging.cc +++ b/mace/core/logging.cc @@ -2,7 +2,6 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // - #include "mace/core/logging.h" #include @@ -62,11 +61,11 @@ void LogMessage::GenerateLogMessage() { #else void LogMessage::GenerateLogMessage() { - fprintf(stderr, "%c %s:%d] %s\n", "IWEF"[severity_], fname_, line_, str().c_str()); + fprintf(stderr, "%c %s:%d] %s\n", "IWEF"[severity_], fname_, line_, + str().c_str()); } #endif - namespace { // Parse log level (int64_t) from environment variable (char*) diff --git a/mace/core/logging.h b/mace/core/logging.h index be31a70a..61a39251 100644 --- a/mace/core/logging.h +++ b/mace/core/logging.h @@ -5,8 +5,8 @@ #ifndef MACE_CORE_LOGGING_H_ #define MACE_CORE_LOGGING_H_ -#include #include +#include #include #undef ERROR @@ -30,8 +30,8 @@ inline void MakeStringInternal(std::stringstream& ss, const T& t) { } template -inline void -MakeStringInternal(std::stringstream& ss, const T& t, const Args&... args) { +inline void MakeStringInternal(std::stringstream& ss, const T& t, + const Args&... args) { MakeStringInternal(ss, t); MakeStringInternal(ss, args...); } @@ -48,9 +48,7 @@ template <> inline string MakeString(const string& str) { return str; } -inline string MakeString(const char* c_str) { - return string(c_str); -} +inline string MakeString(const char* c_str) { return string(c_str); } class LogMessage : public std::basic_ostringstream { public: @@ -85,8 +83,7 @@ class LogMessageFatal : public LogMessage { ::mace::internal::LogMessage(__FILE__, __LINE__, mace::WARNING) #define _MACE_LOG_ERROR \ ::mace::internal::LogMessage(__FILE__, __LINE__, mace::ERROR) -#define _MACE_LOG_FATAL \ - ::mace::internal::LogMessageFatal(__FILE__, __LINE__) +#define _MACE_LOG_FATAL ::mace::internal::LogMessageFatal(__FILE__, __LINE__) #define _MACE_LOG_QFATAL _MACE_LOG_FATAL @@ -96,10 +93,10 @@ class LogMessageFatal : public LogMessage { // Turn VLOG off when under mobile devices for considerations of binary size. #define VLOG_IS_ON(lvl) ((lvl) <= 0) #else -// Otherwise, Set MACE_CPP_MIN_VLOG_LEVEL environment to update minimum log level +// Otherwise, Set MACE_CPP_MIN_VLOG_LEVEL environment to update minimum log +// level // of VLOG -#define VLOG_IS_ON(lvl) \ - ((lvl) <= ::mace::internal::LogMessage::MinVLogLevel()) +#define VLOG_IS_ON(lvl) ((lvl) <= ::mace::internal::LogMessage::MinVLogLevel()) #endif #define VLOG(lvl) \ @@ -113,16 +110,16 @@ class LogMessageFatal : public LogMessage { // MACE_CHECK(fp->Write(x) == 4) // MACE_CHECK(fp->Write(x) == 4, "Write failed") // which are not correct for MACE_ASSERT. -#define MACE_CHECK(condition, ...) \ - if (!(condition)) \ - LOG(FATAL) << "Check failed: " #condition " " \ - << ::mace::internal::MakeString(__VA_ARGS__) +#define MACE_CHECK(condition, ...) \ + if (!(condition)) \ + LOG(FATAL) << "Check failed: " #condition " " \ + << ::mace::internal::MakeString(__VA_ARGS__) #ifndef NDEBUG -#define MACE_ASSERT(condition, ...) \ - if (!(condition)) \ - LOG(FATAL) << "Assert failed: " #condition " " \ - << ::mace::internal::MakeString(__VA_ARGS__) +#define MACE_ASSERT(condition, ...) \ + if (!(condition)) \ + LOG(FATAL) << "Assert failed: " #condition " " \ + << ::mace::internal::MakeString(__VA_ARGS__) #else #define MACE_ASSERT(condition, ...) ((void)0) #endif @@ -135,9 +132,9 @@ T&& CheckNotNull(const char* file, int line, const char* exprtext, T&& t) { return std::forward(t); } -#define MACE_CHECK_NOTNULL(val) \ +#define MACE_CHECK_NOTNULL(val) \ ::mace::internal::CheckNotNull(__FILE__, __LINE__, \ - "'" #val "' Must be non NULL", (val)) + "'" #val "' Must be non NULL", (val)) } // namespace internal } // namespace mace diff --git a/mace/core/macros.h b/mace/core/macros.h index e23699ae..ced106e5 100644 --- a/mace/core/macros.h +++ b/mace/core/macros.h @@ -17,5 +17,4 @@ #define MACE_PREDICT_TRUE(x) (x) #endif - -#endif //MACE_CORE_MACROS_H_ +#endif // MACE_CORE_MACROS_H_ diff --git a/mace/core/net.cc b/mace/core/net.cc index a8f1f80e..33be1650 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -6,22 +6,19 @@ namespace mace { -NetBase::NetBase(const std::shared_ptr &net_def, - Workspace *ws, +NetBase::NetBase(const std::shared_ptr& net_def, Workspace* ws, DeviceType type) - : name_(net_def->name()) { -} - + : name_(net_def->name()) {} -SimpleNet::SimpleNet(const std::shared_ptr &net_def, - Workspace *ws, - DeviceType type) : NetBase(net_def, ws, type) { +SimpleNet::SimpleNet(const std::shared_ptr& net_def, + Workspace* ws, DeviceType type) + : NetBase(net_def, ws, type) { VLOG(1) << "Constructing SimpleNet " << net_def->name(); for (int idx = 0; idx < net_def->op_size(); ++idx) { const auto& operator_def = net_def->op(idx); VLOG(1) << "Creating operator " << operator_def.name() << ":" << operator_def.type(); - std::unique_ptr op {nullptr}; + std::unique_ptr op{nullptr}; OperatorDef temp_def(operator_def); op = CreateOperator(temp_def, ws, type); operators_.emplace_back(std::move(op)); @@ -40,20 +37,16 @@ bool SimpleNet::Run() { return true; } -unique_ptr CreateNet(const NetDef& net_def, - Workspace* ws, +unique_ptr CreateNet(const NetDef& net_def, Workspace* ws, DeviceType type) { std::shared_ptr tmp_net_def(new NetDef(net_def)); return CreateNet(tmp_net_def, ws, type); } -unique_ptr CreateNet( - const std::shared_ptr& net_def, - Workspace* ws, - DeviceType type) { +unique_ptr CreateNet(const std::shared_ptr& net_def, + Workspace* ws, DeviceType type) { unique_ptr net(new SimpleNet(net_def, ws, type)); return net; } - -} // namespace mace +} // namespace mace diff --git a/mace/core/net.h b/mace/core/net.h index 93ce98ce..621b7ae3 100644 --- a/mace/core/net.h +++ b/mace/core/net.h @@ -6,35 +6,31 @@ #define MACE_CORE_NET_H_ #include "mace/core/common.h" -#include "mace/proto/mace.pb.h" #include "mace/core/operator.h" #include "mace/core/workspace.h" +#include "mace/proto/mace.pb.h" namespace mace { class NetBase { public: - NetBase(const std::shared_ptr &net_def, - Workspace* ws, + NetBase(const std::shared_ptr& net_def, Workspace* ws, DeviceType type); virtual ~NetBase() noexcept {} virtual bool Run() = 0; - const string &Name() const { - return name_; - } + const string& Name() const { return name_; } protected: string name_; - DISABLE_COPY_AND_ASSIGN(NetBase); + DISABLE_COPY_AND_ASSIGN(NetBase); }; class SimpleNet : public NetBase { public: - SimpleNet(const std::shared_ptr& net_def, - Workspace* ws, + SimpleNet(const std::shared_ptr& net_def, Workspace* ws, DeviceType type); bool Run() override; @@ -42,17 +38,14 @@ class SimpleNet : public NetBase { protected: vector > operators_; - DISABLE_COPY_AND_ASSIGN(SimpleNet); + DISABLE_COPY_AND_ASSIGN(SimpleNet); }; -unique_ptr CreateNet(const NetDef& net_def, - Workspace* ws, +unique_ptr CreateNet(const NetDef& net_def, Workspace* ws, DeviceType type); -unique_ptr CreateNet( - const std::shared_ptr& net_def, - Workspace* ws, - DeviceType type); +unique_ptr CreateNet(const std::shared_ptr& net_def, + Workspace* ws, DeviceType type); -} // namespace mace +} // namespace mace -#endif // MACE_CORE_NET_H_ +#endif // MACE_CORE_NET_H_ diff --git a/mace/core/operator.cc b/mace/core/operator.cc index a755577b..2af4db46 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -11,33 +11,22 @@ std::map* gDeviceTypeRegistry() { return &g_device_type_registry; } -MACE_DEFINE_REGISTRY( - CPUOperatorRegistry, - OperatorBase, - const OperatorDef&, - Workspace*); +MACE_DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef&, + Workspace*); MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry); -MACE_DEFINE_REGISTRY( - NEONOperatorRegistry, - OperatorBase, - const OperatorDef&, - Workspace*); +MACE_DEFINE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef&, + Workspace*); MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry); -unique_ptr CreateOperator( - const OperatorDef& operator_def, - Workspace* ws, - DeviceType type) { +unique_ptr CreateOperator(const OperatorDef& operator_def, + Workspace* ws, DeviceType type) { OperatorRegistry* registry = gDeviceTypeRegistry()->at(type); return registry->Create(operator_def.type(), operator_def, ws); } - -OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws) +OperatorBase::OperatorBase(const OperatorDef& operator_def, Workspace* ws) : operator_ws_(ws), - operator_def_(std::make_shared(operator_def)) { -} - + operator_def_(std::make_shared(operator_def)) {} -} // namespace mace +} // namespace mace diff --git a/mace/core/operator.h b/mace/core/operator.h index 4ec4e7b1..4c677073 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -5,12 +5,12 @@ #ifndef MACE_CORE_OPERATOR_H #define MACE_CORE_OPERATOR_H -#include "mace/core/proto_utils.h" #include "mace/core/common.h" -#include "mace/proto/mace.pb.h" -#include "mace/core/tensor.h" +#include "mace/core/proto_utils.h" #include "mace/core/registry.h" +#include "mace/core/tensor.h" #include "mace/core/workspace.h" +#include "mace/proto/mace.pb.h" namespace mace { @@ -23,22 +23,21 @@ class OperatorBase { MACE_CHECK(operator_def_, "operator_def was null!"); return ArgumentHelper::HasArgument(*operator_def_, name); } - template + template inline T GetSingleArgument(const string &name, const T &default_value) const { MACE_CHECK(operator_def_, "operator_def was null!"); return ArgumentHelper::GetSingleArgument( *operator_def_, name, default_value); } - template + template inline bool HasSingleArgumentOfType(const string &name) const { MACE_CHECK(operator_def_, "operator_def was null!"); return ArgumentHelper::HasSingleArgumentOfType( *operator_def_, name); } - template + template inline vector GetRepeatedArgument( - const string &name, - const vector &default_value = {}) const { + const string &name, const vector &default_value = {}) const { MACE_CHECK(operator_def_, "operator_def was null!"); return ArgumentHelper::GetRepeatedArgument( *operator_def_, name, default_value); @@ -49,9 +48,7 @@ class OperatorBase { return inputs_[idx]; } - inline Tensor *Output(int idx) { - return outputs_[idx]; - } + inline Tensor *Output(int idx) { return outputs_[idx]; } inline int InputSize() { return inputs_.size(); } inline int OutputSize() { return outputs_.size(); } @@ -70,9 +67,7 @@ class OperatorBase { operator_def_ = operator_def; } - inline bool has_debug_def() const { - return operator_def_ != nullptr; - } + inline bool has_debug_def() const { return operator_def_ != nullptr; } protected: Workspace *operator_ws_; @@ -80,7 +75,7 @@ class OperatorBase { vector inputs_; vector outputs_; - DISABLE_COPY_AND_ASSIGN(OperatorBase); + DISABLE_COPY_AND_ASSIGN(OperatorBase); }; template @@ -90,26 +85,22 @@ class Operator : public OperatorBase { : OperatorBase(operator_def, ws) { for (const string &input_str : operator_def.input()) { const Tensor *tensor = ws->GetTensor(input_str); - MACE_CHECK( - tensor != nullptr, - "op ", - operator_def.type(), - ": Encountered a non-existing input tensor: ", - input_str); + MACE_CHECK(tensor != nullptr, "op ", operator_def.type(), + ": Encountered a non-existing input tensor: ", input_str); inputs_.push_back(tensor); } for (const string &output_str : operator_def.output()) { - outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor(output_str, - DeviceContext::allocator(), - DataTypeToEnum::v()))); + outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor( + output_str, DeviceContext::allocator(), DataTypeToEnum::v()))); } } virtual bool Run() override = 0; ~Operator() noexcept override {} }; -// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of the +// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of +// the // operator's inputs and outputs, in order to avoid confusion. For example, for // a fully convolution layer that has input, weight and bias, you can define its // input tags as: @@ -119,9 +110,9 @@ class Operator : public OperatorBase { // you can now do // auto& weight = Input(WEIGHT); // to make it more clear. -#define OP_INPUT_TAGS(first_input, ...) \ +#define OP_INPUT_TAGS(first_input, ...) \ enum _InputTags { first_input = 0, __VA_ARGS__ } -#define OP_OUTPUT_TAGS(first_input, ...) \ +#define OP_OUTPUT_TAGS(first_input, ...) \ enum _OutputTags { first_input = 0, __VA_ARGS__ } typedef Registry @@ -135,7 +126,7 @@ struct DeviceTypeRegisterer { if (gDeviceTypeRegistry()->count(type)) { LOG(ERROR) << "Device type " << type << "registered twice. This should not happen. Did you have " - "duplicated numbers assigned to different devices?"; + "duplicated numbers assigned to different devices?"; std::exit(1); } // Calling the registry function to get the actual registry pointer. @@ -143,39 +134,31 @@ struct DeviceTypeRegisterer { } }; -#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \ - namespace { \ - static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE( \ - DeviceType)(type, ®istry_function); \ +#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \ + namespace { \ + static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \ + type, ®istry_function); \ } -MACE_DECLARE_REGISTRY( - CPUOperatorRegistry, - OperatorBase, - const OperatorDef&, - Workspace*); +MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &, + Workspace *); #define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \ MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_CPU_OPERATOR(name, ...) \ +#define REGISTER_CPU_OPERATOR(name, ...) \ MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__) -MACE_DECLARE_REGISTRY( - NEONOperatorRegistry, - OperatorBase, - const OperatorDef&, - Workspace*); +MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &, + Workspace *); #define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \ MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__) -#define REGISTER_NEON_OPERATOR(name, ...) \ +#define REGISTER_NEON_OPERATOR(name, ...) \ MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__) -unique_ptr CreateOperator( - const OperatorDef &operator_def, - Workspace *ws, - DeviceType type); +unique_ptr CreateOperator(const OperatorDef &operator_def, + Workspace *ws, DeviceType type); -} // namespace mace +} // namespace mace -#endif //MACE_CORE_OPERATOR_H +#endif // MACE_CORE_OPERATOR_H diff --git a/mace/core/proto_utils.cc b/mace/core/proto_utils.cc index 0658913a..9906dd26 100644 --- a/mace/core/proto_utils.cc +++ b/mace/core/proto_utils.cc @@ -5,9 +5,9 @@ #include "mace/core/proto_utils.h" #include +#include #include #include -#include #include "google/protobuf/io/coded_stream.h" #include "google/protobuf/io/zero_copy_stream_impl.h" @@ -82,13 +82,12 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { return proto->ParseFromCodedStream(&coded_stream); } -void WriteProtoToBinaryFile( - const MessageLite& /*proto*/, - const char* /*filename*/) { +void WriteProtoToBinaryFile(const MessageLite& /*proto*/, + const char* /*filename*/) { LOG(FATAL) << "Not implemented yet."; } -#else // MACE_USE_LITE_PROTO +#else // MACE_USE_LITE_PROTO // Full protocol buffer. @@ -118,7 +117,7 @@ void WriteProtoToTextFile(const Message& proto, const char* filename) { } bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { -#if defined (_MSC_VER) // for MSC compiler binary flag needs to be specified +#if defined(_MSC_VER) // for MSC compiler binary flag needs to be specified int fd = open(filename, O_RDONLY | O_BINARY); #else int fd = open(filename, O_RDONLY); @@ -138,8 +137,8 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) { void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) { int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); - MACE_CHECK( - fd != -1, "File cannot be created: ", filename, " error number: ", errno); + MACE_CHECK(fd != -1, "File cannot be created: ", filename, " error number: ", + errno); std::unique_ptr raw_output(new FileOutputStream(fd)); std::unique_ptr coded_output( new CodedOutputStream(raw_output.get())); @@ -151,18 +150,17 @@ void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) { #endif // MACE_USE_LITE_PROTO -ArgumentHelper::ArgumentHelper(const OperatorDef &def) { - for (auto &arg : def.arg()) { +ArgumentHelper::ArgumentHelper(const OperatorDef& def) { + for (auto& arg : def.arg()) { if (arg_map_.find(arg.name()) != arg_map_.end()) { MACE_CHECK( arg.SerializeAsString() == arg_map_[arg.name()].SerializeAsString(), - "Found argument of the same name '", - arg.name(), - "' but with different contents: ", - ProtoDebugString(def)); + "Found argument of the same name '", arg.name(), + "' but with different contents: ", ProtoDebugString(def)); LOG(WARNING) << "Duplicated argument name found in operator def: " - << ProtoDebugString(def) << ", arg: " << ProtoDebugString(arg); + << ProtoDebugString(def) + << ", arg: " << ProtoDebugString(arg); } arg_map_[arg.name()] = arg; @@ -171,10 +169,9 @@ ArgumentHelper::ArgumentHelper(const OperatorDef &def) { ArgumentHelper::ArgumentHelper(const NetDef& netdef) { for (auto& arg : netdef.arg()) { - MACE_CHECK( - arg_map_.count(arg.name()) == 0, - "Duplicated argument name found in net def: ", - ProtoDebugString(netdef)); + MACE_CHECK(arg_map_.count(arg.name()) == 0, + "Duplicated argument name found in net def: ", + ProtoDebugString(netdef)); arg_map_[arg.name()] = arg; } } @@ -192,32 +189,24 @@ bool SupportsLosslessConversion(const InputType& value) { } } -#define INSTANTIATE_GET_SINGLE_ARGUMENT( \ - T, fieldname, enforce_lossless_conversion) \ +#define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname, \ + enforce_lossless_conversion) \ template <> \ - T ArgumentHelper::GetSingleArgument( \ - const string& name, const T& default_value) const { \ + T ArgumentHelper::GetSingleArgument(const string& name, \ + const T& default_value) const { \ if (arg_map_.count(name) == 0) { \ VLOG(1) << "Using default parameter value " << default_value \ << " for parameter " << name; \ return default_value; \ } \ - MACE_CHECK( \ - arg_map_.at(name).has_##fieldname(), \ - "Argument ", \ - name, \ - " does not have the right field: expected field " #fieldname); \ + MACE_CHECK(arg_map_.at(name).has_##fieldname(), "Argument ", name, \ + " does not have the right field: expected field " #fieldname); \ auto value = arg_map_.at(name).fieldname(); \ if (enforce_lossless_conversion) { \ auto supportsConversion = \ SupportsLosslessConversion(value); \ - MACE_CHECK( \ - supportsConversion, \ - "Value", \ - value, \ - " of argument ", \ - name, \ - "cannot be represented correctly in a target type"); \ + MACE_CHECK(supportsConversion, "Value", value, " of argument ", name, \ + "cannot be represented correctly in a target type"); \ } \ return value; \ } \ @@ -242,30 +231,25 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(size_t, i, true) INSTANTIATE_GET_SINGLE_ARGUMENT(string, s, false) #undef INSTANTIATE_GET_SINGLE_ARGUMENT -#define INSTANTIATE_GET_REPEATED_ARGUMENT( \ - T, fieldname, enforce_lossless_conversion) \ - template <> \ - vector ArgumentHelper::GetRepeatedArgument( \ - const string& name, const std::vector& default_value) const { \ - if (arg_map_.count(name) == 0) { \ - return default_value; \ - } \ - vector values; \ - for (const auto& v : arg_map_.at(name).fieldname()) { \ - if (enforce_lossless_conversion) { \ - auto supportsConversion = \ - SupportsLosslessConversion(v); \ - MACE_CHECK( \ - supportsConversion, \ - "Value", \ - v, \ - " of argument ", \ - name, \ - "cannot be represented correctly in a target type"); \ - } \ - values.push_back(v); \ - } \ - return values; \ +#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname, \ + enforce_lossless_conversion) \ + template <> \ + vector ArgumentHelper::GetRepeatedArgument( \ + const string& name, const std::vector& default_value) const { \ + if (arg_map_.count(name) == 0) { \ + return default_value; \ + } \ + vector values; \ + for (const auto& v : arg_map_.at(name).fieldname()) { \ + if (enforce_lossless_conversion) { \ + auto supportsConversion = \ + SupportsLosslessConversion(v); \ + MACE_CHECK(supportsConversion, "Value", v, " of argument ", name, \ + "cannot be represented correctly in a target type"); \ + } \ + values.push_back(v); \ + } \ + return values; \ } INSTANTIATE_GET_REPEATED_ARGUMENT(float, floats, false) @@ -281,14 +265,14 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(size_t, ints, true) INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings, false) #undef INSTANTIATE_GET_REPEATED_ARGUMENT -#define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname) \ -template <> \ -Argument MakeArgument(const string& name, const T& value) { \ - Argument arg; \ - arg.set_name(name); \ - arg.set_##fieldname(value); \ - return arg; \ -} +#define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname) \ + template <> \ + Argument MakeArgument(const string& name, const T& value) { \ + Argument arg; \ + arg.set_name(name); \ + arg.set_##fieldname(value); \ + return arg; \ + } MACE_MAKE_SINGULAR_ARGUMENT(bool, i) MACE_MAKE_SINGULAR_ARGUMENT(float, f) @@ -305,16 +289,16 @@ Argument MakeArgument(const string& name, const MessageLite& value) { return arg; } -#define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname) \ -template <> \ -Argument MakeArgument(const string& name, const vector& value) { \ - Argument arg; \ - arg.set_name(name); \ - for (const auto& v : value) { \ - arg.add_##fieldname(v); \ - } \ - return arg; \ -} +#define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname) \ + template <> \ + Argument MakeArgument(const string& name, const vector& value) { \ + Argument arg; \ + arg.set_name(name); \ + for (const auto& v : value) { \ + arg.add_##fieldname(v); \ + } \ + return arg; \ + } MACE_MAKE_REPEATED_ARGUMENT(float, floats) MACE_MAKE_REPEATED_ARGUMENT(int, ints) @@ -328,31 +312,24 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) { return arg; } } - MACE_CHECK(false, - "Argument named ", - name, - "does not exist in operator ", - ProtoDebugString(def)); + MACE_CHECK(false, "Argument named ", name, "does not exist in operator ", + ProtoDebugString(def)); } -bool GetFlagArgument( - const OperatorDef& def, - const string& name, - bool def_value) { +bool GetFlagArgument(const OperatorDef& def, const string& name, + bool def_value) { for (const Argument& arg : def.arg()) { if (arg.name() == name) { - MACE_CHECK( - arg.has_i(), "Can't parse argument as bool: ", ProtoDebugString(arg)); + MACE_CHECK(arg.has_i(), "Can't parse argument as bool: ", + ProtoDebugString(arg)); return arg.i(); } } return def_value; } -Argument* GetMutableArgument( - const string& name, - const bool create_if_missing, - OperatorDef* def) { +Argument* GetMutableArgument(const string& name, const bool create_if_missing, + OperatorDef* def) { for (int i = 0; i < def->arg_size(); ++i) { if (def->arg(i).name() == name) { return def->mutable_arg(i); diff --git a/mace/core/proto_utils.h b/mace/core/proto_utils.h index e50294ad..5f8074ae 100644 --- a/mace/core/proto_utils.h +++ b/mace/core/proto_utils.h @@ -12,15 +12,14 @@ #include "google/protobuf/message.h" #endif // !MACE_USE_LITE_PROTO -#include "mace/proto/mace.pb.h" #include "mace/core/common.h" +#include "mace/proto/mace.pb.h" namespace mace { using std::string; using ::google::protobuf::MessageLite; - // Common interfaces that reads file contents into a string. bool ReadStringFromFile(const char* filename, string* str); bool WriteStringToFile(const string& str, const char* filename); @@ -46,22 +45,20 @@ inline string ProtoDebugString(const MessageLite& proto) { // Text format MessageLite wrappers: these functions do nothing but just // allowing things to compile. It will produce a runtime error if you are using // MessageLite but still want text support. -inline bool ReadProtoFromTextFile( - const char* /*filename*/, - MessageLite* /*proto*/) { +inline bool ReadProtoFromTextFile(const char* /*filename*/, + MessageLite* /*proto*/) { LOG(FATAL) << "If you are running lite version, you should not be " - << "calling any text-format protobuffers."; + << "calling any text-format protobuffers."; return false; // Just to suppress compiler warning. } inline bool ReadProtoFromTextFile(const string filename, MessageLite* proto) { return ReadProtoFromTextFile(filename.c_str(), proto); } -inline void WriteProtoToTextFile( - const MessageLite& /*proto*/, - const char* /*filename*/) { +inline void WriteProtoToTextFile(const MessageLite& /*proto*/, + const char* /*filename*/) { LOG(FATAL) << "If you are running lite version, you should not be " - << "calling any text-format protobuffers."; + << "calling any text-format protobuffers."; } inline void WriteProtoToTextFile(const MessageLite& proto, const string& filename) { @@ -107,16 +104,13 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) { #endif // MACE_USE_LITE_PROTO -template < - class IterableInputs = std::initializer_list, - class IterableOutputs = std::initializer_list, - class IterableArgs = std::initializer_list> -OperatorDef CreateOperatorDef( - const string& type, - const string& name, - const IterableInputs& inputs, - const IterableOutputs& outputs, - const IterableArgs& args) { +template , + class IterableOutputs = std::initializer_list, + class IterableArgs = std::initializer_list> +OperatorDef CreateOperatorDef(const string& type, const string& name, + const IterableInputs& inputs, + const IterableOutputs& outputs, + const IterableArgs& args) { OperatorDef def; def.set_type(type); def.set_name(name); @@ -134,20 +128,13 @@ OperatorDef CreateOperatorDef( // A simplified version compared to the full CreateOperator, if you do not need // to specify args. -template < - class IterableInputs = std::initializer_list, - class IterableOutputs = std::initializer_list> -inline OperatorDef CreateOperatorDef( - const string& type, - const string& name, - const IterableInputs& inputs, - const IterableOutputs& outputs) { - return CreateOperatorDef( - type, - name, - inputs, - outputs, - std::vector()); +template , + class IterableOutputs = std::initializer_list> +inline OperatorDef CreateOperatorDef(const string& type, const string& name, + const IterableInputs& inputs, + const IterableOutputs& outputs) { + return CreateOperatorDef(type, name, inputs, outputs, + std::vector()); } /** @@ -166,10 +153,8 @@ class ArgumentHelper { } template - static T GetSingleArgument( - const Def& def, - const string& name, - const T& default_value) { + static T GetSingleArgument(const Def& def, const string& name, + const T& default_value) { return ArgumentHelper(def).GetSingleArgument(name, default_value); } @@ -180,8 +165,7 @@ class ArgumentHelper { template static vector GetRepeatedArgument( - const Def& def, - const string& name, + const Def& def, const string& name, const std::vector& default_value = std::vector()) { return ArgumentHelper(def).GetRepeatedArgument(name, default_value); } @@ -192,9 +176,8 @@ class ArgumentHelper { } template - static vector GetRepeatedMessageArgument( - const Def& def, - const string& name) { + static vector GetRepeatedMessageArgument(const Def& def, + const string& name) { return ArgumentHelper(def).GetRepeatedMessageArgument(name); } @@ -216,9 +199,8 @@ class ArgumentHelper { MACE_CHECK(arg_map_.count(name), "Cannot find parameter named " + name); MessageType message; if (arg_map_.at(name).has_s()) { - MACE_CHECK( - message.ParseFromString(arg_map_.at(name).s()), - "Faild to parse content from the string"); + MACE_CHECK(message.ParseFromString(arg_map_.at(name).s()), + "Faild to parse content from the string"); } else { VLOG(1) << "Return empty message for parameter " << name; } @@ -230,9 +212,8 @@ class ArgumentHelper { MACE_CHECK(arg_map_.count(name), "Cannot find parameter named " + name); vector messages(arg_map_.at(name).strings_size()); for (int i = 0; i < messages.size(); ++i) { - MACE_CHECK( - messages[i].ParseFromString(arg_map_.at(name).strings(i)), - "Faild to parse content from the string"); + MACE_CHECK(messages[i].ParseFromString(arg_map_.at(name).strings(i)), + "Faild to parse content from the string"); } return messages; } @@ -242,15 +223,11 @@ class ArgumentHelper { }; const Argument& GetArgument(const OperatorDef& def, const string& name); -bool GetFlagArgument( - const OperatorDef& def, - const string& name, - bool def_value = false); - -Argument* GetMutableArgument( - const string& name, - const bool create_if_missing, - OperatorDef* def); +bool GetFlagArgument(const OperatorDef& def, const string& name, + bool def_value = false); + +Argument* GetMutableArgument(const string& name, const bool create_if_missing, + OperatorDef* def); template Argument MakeArgument(const string& name, const T& value); diff --git a/mace/core/registry.h b/mace/core/registry.h index 1f5a86fb..a4747e1b 100644 --- a/mace/core/registry.h +++ b/mace/core/registry.h @@ -12,7 +12,7 @@ namespace mace { template class Registry { public: - typedef std::function (Args ...)> Creator; + typedef std::function(Args...)> Creator; Registry() : registry_() {} @@ -24,7 +24,7 @@ class Registry { inline bool Has(const SrcType& key) { return registry_.count(key) != 0; } - unique_ptr Create(const SrcType& key, Args ... args) { + unique_ptr Create(const SrcType& key, Args... args) { if (registry_.count(key) == 0) { VLOG(2) << "Key not registered: " << key; return nullptr; @@ -60,7 +60,7 @@ class Registerer { } template - static unique_ptr DefaultCreator(Args ... args) { + static unique_ptr DefaultCreator(Args... args) { return std::unique_ptr(new DerivedType(args...)); } }; @@ -74,36 +74,35 @@ class Registerer { #endif #define MACE_DECLARE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \ - Registry* RegistryName(); \ - typedef Registerer \ + Registry* RegistryName(); \ + typedef Registerer \ Registerer##RegistryName; #define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \ - Registry* RegistryName() { \ - static Registry* registry = \ - new Registry(); \ - return registry; \ + Registry* RegistryName() { \ + static Registry* registry = \ + new Registry(); \ + return registry; \ } -#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ - MACE_DECLARE_TYPED_REGISTRY( \ - RegistryName, std::string, ObjectType, ##__VA_ARGS__) +#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \ + MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ + ##__VA_ARGS__) -#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \ - MACE_DEFINE_TYPED_REGISTRY( \ - RegistryName, std::string, ObjectType, ##__VA_ARGS__) +#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \ + MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \ + ##__VA_ARGS__) #define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \ - namespace { \ + namespace { \ static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \ key, RegistryName(), __VA_ARGS__); #define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \ - namespace { \ + namespace { \ static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \ - key, \ - RegistryName(), \ - Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \ + key, RegistryName(), \ + Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \ } #define MACE_REGISTER_CREATOR(RegistryName, key, ...) \ @@ -112,6 +111,6 @@ class Registerer { #define MACE_REGISTER_CLASS(RegistryName, key, ...) \ MACE_REGISTER_TYPED_CLASS(RegistryName, #key, __VA_ARGS__) -} // namespace mace +} // namespace mace -#endif // MACE_CORE_REGISTRY_H_ +#endif // MACE_CORE_REGISTRY_H_ diff --git a/mace/core/serializer.cc b/mace/core/serializer.cc index 3e80e545..cfe2d935 100644 --- a/mace/core/serializer.cc +++ b/mace/core/serializer.cc @@ -4,19 +4,18 @@ #include "mace/core/serializer.h" - namespace mace { unique_ptr Serializer::Serialize(const Tensor &tensor, - const string &name) { + const string &name) { MACE_NOT_IMPLEMENTED; return nullptr; } unique_ptr Serializer::Deserialize(const TensorProto &proto, DeviceType type) { - unique_ptr tensor(new Tensor(GetDeviceAllocator(type), - proto.data_type())); + unique_ptr tensor( + new Tensor(GetDeviceAllocator(type), proto.data_type())); vector dims; for (const index_t d : proto.dims()) { dims.push_back(d); @@ -25,8 +24,7 @@ unique_ptr Serializer::Deserialize(const TensorProto &proto, switch (proto.data_type()) { case DT_FLOAT: - tensor->Copy(proto.float_data().data(), - proto.float_data().size()); + tensor->Copy(proto.float_data().data(), proto.float_data().size()); break; case DT_DOUBLE: tensor->Copy(proto.double_data().data(), @@ -34,39 +32,38 @@ unique_ptr Serializer::Deserialize(const TensorProto &proto, break; case DT_INT32: tensor->template Copy(proto.int32_data().data(), - proto.int32_data().size()); + proto.int32_data().size()); break; case DT_UINT8: tensor->CopyWithCast(proto.int32_data().data(), - proto.int32_data().size()); + proto.int32_data().size()); break; case DT_INT16: tensor->CopyWithCast(proto.int32_data().data(), - proto.int32_data().size()); + proto.int32_data().size()); break; case DT_INT8: tensor->CopyWithCast(proto.int32_data().data(), - proto.int32_data().size()); + proto.int32_data().size()); break; case DT_INT64: tensor->Copy(proto.int64_data().data(), - proto.int64_data().size()); + proto.int64_data().size()); break; case DT_UINT16: tensor->CopyWithCast(proto.int32_data().data(), - proto.int32_data().size()); + proto.int32_data().size()); break; case DT_BOOL: tensor->CopyWithCast(proto.int32_data().data(), - proto.int32_data().size()); + proto.int32_data().size()); break; case DT_STRING: { string *content = tensor->mutable_data(); for (int i = 0; i < proto.string_data().size(); ++i) { content[i] = proto.string_data(i); } - } - break; + } break; default: MACE_NOT_IMPLEMENTED; break; @@ -75,4 +72,4 @@ unique_ptr Serializer::Deserialize(const TensorProto &proto, return tensor; } -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/core/serializer.h b/mace/core/serializer.h index 01f20748..f9966a5a 100644 --- a/mace/core/serializer.h +++ b/mace/core/serializer.h @@ -5,9 +5,9 @@ #ifndef MACE_CORE_SERIALIZER_H_ #define MACE_CORE_SERIALIZER_H_ -#include "mace/proto/mace.pb.h" #include "mace/core/common.h" #include "mace/core/tensor.h" +#include "mace/proto/mace.pb.h" namespace mace { @@ -20,9 +20,9 @@ class Serializer { unique_ptr Deserialize(const TensorProto& proto, DeviceType type); - DISABLE_COPY_AND_ASSIGN(Serializer); + DISABLE_COPY_AND_ASSIGN(Serializer); }; -} // namespace mace +} // namespace mace -#endif // MACE_CORE_SERIALIZER_H_ +#endif // MACE_CORE_SERIALIZER_H_ diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 3dc3f1ed..224c342e 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -5,11 +5,11 @@ #ifndef MACE_CORE_TENSOR_H_ #define MACE_CORE_TENSOR_H_ -#include "mace/core/common.h" -#include "mace/proto/mace.pb.h" #include "mace/core/allocator.h" -#include "mace/core/types.h" +#include "mace/core/common.h" #include "mace/core/logging.h" +#include "mace/core/types.h" +#include "mace/proto/mace.pb.h" namespace mace { @@ -25,13 +25,13 @@ namespace mace { switch (TYPE_ENUM) { \ CASE(float, SINGLE_ARG(STMTS)) \ CASE(double, SINGLE_ARG(STMTS)) \ - CASE(int32_t, SINGLE_ARG(STMTS)) \ - CASE(uint8_t, SINGLE_ARG(STMTS)) \ - CASE(uint16_t, SINGLE_ARG(STMTS)) \ - CASE(int16_t, SINGLE_ARG(STMTS)) \ - CASE(int8_t, SINGLE_ARG(STMTS)) \ + CASE(int32_t, SINGLE_ARG(STMTS)) \ + CASE(uint8_t, SINGLE_ARG(STMTS)) \ + CASE(uint16_t, SINGLE_ARG(STMTS)) \ + CASE(int16_t, SINGLE_ARG(STMTS)) \ + CASE(int8_t, SINGLE_ARG(STMTS)) \ CASE(string, SINGLE_ARG(STMTS)) \ - CASE(int64_t, SINGLE_ARG(STMTS)) \ + CASE(int64_t, SINGLE_ARG(STMTS)) \ CASE(bool, SINGLE_ARG(STMTS)) \ case DT_INVALID: \ INVALID; \ @@ -41,20 +41,17 @@ namespace mace { break; \ } - #define CASES(TYPE_ENUM, STMTS) \ CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \ , LOG(FATAL) << "Unexpected type: " << TYPE_ENUM;) - class Tensor { public: Tensor() - : alloc_(cpu_allocator()), - size_(0), dtype_(DT_FLOAT), data_(nullptr) {}; + : alloc_(cpu_allocator()), size_(0), dtype_(DT_FLOAT), data_(nullptr){}; Tensor(Allocator* a, DataType type) - : alloc_(a), size_(0), dtype_(type), data_(nullptr) {}; + : alloc_(a), size_(0), dtype_(type), data_(nullptr){}; ~Tensor() { if (alloc_ && data_.get()) { @@ -92,9 +89,8 @@ class Tensor { if (data_.get() || size_ == 0) { return data_.get(); } else { - CASES(dtype_, data_.reset(alloc_->New(size_ * sizeof(T)), [this](void* ptr) { - alloc_->Delete(ptr); - })); + CASES(dtype_, data_.reset(alloc_->New(size_ * sizeof(T)), + [this](void* ptr) { alloc_->Delete(ptr); })); return data_.get(); } } @@ -116,13 +112,9 @@ class Tensor { } } - inline void ResizeLike(const Tensor& other) { - Resize(other.shape()); - } + inline void ResizeLike(const Tensor& other) { Resize(other.shape()); } - inline void ResizeLike(const Tensor* other) { - Resize(other->shape()); - } + inline void ResizeLike(const Tensor* other) { Resize(other->shape()); } template inline void Copy(const T* src, index_t size) { @@ -132,7 +124,8 @@ class Tensor { template inline void CopyWithCast(const SrcType* src, size_t size) { - MACE_CHECK(static_cast(size) == size_, "copy src and dst with different size."); + MACE_CHECK(static_cast(size) == size_, + "copy src and dst with different size."); unique_ptr buffer(new DstType[size]); for (size_t i = 0; i < size; ++i) { buffer[i] = static_cast(src[i]); @@ -146,10 +139,11 @@ class Tensor { inline void DebugPrint() { std::stringstream os; - for (int i: shape_) { + for (int i : shape_) { os << i << ", "; } - LOG(INFO) << "Tensor shape: " << os.str() << " type: " << DataType_Name(dtype_); + LOG(INFO) << "Tensor shape: " << os.str() + << " type: " << DataType_Name(dtype_); os.str(""); os.clear(); @@ -175,7 +169,8 @@ class Tensor { private: inline int64_t NumElements() const { - return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies()); + return std::accumulate(shape_.begin(), shape_.end(), 1, + std::multiplies()); } Allocator* alloc_; @@ -184,9 +179,9 @@ class Tensor { std::shared_ptr data_; vector shape_; - DISABLE_COPY_AND_ASSIGN(Tensor); + DISABLE_COPY_AND_ASSIGN(Tensor); }; -} // namespace tensor +} // namespace tensor -#endif //MACE_CORE_TENSOR_H_ +#endif // MACE_CORE_TENSOR_H_ diff --git a/mace/core/testing/test_benchmark.cc b/mace/core/testing/test_benchmark.cc index 7e09c28f..66078911 100644 --- a/mace/core/testing/test_benchmark.cc +++ b/mace/core/testing/test_benchmark.cc @@ -51,11 +51,8 @@ Benchmark* Benchmark::ArgPair(int x, int y) { return this; } - // Run all benchmarks -void Benchmark::Run() { - Run("all"); -} +void Benchmark::Run() { Run("all"); } void Benchmark::Run(const char* pattern) { if (!all_benchmarks) return; @@ -113,8 +110,8 @@ void Benchmark::Run(const char* pattern) { (items_processed * 1e-6) / seconds); full_label += buf; } - printf("%-*s %10.0f %10d\t%s\n", width, name, - seconds * 1e9 / iters, iters, full_label.c_str()); + printf("%-*s %10.0f %10d\t%s\n", width, name, seconds * 1e9 / iters, + iters, full_label.c_str()); } } } diff --git a/mace/core/testing/test_benchmark.h b/mace/core/testing/test_benchmark.h index 6f96411b..25d12459 100644 --- a/mace/core/testing/test_benchmark.h +++ b/mace/core/testing/test_benchmark.h @@ -12,9 +12,9 @@ #include "mace/core/types.h" #define MACE_BENCHMARK_CONCAT(a, b, c) a##b##c -#define BENCHMARK(n) \ - static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT(__benchmark_, n, __LINE__) = \ - (new ::mace::testing::Benchmark(#n, (n))) +#define BENCHMARK(n) \ + static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT( \ + __benchmark_, n, __LINE__) = (new ::mace::testing::Benchmark(#n, (n))) namespace mace { namespace testing { diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index dfa87672..cc0c0172 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -17,4 +17,3 @@ int main(int argc, char** argv) { } return 0; } - diff --git a/mace/core/types.h b/mace/core/types.h index b174993d..21c502cf 100644 --- a/mace/core/types.h +++ b/mace/core/types.h @@ -18,26 +18,25 @@ struct DataTypeToEnum { static_assert(IsValidDataType::value, "Specified Data Type not supported"); }; - // EnumToDataType::Type is the type for DataType constant VALUE, e.g. // EnumToDataType::Type is float. template struct EnumToDataType {}; // Specializations below // Template specialization for both DataTypeToEnum and EnumToDataType. -#define MATCH_TYPE_AND_ENUM(TYPE, ENUM) \ - template <> \ - struct DataTypeToEnum { \ - static DataType v() { return ENUM; } \ - static constexpr DataType value = ENUM; \ - }; \ - template <> \ - struct IsValidDataType { \ - static constexpr bool value = true; \ - }; \ - template <> \ - struct EnumToDataType { \ - typedef TYPE Type; \ +#define MATCH_TYPE_AND_ENUM(TYPE, ENUM) \ + template <> \ + struct DataTypeToEnum { \ + static DataType v() { return ENUM; } \ + static constexpr DataType value = ENUM; \ + }; \ + template <> \ + struct IsValidDataType { \ + static constexpr bool value = true; \ + }; \ + template <> \ + struct EnumToDataType { \ + typedef TYPE Type; \ } MATCH_TYPE_AND_ENUM(float, DT_FLOAT); @@ -53,6 +52,6 @@ MATCH_TYPE_AND_ENUM(bool, DT_BOOL); static const int32_t kint32_tmax = ((int32_t)0x7FFFFFFF); -} // namespace mace +} // namespace mace -#endif // MACE_CORE_TYPES_H_ +#endif // MACE_CORE_TYPES_H_ diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index ae28d2df..953a5ba3 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/core/common.h" #include "mace/core/workspace.h" +#include "mace/core/common.h" #include "mace/core/serializer.h" namespace mace { @@ -16,8 +16,7 @@ vector Workspace::Tensors() const { return names; } -Tensor* Workspace::CreateTensor(const string& name, - Allocator* alloc, +Tensor* Workspace::CreateTensor(const string& name, Allocator* alloc, DataType type) { if (HasTensor(name)) { VLOG(1) << "Tensor " << name << " already exists. Skipping."; @@ -46,14 +45,16 @@ const Tensor* Workspace::GetTensor(const string& name) const { } Tensor* Workspace::GetTensor(const string& name) { - return const_cast(static_cast(this)->GetTensor(name)); + return const_cast( + static_cast(this)->GetTensor(name)); } -void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) { +void Workspace::LoadModelTensor(const NetDef& net_def, DeviceType type) { Serializer serializer; - for (auto& tensor_proto: net_def.tensors()) { - tensor_map_[tensor_proto.name()] = serializer.Deserialize(tensor_proto, type); + for (auto& tensor_proto : net_def.tensors()) { + tensor_map_[tensor_proto.name()] = + serializer.Deserialize(tensor_proto, type); } } -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/core/workspace.h b/mace/core/workspace.h index 7de345bc..5d87abf7 100644 --- a/mace/core/workspace.h +++ b/mace/core/workspace.h @@ -5,7 +5,6 @@ #ifndef MACE_CORE_WORKSPACE_H_ #define MACE_CORE_WORKSPACE_H_ - #include "mace/core/common.h" #include "mace/core/tensor.h" #include "mace/proto/mace.pb.h" @@ -37,10 +36,9 @@ class Workspace { private: TensorMap tensor_map_; - DISABLE_COPY_AND_ASSIGN(Workspace); + DISABLE_COPY_AND_ASSIGN(Workspace); }; -} // namespace mace - +} // namespace mace -#endif // MACE_CORE_WORKSPACE_H_ +#endif // MACE_CORE_WORKSPACE_H_ diff --git a/mace/examples/benchmark_example.cc b/mace/examples/benchmark_example.cc index 50e5184b..4fa34bea 100644 --- a/mace/examples/benchmark_example.cc +++ b/mace/examples/benchmark_example.cc @@ -14,7 +14,7 @@ static void foo(int iters) { float* out = new float[N]; while (iters--) { - for (int i=0; i < N; i++) { + for (int i = 0; i < N; i++) { out[i] = inp[i] * 2.0; } } @@ -24,7 +24,6 @@ static void foo(int iters) { BENCHMARK(foo); - static void bar(int iters, int n) { const int64_t tot = static_cast(iters) * n; mace::testing::ItemsProcessed(tot); @@ -34,7 +33,7 @@ static void bar(int iters, int n) { float* out = new float[n]; while (iters--) { - for (int i=0; i < n; i++) { + for (int i = 0; i < n; i++) { out[i] = inp[i] * 2.0; } } diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index f1803ce3..3e5845b3 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -10,10 +10,9 @@ namespace mace { namespace kernels { -template +template struct AddNFunctor { - void operator()(const vector& inputs, - T *output, index_t size) { + void operator()(const vector& inputs, T* output, index_t size) { memset(output, 0, size * sizeof(T)); int n = inputs.size(); for (int i = 0; i < n; ++i) { @@ -25,11 +24,10 @@ struct AddNFunctor { }; template <> -void AddNFunctor::operator()(const vector& inputs, - float *output, - index_t size); +void AddNFunctor::operator()( + const vector& inputs, float* output, index_t size); -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace -#endif // MACE_KERNELS_ADDN_H_ \ No newline at end of file +#endif // MACE_KERNELS_ADDN_H_ \ No newline at end of file diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index 0c1c2ef0..ad76be94 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -11,26 +11,21 @@ namespace mace { namespace kernels { -template +template struct BatchNormFunctor { float variance_epsilon_; BatchNormFunctor(const float variance_epsilon) - : variance_epsilon_(variance_epsilon){} + : variance_epsilon_(variance_epsilon) {} - void operator()(const T* input, - const T* scale, - const T* offset, - const T* mean, - const T* var, - const index_t n, - const index_t channel, - const index_t sample_size, - T* output) { + void operator()(const T* input, const T* scale, const T* offset, + const T* mean, const T* var, const index_t n, + const index_t channel, const index_t sample_size, T* output) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // The calculation formula for inference is // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + - // ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} } + // ( \offset - \frac { \scale * mean } { + // \sqrt{var+\variance_epsilon} } // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} } // new_offset = \offset - mean * common_val; // Y = new_scale * X + new_offset; @@ -53,18 +48,12 @@ struct BatchNormFunctor { }; template <> -void BatchNormFunctor::operator()(const float* input, - const float* scale, - const float* offset, - const float* mean, - const float* var, - const index_t n, - const index_t channel, - const index_t sample_size, - float* output); +void BatchNormFunctor::operator()( + const float* input, const float* scale, const float* offset, + const float* mean, const float* var, const index_t n, const index_t channel, + const index_t sample_size, float* output); +} // namepsace kernels +} // namespace mace -} // namepsace kernels -} // namespace mace - -#endif // MACE_KERNELS_BATCH_NORM_H_ +#endif // MACE_KERNELS_BATCH_NORM_H_ diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index 34044b4e..28e9011e 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -10,114 +10,103 @@ namespace mace { namespace kernels { -template +template class Conv2dFunctor { - public: - Conv2dFunctor(const int* strides, - const int* paddings, - const int* dilations) : - strides_(strides), - paddings_(paddings), - dilations_(dilations) {} - - void operator()(const T* input, // NCHW - const index_t* input_shape, - const T* filter, // c_out, c_in, kernel_h, kernel_w - const index_t* filter_shape, - const T* bias, // c_out - T* output, // NCHW - const index_t* output_shape) { - MACE_CHECK_NOTNULL(output); - - index_t batch = output_shape[0]; - index_t channels = output_shape[1]; - index_t height = output_shape[2]; - index_t width = output_shape[3]; - - index_t input_batch = input_shape[0]; - index_t input_channels = input_shape[1]; - index_t input_height = input_shape[2]; - index_t input_width = input_shape[3]; - - index_t kernel_h = filter_shape[2]; - index_t kernel_w = filter_shape[3]; - - int stride_h = strides_[0]; - int stride_w = strides_[1]; - - int dilation_h = dilations_[0]; - int dilation_w = dilations_[1]; - - MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch"); - - // The left-upper most offset of the padded input - int padded_h_start = 0 - paddings_[0] / 2; - int padded_w_start = 0 - paddings_[1] / 2; - index_t padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2; - index_t padded_w_stop = input_width + paddings_[1] - paddings_[1] / 2; - - index_t kernel_size = input_channels * kernel_h * kernel_w; + public: + Conv2dFunctor(const int* strides, const int* paddings, const int* dilations) + : strides_(strides), paddings_(paddings), dilations_(dilations) {} + + void operator()(const T* input, // NCHW + const index_t* input_shape, + const T* filter, // c_out, c_in, kernel_h, kernel_w + const index_t* filter_shape, + const T* bias, // c_out + T* output, // NCHW + const index_t* output_shape) { + MACE_CHECK_NOTNULL(output); + + index_t batch = output_shape[0]; + index_t channels = output_shape[1]; + index_t height = output_shape[2]; + index_t width = output_shape[3]; + + index_t input_batch = input_shape[0]; + index_t input_channels = input_shape[1]; + index_t input_height = input_shape[2]; + index_t input_width = input_shape[3]; + + index_t kernel_h = filter_shape[2]; + index_t kernel_w = filter_shape[3]; + + int stride_h = strides_[0]; + int stride_w = strides_[1]; + + int dilation_h = dilations_[0]; + int dilation_w = dilations_[1]; + + MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch"); + + // The left-upper most offset of the padded input + int padded_h_start = 0 - paddings_[0] / 2; + int padded_w_start = 0 - paddings_[1] / 2; + index_t padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2; + index_t padded_w_stop = input_width + paddings_[1] - paddings_[1] / 2; + + index_t kernel_size = input_channels * kernel_h * kernel_w; #pragma omp parallel for collapse(2) - for (int n = 0; n < batch; ++n) { - for (int c = 0; c < channels; ++c) { - for (int h = 0; h < height; ++h) { - for (int w = 0; w < width; ++w) { - index_t offset = n * channels * height * width + - c * height * width + - h * width + w; - T sum = 0; - const T* filter_ptr = filter + c * kernel_size; - for (int inc = 0; inc < input_channels; ++inc) { - for (int kh = 0; kh < kernel_h; ++kh) { - for (int kw = 0; kw < kernel_w; ++kw) { - - int inh = padded_h_start + h * stride_h + dilation_h * kh; - int inw = padded_w_start + w * stride_w + dilation_w * kw; - if (inh < 0 || inh >= input_height || - inw < 0 || inw >= input_width) { - MACE_CHECK(inh >= padded_h_start && - inh < padded_h_stop && - inw >= padded_w_start && - inw < padded_w_stop, - "Out of range read from input: ", - inh, ", ", inw); - // else padding with 0: - // sum += 0; - } else { - index_t input_offset = + for (int n = 0; n < batch; ++n) { + for (int c = 0; c < channels; ++c) { + for (int h = 0; h < height; ++h) { + for (int w = 0; w < width; ++w) { + index_t offset = n * channels * height * width + + c * height * width + h * width + w; + T sum = 0; + const T* filter_ptr = filter + c * kernel_size; + for (int inc = 0; inc < input_channels; ++inc) { + for (int kh = 0; kh < kernel_h; ++kh) { + for (int kw = 0; kw < kernel_w; ++kw) { + int inh = padded_h_start + h * stride_h + dilation_h * kh; + int inw = padded_w_start + w * stride_w + dilation_w * kw; + if (inh < 0 || inh >= input_height || inw < 0 || + inw >= input_width) { + MACE_CHECK(inh >= padded_h_start && inh < padded_h_stop && + inw >= padded_w_start && inw < padded_w_stop, + "Out of range read from input: ", inh, ", ", + inw); + // else padding with 0: + // sum += 0; + } else { + index_t input_offset = n * input_channels * input_height * input_width + - inc * input_height * input_width + - inh * input_width + inw; - sum += input[input_offset] * *filter_ptr; - } - ++filter_ptr; + inc * input_height * input_width + inh * input_width + + inw; + sum += input[input_offset] * *filter_ptr; } + ++filter_ptr; } - output[offset] = sum + bias[c]; } + output[offset] = sum + bias[c]; } } } } } + } - private: - const int* strides_; // [stride_h, stride_w] - const int* paddings_; // [padding_h, padding_w] - const int* dilations_; // [dilation_h, dilation_w] + private: + const int* strides_; // [stride_h, stride_w] + const int* paddings_; // [padding_h, padding_w] + const int* dilations_; // [dilation_h, dilation_w] }; template <> -void Conv2dFunctor::operator()(const float* input, - const index_t* input_shape, - const float* filter, - const index_t* filter_shape, - const float* bias, - float* output, - const index_t* output_shape); - -} // namespace kernels -} // namespace mace - -#endif // MACE_KERNELS_CONV_2D_H_ +void Conv2dFunctor::operator()( + const float* input, const index_t* input_shape, const float* filter, + const index_t* filter_shape, const float* bias, float* output, + const index_t* output_shape); + +} // namespace kernels +} // namespace mace + +#endif // MACE_KERNELS_CONV_2D_H_ diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc index be5a742a..d2fcacc5 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/kernels/conv_pool_2d_util.cc @@ -7,12 +7,10 @@ namespace mace { namespace kernels { -void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW +void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW const index_t *filter_shape, // OIHW - const int *dilations, - const int *strides, - Padding padding, - index_t *output_shape, + const int *dilations, const int *strides, + Padding padding, index_t *output_shape, int *padding_size) { MACE_CHECK(dilations[0] > 0 && dilations[1] > 0, "Invalid dilations, must >= 1"); @@ -43,14 +41,16 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW output_height = (input_shape[2] - k_extent_height) / strides[0] + 1; output_width = (input_shape[3] - k_extent_width) / strides[1] + 1; break; - case SAME:output_height = (input_shape[2] - 1) / strides[0] + 1; + case SAME: + output_height = (input_shape[2] - 1) / strides[0] + 1; output_width = (input_shape[3] - 1) / strides[1] + 1; break; case FULL: output_height = (input_shape[2] + k_extent_height - 2) / strides[0] + 1; output_width = (input_shape[3] + k_extent_width - 2) / strides[1] + 1; break; - default:MACE_CHECK(false, "Unsupported padding type: ", padding); + default: + MACE_CHECK(false, "Unsupported padding type: ", padding); } // Note: TensorFlow may padded one more on the right/bottom side @@ -58,10 +58,10 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW // utilize the more centered features. We need to benchmark // based on the model accuracy. - padding_size[0] = (output_height - 1) * strides[0] + - k_extent_height - input_shape[2]; - padding_size[1] = (output_width - 1) * strides[1] + - k_extent_width - input_shape[3]; + padding_size[0] = + (output_height - 1) * strides[0] + k_extent_height - input_shape[2]; + padding_size[1] = + (output_width - 1) * strides[1] + k_extent_width - input_shape[3]; output_shape[0] = input_shape[0]; output_shape[1] = output_channels; @@ -69,19 +69,15 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW output_shape[3] = output_width; } -void ConstructInputWithPadding(const float *input, - const index_t *input_shape, - const int *paddings, - Tensor *output_tensor) { +void ConstructInputWithPadding(const float *input, const index_t *input_shape, + const int *paddings, Tensor *output_tensor) { index_t batch = input_shape[0]; index_t channels = input_shape[1]; index_t height = input_shape[2]; index_t width = input_shape[3]; - std::vector output_shape({batch, - channels, - paddings[0] + height, - paddings[1] + width}); + std::vector output_shape( + {batch, channels, paddings[0] + height, paddings[1] + width}); const index_t output_width = output_shape[3]; const int padded_top = paddings[0] / 2; @@ -105,5 +101,5 @@ void ConstructInputWithPadding(const float *input, } } } -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace diff --git a/mace/kernels/conv_pool_2d_util.h b/mace/kernels/conv_pool_2d_util.h index c6b9f090..ff46887e 100644 --- a/mace/kernels/conv_pool_2d_util.h +++ b/mace/kernels/conv_pool_2d_util.h @@ -10,26 +10,22 @@ namespace mace { enum Padding { - VALID = 0, // No padding - SAME = 1, // Pads with half the filter size (rounded down) on both sides - FULL = 2, // Pads with one less than the filter size on both sides + VALID = 0, // No padding + SAME = 1, // Pads with half the filter size (rounded down) on both sides + FULL = 2, // Pads with one less than the filter size on both sides }; namespace kernels { -void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW +void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW const index_t *filter_shape, // OIHW - const int *dilations, - const int *strides, - Padding padding, - index_t *output_shape, + const int *dilations, const int *strides, + Padding padding, index_t *output_shape, int *padding_size); -void ConstructInputWithPadding(const float *input, - const index_t *input_shape, - const int *paddings, - Tensor *output_tensor); -} // namespace kernels -} // namespace mace +void ConstructInputWithPadding(const float *input, const index_t *input_shape, + const int *paddings, Tensor *output_tensor); +} // namespace kernels +} // namespace mace -#endif // MACE_KERNELS_CONV_POOL_2D_UTIL_H_ +#endif // MACE_KERNELS_CONV_POOL_2D_UTIL_H_ diff --git a/mace/kernels/neon/addn_neon.cc b/mace/kernels/neon/addn_neon.cc index 86e53bcb..19f621d4 100644 --- a/mace/kernels/neon/addn_neon.cc +++ b/mace/kernels/neon/addn_neon.cc @@ -2,16 +2,15 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include #include "mace/kernels/addn.h" +#include namespace mace { namespace kernels { template <> -void AddNFunctor::operator()(const vector& inputs, - float *output, - index_t size) { +void AddNFunctor::operator()( + const vector &inputs, float *output, index_t size) { // TODO: neon mem copy memset(output, 0, size * sizeof(float)); int n = inputs.size(); @@ -22,7 +21,7 @@ void AddNFunctor::operator()(const vector } int64_t element_per_group = size / groups; -#pragma omp parallel for num_threads(1) // no significant performance improve +#pragma omp parallel for num_threads(1) // no significant performance improve for (int64_t i = 0; i < size; i += element_per_group) { int64_t count = std::min(element_per_group, size - i); int nn = count >> 2; @@ -48,5 +47,5 @@ void AddNFunctor::operator()(const vector } }; -} // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace kernels +} // namespace mace \ No newline at end of file diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 244d564b..918a94c5 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -2,29 +2,25 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include #include "mace/kernels/batch_norm.h" +#include namespace mace { namespace kernels { template <> -void BatchNormFunctor::operator()(const float* input, - const float* scale, - const float* offset, - const float* mean, - const float* var, - const index_t n, - const index_t channel, - const index_t sample_size, - float* output) { - // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . - // The calculation formula for inference is - // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + - // ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} } - // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} } - // new_offset = \offset - mean * common_val; - // Y = new_scale * X + new_offset; +void BatchNormFunctor::operator()( + const float* input, const float* scale, const float* offset, + const float* mean, const float* var, const index_t n, const index_t channel, + const index_t sample_size, float* output) { + // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . + // The calculation formula for inference is + // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + + // ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} + // } + // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} } + // new_offset = \offset - mean * common_val; + // Y = new_scale * X + new_offset; float new_scale, new_offset; index_t count = sample_size >> 2; index_t remain_count = sample_size - (count << 2); @@ -36,8 +32,8 @@ void BatchNormFunctor::operator()(const float* input, float32x4_t new_scale_f = vdupq_n_f32(new_scale); float32x4_t new_offset_f = vdupq_n_f32(new_offset); for (index_t i = 0; i < n; ++i) { - const float *input_sample_ptr = input + pos; - float *output_sample_ptr = output + pos; + const float* input_sample_ptr = input + pos; + float* output_sample_ptr = output + pos; for (index_t j = 0; j < count; ++j) { float32x4_t input_f = vld1q_f32(input_sample_ptr); @@ -57,5 +53,5 @@ void BatchNormFunctor::operator()(const float* input, } }; -} // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace kernels +} // namespace mace \ No newline at end of file diff --git a/mace/kernels/neon/conv_2d_neon.cc b/mace/kernels/neon/conv_2d_neon.cc index 75b22e9a..5268eb1c 100644 --- a/mace/kernels/neon/conv_2d_neon.cc +++ b/mace/kernels/neon/conv_2d_neon.cc @@ -20,62 +20,39 @@ extern void Conv2dNeonK5x5S1(const float *input, const index_t *input_shape, const float *filter, const float *bias, float *output, const index_t *output_shape); -template<> +template <> void Conv2dFunctor::operator()(const float *input, // NCHW - const index_t *input_shape, - const float *filter, // c_out, c_in, kernel_h, kernel_w - const index_t *filter_shape, - const float *bias, // c_out - float *output, // NCHW - const index_t *output_shape) { - - typedef void (*Conv2dNeonFunction)(const float *input, // NCHW - const index_t *input_shape, - const float *filter, // c_out, c_in, kernel_h, kernel_w - const float *bias, // c_out - float *output, // NCHW - const index_t *output_shape); + float>:: +operator()(const float *input, // NCHW + const index_t *input_shape, + const float *filter, // c_out, c_in, kernel_h, kernel_w + const index_t *filter_shape, + const float *bias, // c_out + float *output, // NCHW + const index_t *output_shape) { + typedef void (*Conv2dNeonFunction)( + const float *input, // NCHW + const index_t *input_shape, + const float *filter, // c_out, c_in, kernel_h, kernel_w + const float *bias, // c_out + float *output, // NCHW + const index_t *output_shape); // Selection matrix: kernel_size x stride_size static const Conv2dNeonFunction selector[5][2] = { - { - Conv2dNeonK1x1S1, - nullptr - }, - { - nullptr, - nullptr - }, - { - Conv2dNeonK3x3S1, - nullptr - }, - { - nullptr, - nullptr - }, - { - Conv2dNeonK5x5S1, - nullptr - } - }; + {Conv2dNeonK1x1S1, nullptr}, + {nullptr, nullptr}, + {Conv2dNeonK3x3S1, nullptr}, + {nullptr, nullptr}, + {Conv2dNeonK5x5S1, nullptr}}; // not implement yet index_t kernel_h = filter_shape[2]; index_t kernel_w = filter_shape[3]; - if (kernel_h != kernel_w || kernel_h > 5 || - strides_[0] != strides_[1] || strides_[0] > 2 || - dilations_[0] != 1 || dilations_[1] != 1 || + if (kernel_h != kernel_w || kernel_h > 5 || strides_[0] != strides_[1] || + strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1 || selector[kernel_h - 1][strides_[0] - 1] == nullptr) { LOG(WARNING) << "NEON conv2d kernel not implementated, using slow vesion"; Conv2dFunctor(strides_, paddings_, dilations_)( - input, - input_shape, - filter, - filter_shape, - bias, - output, - output_shape - ); + input, input_shape, filter, filter_shape, bias, output, output_shape); return; } @@ -87,13 +64,8 @@ void Conv2dFunctor 0; --remaining_pixels) { - const float mul = *input_ptr * k0; + const float mul = *input_ptr * k0; const float mul1 = *input_ptr1 * k1; const float mul2 = *input_ptr2 * k2; const float mul3 = *input_ptr3 * k3; @@ -141,9 +140,9 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW // Process the remaining channels for (; inc < input_channels; ++inc) { float* output_ptr = channel_output_start; - MACE_ASSERT(input_ptr == input + n * input_channels * - input_height * input_width + - inc * input_height * input_width); + MACE_ASSERT(input_ptr == + input + n * input_channels * input_height * input_width + + inc * input_height * input_width); MACE_ASSERT(filter_ptr == filter + c * input_channels + inc); const float k0 = filter_ptr[0]; @@ -166,13 +165,13 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW vst1q_f32(output_ptr + 4, out4); output_ptr += 8; - input_ptr += 8; + input_ptr += 8; } // Process the remaining pixels index_t remaining_pixels = loop_remaining; for (; remaining_pixels > 0; --remaining_pixels) { const float mul = *input_ptr * k0; - + *output_ptr += mul; ++output_ptr; @@ -183,5 +182,5 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW } }; -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace diff --git a/mace/kernels/neon/conv_2d_neon_3x3.cc b/mace/kernels/neon/conv_2d_neon_3x3.cc index 8ba5e82d..3853a5b5 100644 --- a/mace/kernels/neon/conv_2d_neon_3x3.cc +++ b/mace/kernels/neon/conv_2d_neon_3x3.cc @@ -10,78 +10,81 @@ namespace kernels { static const int kRegisterSize = 4; -void Conv2dNeonK3x3S1(const float* input, // NCHW - const index_t* input_shape, - const float* filter, // c_out, c_in, kernel_h, kernel_w - const float* bias, // c_out - float* output, // NCHW - const index_t* output_shape) { - - int batch = output_shape[0]; +void Conv2dNeonK3x3S1(const float* input, // NCHW + const index_t* input_shape, + const float* filter, // c_out, c_in, kernel_h, kernel_w + const float* bias, // c_out + float* output, // NCHW + const index_t* output_shape) { + int batch = output_shape[0]; int channels = output_shape[1]; - int height = output_shape[2]; - int width = output_shape[3]; + int height = output_shape[2]; + int width = output_shape[3]; - int input_batch = input_shape[0]; + int input_batch = input_shape[0]; int input_channels = input_shape[1]; - int input_height = input_shape[2]; - int input_width = input_shape[3]; + int input_height = input_shape[2]; + int input_width = input_shape[3]; int kernel_h = 3; - int kernel_w = 3; + int kernel_w = 3; int height_count = (height >> 1) << 1; for (int b = 0; b < batch; ++b) { float* output_ptr_base = output + b * channels * height * width; for (int oc = 0; oc < channels; ++oc) { - const float* filter_ptr = filter + oc * input_channels * kernel_h * kernel_w; - const float* input_ptr = input + b * input_channels * input_height * input_width; + const float* filter_ptr = + filter + oc * input_channels * kernel_h * kernel_w; + const float* input_ptr = + input + b * input_channels * input_height * input_width; float* output_ptr = output_ptr_base + oc * height * width; std::fill(output_ptr, output_ptr + height * width, bias[oc]); for (int ic = 0; ic < input_channels; ++ic) { float32x4_t filter0 = vld1q_f32(filter_ptr); - float32x4_t filter3 = vld1q_f32(filter_ptr+3); - float32x4_t filter6 = vld1q_f32(filter_ptr+6); + float32x4_t filter3 = vld1q_f32(filter_ptr + 3); + float32x4_t filter6 = vld1q_f32(filter_ptr + 6); - const float* row[kRegisterSize] = { - input_ptr, input_ptr + input_width, - input_ptr + 2 * input_width, input_ptr + 3 * input_width - }; + const float* row[kRegisterSize] = {input_ptr, input_ptr + input_width, + input_ptr + 2 * input_width, + input_ptr + 3 * input_width}; float* output_ptr1 = output_ptr; float* output_ptr2 = output_ptr + width; for (int h = 0; h < height_count; h += 2) { - int count = width >> 2; int remain_count = width & 3; for (; count > 0; --count) { float32x4_t sum0 = vdupq_n_f32(.0f); float32x4_t sum1 = vdupq_n_f32(.0f); - float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123 - float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); //4567 - float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 - float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 + float32x4_t row0_ext_0 = vld1q_f32(row[0]); // 0123 + float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); // 4567 + float32x4_t row0_ext_1 = + vextq_f32(row0_ext_0, row0_latter, 1); // 1234 + float32x4_t row0_ext_2 = + vextq_f32(row0_ext_0, row0_latter, 2); // 2345 sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter0, 0); sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter0, 1); sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2); - float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123 - float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); //4567 - float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234 - float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345 + float32x4_t row1_ext_0 = vld1q_f32(row[1]); // 0123 + float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); // 4567 + float32x4_t row1_ext_1 = + vextq_f32(row1_ext_0, row1_latter, 1); // 1234 + float32x4_t row1_ext_2 = + vextq_f32(row1_ext_0, row1_latter, 2); // 2345 sum0 = vfmaq_laneq_f32(sum0, row1_ext_0, filter3, 0); sum0 = vfmaq_laneq_f32(sum0, row1_ext_1, filter3, 1); sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2); - row0_ext_0 = vld1q_f32(row[2]); //0123 - row0_latter = vld1q_f32(row[2] + kRegisterSize); //4567 - row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 - row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 + row0_ext_0 = vld1q_f32(row[2]); // 0123 + row0_latter = vld1q_f32(row[2] + kRegisterSize); // 4567 + row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); // 1234 + row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); // 2345 sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter6, 0); sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter6, 1); @@ -96,10 +99,10 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW sum1 = vfmaq_laneq_f32(sum1, row0_ext_1, filter3, 1); sum1 = vfmaq_laneq_f32(sum1, row0_ext_2, filter3, 2); - row1_ext_0 = vld1q_f32(row[3]); //0123 - row1_latter = vld1q_f32(row[3] + kRegisterSize); //4567 - row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234 - row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345 + row1_ext_0 = vld1q_f32(row[3]); // 0123 + row1_latter = vld1q_f32(row[3] + kRegisterSize); // 4567 + row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); // 1234 + row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); // 2345 sum1 = vfmaq_laneq_f32(sum1, row1_ext_0, filter6, 0); sum1 = vfmaq_laneq_f32(sum1, row1_ext_1, filter6, 1); @@ -114,15 +117,15 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW output_ptr1 += kRegisterSize; output_ptr2 += kRegisterSize; - for(int i = 0; i < kRegisterSize; ++i) { + for (int i = 0; i < kRegisterSize; ++i) { row[i] += kRegisterSize; } } for (; remain_count > 0; --remain_count) { - float32x4_t row0 = vld1q_f32(row[0]); //0123 - float32x4_t row1 = vld1q_f32(row[1]); //0123 - float32x4_t row2 = vld1q_f32(row[2]); //0123 - float32x4_t row3 = vld1q_f32(row[3]); //0123 + float32x4_t row0 = vld1q_f32(row[0]); // 0123 + float32x4_t row1 = vld1q_f32(row[1]); // 0123 + float32x4_t row2 = vld1q_f32(row[2]); // 0123 + float32x4_t row3 = vld1q_f32(row[3]); // 0123 float32x4_t sum = vmulq_f32(row0, filter0); sum = vmlaq_f32(sum, row1, filter3); @@ -138,13 +141,13 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW ++output_ptr1; ++output_ptr2; - for(int i = 0; i < kRegisterSize; ++i) { + for (int i = 0; i < kRegisterSize; ++i) { row[i] += 1; } } output_ptr1 += width; output_ptr2 += width; - for(int i = 0; i < kRegisterSize; ++i) { + for (int i = 0; i < kRegisterSize; ++i) { row[i] += 2 + input_width; } } @@ -152,30 +155,34 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW if (height != height_count) { int count = width >> 2; int remain_count = width & 3; - for(; count > 0; --count) { + for (; count > 0; --count) { float32x4_t sum0 = vdupq_n_f32(.0f); - float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123 - float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); //4567 - float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 - float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 + float32x4_t row0_ext_0 = vld1q_f32(row[0]); // 0123 + float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); // 4567 + float32x4_t row0_ext_1 = + vextq_f32(row0_ext_0, row0_latter, 1); // 1234 + float32x4_t row0_ext_2 = + vextq_f32(row0_ext_0, row0_latter, 2); // 2345 sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter0, 0); sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter0, 1); sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2); - float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123 - float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); //4567 - float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234 - float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345 + float32x4_t row1_ext_0 = vld1q_f32(row[1]); // 0123 + float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); // 4567 + float32x4_t row1_ext_1 = + vextq_f32(row1_ext_0, row1_latter, 1); // 1234 + float32x4_t row1_ext_2 = + vextq_f32(row1_ext_0, row1_latter, 2); // 2345 sum0 = vfmaq_laneq_f32(sum0, row1_ext_0, filter3, 0); sum0 = vfmaq_laneq_f32(sum0, row1_ext_1, filter3, 1); sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2); - row0_ext_0 = vld1q_f32(row[2]); //0123 - row0_latter = vld1q_f32(row[2] + kRegisterSize); //4567 - row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234 - row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345 + row0_ext_0 = vld1q_f32(row[2]); // 0123 + row0_latter = vld1q_f32(row[2] + kRegisterSize); // 4567 + row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); // 1234 + row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); // 2345 sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter6, 0); sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter6, 1); @@ -185,14 +192,14 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW output_row0 = vaddq_f32(output_row0, sum0); vst1q_f32(output_ptr1, output_row0); output_ptr1 += kRegisterSize; - for(int i = 0; i < 3; ++i) { + for (int i = 0; i < 3; ++i) { row[i] += kRegisterSize; } } for (; remain_count > 0; --remain_count) { - float32x4_t row0 = vld1q_f32(row[0]); //0123 - float32x4_t row1 = vld1q_f32(row[1]); //0123 - float32x4_t row2 = vld1q_f32(row[2]); //0123 + float32x4_t row0 = vld1q_f32(row[0]); // 0123 + float32x4_t row1 = vld1q_f32(row[1]); // 0123 + float32x4_t row2 = vld1q_f32(row[2]); // 0123 float32x4_t sum = vmulq_f32(row0, filter0); sum = vmlaq_f32(sum, row1, filter3); @@ -201,7 +208,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW *output_ptr1 = vaddvq_f32(sum); ++output_ptr1; - for(int i = 0; i < 3; ++i) { + for (int i = 0; i < 3; ++i) { row[i] += 1; } } @@ -213,5 +220,5 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW } } -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace diff --git a/mace/kernels/neon/conv_2d_neon_5x5.cc b/mace/kernels/neon/conv_2d_neon_5x5.cc index 693f1241..0e926eb2 100644 --- a/mace/kernels/neon/conv_2d_neon_5x5.cc +++ b/mace/kernels/neon/conv_2d_neon_5x5.cc @@ -10,11 +10,11 @@ namespace mace { namespace kernels { -void Conv2dNeonK5x5S1(const float* input, // NCHW +void Conv2dNeonK5x5S1(const float* input, // NCHW const index_t* input_shape, - const float* filter, // c_out, c_in, kernel_h, kernel_w - const float* bias, // c_out - float* output, // NCHW + const float* filter, // c_out, c_in, kernel_h, kernel_w + const float* bias, // c_out + float* output, // NCHW const index_t* output_shape) { const index_t batch = output_shape[0]; const index_t channels = output_shape[1]; @@ -30,17 +30,17 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW const index_t input_total_pixels_per_channel = input_height * input_width; const index_t output_total_pixels_per_channel = height * width; - const index_t input_total_pixels_per_batch = input_total_pixels_per_channel - * input_channels; - const index_t output_total_pixels_per_batch = output_total_pixels_per_channel - * channels; + const index_t input_total_pixels_per_batch = + input_total_pixels_per_channel * input_channels; + const index_t output_total_pixels_per_batch = + output_total_pixels_per_channel * channels; const index_t patch_size = input_channels * 25; #pragma omp parallel for collapse(2) for (index_t n = 0; n < batch; ++n) { for (index_t c = 0; c < channels; ++c) { - float* output_ptr = output + n * output_total_pixels_per_batch - + c * output_total_pixels_per_channel; + float* output_ptr = output + n * output_total_pixels_per_batch + + c * output_total_pixels_per_channel; const float* input_ptr = input + n * input_total_pixels_per_batch; // Fill with bias @@ -53,7 +53,7 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW float* outptr2 = outptr + width; const float* inptr = input_ptr + inc * input_total_pixels_per_channel; - const float* filter_ptr = filter + c * patch_size + inc * 25; + const float* filter_ptr = filter + c * patch_size + inc * 25; const float* r0 = inptr; const float* r1 = inptr + input_width; @@ -246,8 +246,8 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW sum2 = r5[4] * k4[4]; float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum)); - float32x2_t - _ss2 = vadd_f32(vget_low_f32(_sum2), vget_high_f32(_sum2)); + float32x2_t _ss2 = + vadd_f32(vget_low_f32(_sum2), vget_high_f32(_sum2)); float32x2_t _ss_ss2 = vpadd_f32(_ss, _ss2); sum += vget_lane_f32(_ss_ss2, 0); @@ -414,7 +414,7 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW } } -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace -#endif // MACE_KERNELS_NEON_CONV_2D_NEON_5X5_H_ +#endif // MACE_KERNELS_NEON_CONV_2D_NEON_5X5_H_ diff --git a/mace/kernels/neon/max_pooling_neon_2x2.cc b/mace/kernels/neon/max_pooling_neon_2x2.cc index 088ea467..3be9fa28 100644 --- a/mace/kernels/neon/max_pooling_neon_2x2.cc +++ b/mace/kernels/neon/max_pooling_neon_2x2.cc @@ -2,19 +2,17 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include #include #include -#include #include "mace/core/common.h" namespace mace { namespace kernels { -void PoolingMaxNeonK2x2S2x2(const float *input, - const index_t *in_shape, - float *output, - const index_t *out_shape, +void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape, + float *output, const index_t *out_shape, const int *paddings) { index_t batch = in_shape[0]; index_t channels = in_shape[1]; @@ -44,7 +42,7 @@ void PoolingMaxNeonK2x2S2x2(const float *input, int w = 0; int num_vectors = 0; if (!((h == 0 && padding_top > 0) || - (h == out_height - 1 && padding_bottom > 0))) { + (h == out_height - 1 && padding_bottom > 0))) { r0 = input + input_offset + (h * 2 - padding_top) * in_width; r1 = r0 + in_width; if (padding_left > 0) { @@ -86,8 +84,7 @@ void PoolingMaxNeonK2x2S2x2(const float *input, for (int kw = 0; kw < 2; ++kw) { int inh = h * 2 - padding_top + kh; int inw = w * 2 - padding_left + kw; - if (inh >= 0 && inh < in_height && - inw >= 0 && inw < in_width) { + if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) { max = std::max(max, input[input_offset + inh * in_width + inw]); } } @@ -104,10 +101,8 @@ void PoolingMaxNeonK2x2S2x2(const float *input, } // assume the input has already been padded -void PoolingMaxNeonK2x2S2x2Padded(const float *input, - const index_t *in_shape, - float *output, - const index_t *out_shape) { +void PoolingMaxNeonK2x2S2x2Padded(const float *input, const index_t *in_shape, + float *output, const index_t *out_shape) { index_t batch = in_shape[0]; index_t channels = in_shape[1]; index_t in_height = in_shape[2]; diff --git a/mace/kernels/neon/max_pooling_neon_3x3.cc b/mace/kernels/neon/max_pooling_neon_3x3.cc index 045ce7b0..129b4df2 100644 --- a/mace/kernels/neon/max_pooling_neon_3x3.cc +++ b/mace/kernels/neon/max_pooling_neon_3x3.cc @@ -2,19 +2,17 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include #include #include -#include #include "mace/core/common.h" namespace mace { namespace kernels { -void PoolingMaxNeonK3x3S2x2(const float *input, - const index_t *in_shape, - float *output, - const index_t *out_shape, +void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape, + float *output, const index_t *out_shape, const int *paddings) { index_t batch = in_shape[0]; index_t channels = in_shape[1]; @@ -44,7 +42,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input, int num_vectors = 0; const float *r0, *r1, *r2; if (!((h == 0 && padding_top > 0) || - (h == out_height - 1 && padding_bottom > 0))) { + (h == out_height - 1 && padding_bottom > 0))) { r0 = input + input_offset + (h * 2 - padding_top) * in_width; r1 = r0 + in_width; r2 = r1 + in_width; @@ -112,8 +110,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input, for (int kw = 0; kw < 3; ++kw) { int inh = h * 2 - padding_top + kh; int inw = w * 2 - padding_left + kw; - if (inh >= 0 && inh < in_height && - inw >= 0 && inw < in_width) { + if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) { max = std::max(max, input[input_offset + inh * in_width + inw]); } } @@ -130,10 +127,8 @@ void PoolingMaxNeonK3x3S2x2(const float *input, } // assume the input has already been padded -void PoolingMaxNeonK3x3S2x2Padded(const float *input, - const index_t *in_shape, - float *output, - const index_t *out_shape) { +void PoolingMaxNeonK3x3S2x2Padded(const float *input, const index_t *in_shape, + float *output, const index_t *out_shape) { index_t batch = in_shape[0]; index_t channels = in_shape[1]; index_t in_height = in_shape[2]; @@ -218,5 +213,5 @@ void PoolingMaxNeonK3x3S2x2Padded(const float *input, } } -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace diff --git a/mace/kernels/neon/pooling_neon.cc b/mace/kernels/neon/pooling_neon.cc index 33d76341..bc6b1952 100644 --- a/mace/kernels/neon/pooling_neon.cc +++ b/mace/kernels/neon/pooling_neon.cc @@ -2,45 +2,36 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include #include "mace/kernels/pooling.h" +#include #include "mace/kernels/conv_pool_2d_util.h" namespace mace { namespace kernels { -extern void PoolingMaxNeonK2x2S2x2(const float *input, - const index_t *in_shape, - float *output, - const index_t *out_shape, +extern void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape, + float *output, const index_t *out_shape, const int *paddings); -extern void PoolingMaxNeonK3x3S2x2(const float *input, - const index_t *in_shape, - float *output, - const index_t *out_shape, +extern void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape, + float *output, const index_t *out_shape, const int *paddings); #ifdef __COPY_MAKE_PADDING -extern void PoolingMaxNeonK2x2S2x2Padded(const float* input, - const index_t* in_shape, - float* output, - const index_t* out_shape); -extern void PoolingMaxNeonK3x3S2x2Padded(const float* input, - const index_t* in_shape, - float* output, - const index_t* out_shape); +extern void PoolingMaxNeonK2x2S2x2Padded(const float *input, + const index_t *in_shape, float *output, + const index_t *out_shape); +extern void PoolingMaxNeonK3x3S2x2Padded(const float *input, + const index_t *in_shape, float *output, + const index_t *out_shape); #endif -template<> +template <> void PoolingFunctor::operator()( - const float *input, - const index_t *input_shape, - float *output, + const float *input, const index_t *input_shape, float *output, const index_t *output_shape) { - if (kernels_[0] == 2 && kernels_[1] == 2 && - strides_[0] == 2 && strides_[1] == 2 && - pooling_type_ == MAX) { + if (kernels_[0] == 2 && kernels_[1] == 2 && strides_[0] == 2 && + strides_[1] == 2 && pooling_type_ == MAX) { #ifdef __COPY_MAKE_PADDING Tensor padded_input; ConstructInputWithPadding(input, input_shape, paddings_, &padded_input); @@ -50,9 +41,8 @@ void PoolingFunctor::operator()( #else PoolingMaxNeonK2x2S2x2(input, input_shape, output, output_shape, paddings_); #endif - } else if (kernels_[0] == 3 && kernels_[1] == 3 && - strides_[0] == 2 && strides_[1] == 2 && - pooling_type_ == MAX) { + } else if (kernels_[0] == 3 && kernels_[1] == 3 && strides_[0] == 2 && + strides_[1] == 2 && pooling_type_ == MAX) { #ifdef __COPY_MAKE_PADDING Tensor padded_input; ConstructInputWithPadding(input, input_shape, paddings_, &padded_input); @@ -65,13 +55,9 @@ void PoolingFunctor::operator()( } else { // not implement yet PoolingFunctor(pooling_type_, kernels_, strides_, paddings_, dilations_)( - input, - input_shape, - output, - output_shape - ); + input, input_shape, output, output_shape); } } -} // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace kernels +} // namespace mace \ No newline at end of file diff --git a/mace/kernels/neon/relu_neon.cc b/mace/kernels/neon/relu_neon.cc index 19ae6332..b03b8960 100644 --- a/mace/kernels/neon/relu_neon.cc +++ b/mace/kernels/neon/relu_neon.cc @@ -2,17 +2,17 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include #include "mace/kernels/relu.h" +#include namespace mace { namespace kernels { template <> void ReluFunctor::operator()(const float *input, - float *output, - index_t size) { -#pragma omp parallel for num_threads(1) // no significant performance improve + float *output, + index_t size) { +#pragma omp parallel for num_threads(1) // no significant performance improve for (int64_t i = 0; i < size; i += kCostPerGroup) { int64_t count = std::min(static_cast(kCostPerGroup), size - i); int nn = count >> 2; @@ -36,6 +36,5 @@ void ReluFunctor::operator()(const float *input, } }; - -} // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace kernels +} // namespace mace \ No newline at end of file diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index b8a1bdd7..b40c2c1f 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -11,29 +11,24 @@ namespace mace { enum PoolingType { - AVG = 1, // avg_pool - MAX = 2, // max_pool + AVG = 1, // avg_pool + MAX = 2, // max_pool }; namespace kernels { -template +template class PoolingFunctor { public: - PoolingFunctor(const PoolingType pooling_type, - const int *kernels, - const int *strides, - const int *paddings, - const int *dilations) + PoolingFunctor(const PoolingType pooling_type, const int *kernels, + const int *strides, const int *paddings, const int *dilations) : pooling_type_(pooling_type), kernels_(kernels), strides_(strides), paddings_(paddings), dilations_(dilations) {} - void operator()(const T *input, - const index_t *input_shape, - T *output, + void operator()(const T *input, const index_t *input_shape, T *output, const index_t *output_shape) { index_t batch = output_shape[0]; index_t channels = output_shape[1]; @@ -60,32 +55,31 @@ class PoolingFunctor { #pragma omp parallel for collapse(2) for (int n = 0; n < batch; ++n) { for (int c = 0; c < channels; ++c) { - index_t out_offset = n * channels * height * width + - c * height * width; + index_t out_offset = n * channels * height * width + c * height * width; index_t in_offset = n * input_channels * input_height * input_width + - c * input_height * input_width; + c * input_height * input_width; for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { T sum_or_max = 0; switch (pooling_type_) { - case AVG:break; - case MAX:sum_or_max = std::numeric_limits::lowest(); + case AVG: + break; + case MAX: + sum_or_max = std::numeric_limits::lowest(); break; default: - MACE_CHECK(false, - "Unsupported pooling type: ", - pooling_type_); + MACE_CHECK(false, "Unsupported pooling type: ", pooling_type_); } for (int kh = 0; kh < kernel_h; ++kh) { for (int kw = 0; kw < kernel_w; ++kw) { int inh = padded_h_start + h * stride_h + dilation_h * kh; int inw = padded_w_start + w * stride_w + dilation_w * kw; - if (inh >= 0 && inh < input_height && - inw >= 0 && inw < input_width) { - index_t input_offset = in_offset + - inh * input_width + inw; + if (inh >= 0 && inh < input_height && inw >= 0 && + inw < input_width) { + index_t input_offset = in_offset + inh * input_width + inw; switch (pooling_type_) { - case AVG:sum_or_max += input[input_offset]; + case AVG: + sum_or_max += input[input_offset]; break; case MAX: sum_or_max = std::max(sum_or_max, input[input_offset]); @@ -98,14 +92,14 @@ class PoolingFunctor { } } switch (pooling_type_) { - case AVG:output[out_offset] = sum_or_max / (kernel_h * kernel_w); + case AVG: + output[out_offset] = sum_or_max / (kernel_h * kernel_w); break; - case MAX:output[out_offset] = sum_or_max; + case MAX: + output[out_offset] = sum_or_max; break; default: - MACE_CHECK(false, - "Unsupported pooling type: ", - pooling_type_); + MACE_CHECK(false, "Unsupported pooling type: ", pooling_type_); } out_offset += 1; } @@ -122,14 +116,12 @@ class PoolingFunctor { const int *dilations_; }; -template<> +template <> void PoolingFunctor::operator()( - const float *input, - const index_t *input_shape, - float *output, + const float *input, const index_t *input_shape, float *output, const index_t *output_shape); -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace -#endif //MACE_KERNELS_POOLING_H +#endif // MACE_KERNELS_POOLING_H diff --git a/mace/kernels/relu.h b/mace/kernels/relu.h index 8eed29a9..79788f03 100644 --- a/mace/kernels/relu.h +++ b/mace/kernels/relu.h @@ -10,7 +10,7 @@ namespace mace { namespace kernels { -template +template struct ReluFunctor { void operator()(const T *input, T *output, index_t size) { for (index_t i = 0; i < size; ++i) { @@ -24,7 +24,7 @@ void ReluFunctor::operator()(const float *input, float *output, index_t size); -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace -#endif // MACE_KERNELS_RELU_H_ \ No newline at end of file +#endif // MACE_KERNELS_RELU_H_ \ No newline at end of file diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index 4b0df869..1302f9f7 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -22,8 +22,8 @@ struct CachedInterpolation { inline float CalculateResizeScale(index_t in_size, index_t out_size, bool align_corners) { return (align_corners && out_size > 1) - ? (in_size - 1) / static_cast(out_size - 1) - : in_size / static_cast(out_size); + ? (in_size - 1) / static_cast(out_size - 1) + : in_size / static_cast(out_size); } inline void ComputeInterpolationWeights(const index_t out_size, @@ -41,21 +41,20 @@ inline void ComputeInterpolationWeights(const index_t out_size, } inline float ComputeLerp(const float top_left, const float top_right, - const float bottom_left, const float bottom_right, - const float x_lerp, const float y_lerp) { + const float bottom_left, const float bottom_right, + const float x_lerp, const float y_lerp) { const float top = top_left + (top_right - top_left) * x_lerp; const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; return top + (bottom - top) * y_lerp; } -template -void ResizeImage(const T *images, - const index_t batch_size, const index_t in_height, - const index_t in_width, const index_t out_height, - const index_t out_width, const index_t channels, +template +void ResizeImage(const T *images, const index_t batch_size, + const index_t in_height, const index_t in_width, + const index_t out_height, const index_t out_width, + const index_t channels, const std::vector &xs_vec, - const std::vector &ys, - float *output) { + const std::vector &ys, float *output) { const index_t in_channel_size = in_height * in_width; const index_t in_batch_num_values = channels * in_channel_size; const index_t out_channel_size = out_height * out_width; @@ -65,10 +64,10 @@ void ResizeImage(const T *images, #pragma omp parallel for collapse(2) for (index_t b = 0; b < batch_size; ++b) { for (index_t c = 0; c < channels; ++c) { - const T* input_ptr = images + in_batch_num_values * b - + in_channel_size * c; - float *output_ptr = output + out_batch_num_values * b - + out_channel_size * c; + const T *input_ptr = + images + in_batch_num_values * b + in_channel_size * c; + float *output_ptr = + output + out_batch_num_values * b + out_channel_size * c; for (index_t y = 0; y < out_height; ++y) { const T *ys_input_lower_ptr = input_ptr + ys[y].lower * in_width; const T *ys_input_upper_ptr = input_ptr + ys[y].upper * in_width; @@ -83,9 +82,8 @@ void ResizeImage(const T *images, const float bottom_left = ys_input_upper_ptr[xs_lower]; const float bottom_right = ys_input_upper_ptr[xs_upper]; - output_ptr[x] = - ComputeLerp(top_left, top_right, bottom_left, bottom_right, - xs_lerp, ys_lerp); + output_ptr[x] = ComputeLerp(top_left, top_right, bottom_left, + bottom_right, xs_lerp, ys_lerp); } output_ptr += out_width; } @@ -94,16 +92,15 @@ void ResizeImage(const T *images, } } -template +template struct ResizeBilinearFunctor { bool align_corners_; - ResizeBilinearFunctor(bool align_corners) - : align_corners_(align_corners) {} + ResizeBilinearFunctor(bool align_corners) : align_corners_(align_corners) {} - void operator()(const T *input, T *output, - index_t n, index_t channels, index_t in_height, - index_t in_width, index_t out_height, index_t out_width) { + void operator()(const T *input, T *output, index_t n, index_t channels, + index_t in_height, index_t in_width, index_t out_height, + index_t out_width) { if (out_height == in_height && out_width == in_width) { std::copy(input, input + channels * in_height * in_width, output); return; @@ -111,8 +108,8 @@ struct ResizeBilinearFunctor { float height_scale = CalculateResizeScale(in_height, out_height, align_corners_); - float - width_scale = CalculateResizeScale(in_width, out_width, align_corners_); + float width_scale = + CalculateResizeScale(in_width, out_width, align_corners_); std::vector ys(out_height + 1); std::vector xs(out_width + 1); @@ -121,12 +118,12 @@ struct ResizeBilinearFunctor { ComputeInterpolationWeights(out_height, in_height, height_scale, ys.data()); ComputeInterpolationWeights(out_width, in_width, width_scale, xs.data()); - ResizeImage(input, n, in_height, in_width, out_height, out_width, - channels, xs, ys, output); + ResizeImage(input, n, in_height, in_width, out_height, out_width, channels, + xs, ys, output); } }; -} // namespace kernels -} // namespace mace +} // namespace kernels +} // namespace mace -#endif // MACE_KERNELS_RESIZE_BILINEAR_H_ +#endif // MACE_KERNELS_RESIZE_BILINEAR_H_ diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index 766a223e..0598d1cd 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(AddN, AddNOp); #if __ARM_NEON REGISTER_NEON_OPERATOR(AddN, AddNOp); -#endif // __ARM_NEON +#endif // __ARM_NEON -} // namespace mace +} // namespace mace diff --git a/mace/ops/addn.h b/mace/ops/addn.h index c25db759..064be034 100644 --- a/mace/ops/addn.h +++ b/mace/ops/addn.h @@ -10,10 +10,10 @@ namespace mace { -template +template class AddNOp : public Operator { public: - AddNOp(const OperatorDef &operator_def, Workspace *ws) + AddNOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws) {} bool Run() override { @@ -36,6 +36,6 @@ class AddNOp : public Operator { kernels::AddNFunctor functor_; }; -} // namespace mace +} // namespace mace -#endif // MACE_OPS_ADDN_H_ +#endif // MACE_OPS_ADDN_H_ diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index 8e3f1b29..f7329d1b 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -10,7 +10,6 @@ namespace mace { template static void AddNBenchmark(int iters, int n, int size) { - mace::testing::StopTiming(); OpsTestNet net; @@ -18,8 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) { for (int i = 0; i < n; ++i) { op_def_builder.Input(internal::MakeString("Input", i).c_str()); } - op_def_builder.Output("Output") - .Finalize(net.operator_def()); + op_def_builder.Output("Output").Finalize(net.operator_def()); // Add input data for (int i = 0; i < n; ++i) { @@ -32,27 +30,26 @@ static void AddNBenchmark(int iters, int n, int size) { } mace::testing::StartTiming(); - while(iters--) { + while (iters--) { net.RunOp(D); } } -#define BM_ADDN_MACRO(N, SIZE, TYPE, DEVICE) \ - static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * SIZE; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \ - AddNBenchmark(iters, N, SIZE); \ - } \ +#define BM_ADDN_MACRO(N, SIZE, TYPE, DEVICE) \ + static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE(int iters) { \ + const int64_t tot = static_cast(iters) * N * SIZE; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ + AddNBenchmark(iters, N, SIZE); \ + } \ BENCHMARK(BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE) -#define BM_ADDN(N, SIZE, TYPE) \ - BM_ADDN_MACRO(N, SIZE, TYPE, CPU); \ +#define BM_ADDN(N, SIZE, TYPE) \ + BM_ADDN_MACRO(N, SIZE, TYPE, CPU); \ BM_ADDN_MACRO(N, SIZE, TYPE, NEON); BM_ADDN(10, 1000, float); BM_ADDN(10, 10000, float); BM_ADDN(100, 1000, float); BM_ADDN(100, 10000, float); -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/ops/addn_test.cc b/mace/ops/addn_test.cc index 453458ff..dd5f906f 100644 --- a/mace/ops/addn_test.cc +++ b/mace/ops/addn_test.cc @@ -36,4 +36,4 @@ TEST_F(AddnOpTest, AddnOp) { ExpectTensorNear(expected, *net.GetOutput("Output"), 0.01); } -} // namespace mace +} // namespace mace diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index 9a48b669..f5b050f1 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(BatchNorm, BatchNormOp); #if __ARM_NEON REGISTER_NEON_OPERATOR(BatchNorm, BatchNormOp); -#endif // __ARM_NEON +#endif // __ARM_NEON -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index e58886b0..a9b1f9f5 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -10,50 +10,55 @@ namespace mace { -template +template class BatchNormOp : public Operator { - public: - BatchNormOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws), - functor_(OperatorBase::GetSingleArgument("variance_epsilon", 1e-4)){} - - bool Run() override { - const Tensor* input = this->Input(0); - const Tensor* scale = this->Input(1); - const Tensor* offset = this->Input(2); - const Tensor* mean = this->Input(3); - const Tensor* var = this->Input(4); - - MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size()); - MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ", scale->dim_size()); - MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ", offset->dim_size()); - MACE_CHECK(mean->dim_size() == 1, "mean must be 1-dimensional. ", mean->dim_size()); - MACE_CHECK(var->dim_size() == 1, "var must be 1-dimensional. ", var->dim_size()); - - Tensor* output = this->Output(0); - output->ResizeLike(input); - - const index_t n = input->dim(0); - const index_t channel = input->dim(1); - const index_t sample_size = input->dim(2) * input->dim(3); - - const T* input_ptr = input->data(); - const T* scale_ptr = scale->data(); - const T* offset_ptr = offset->data(); - const T* mean_ptr = mean->data(); - const T* var_ptr = var->data(); - T* output_ptr = output->mutable_data(); - - functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, - n, channel, sample_size, - output_ptr); - return true; - } - private: - kernels::BatchNormFunctor functor_; + public: + BatchNormOp(const OperatorDef& operator_def, Workspace* ws) + : Operator(operator_def, ws), + functor_( + OperatorBase::GetSingleArgument("variance_epsilon", 1e-4)) {} + bool Run() override { + const Tensor* input = this->Input(0); + const Tensor* scale = this->Input(1); + const Tensor* offset = this->Input(2); + const Tensor* mean = this->Input(3); + const Tensor* var = this->Input(4); + + MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", + input->dim_size()); + MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ", + scale->dim_size()); + MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ", + offset->dim_size()); + MACE_CHECK(mean->dim_size() == 1, "mean must be 1-dimensional. ", + mean->dim_size()); + MACE_CHECK(var->dim_size() == 1, "var must be 1-dimensional. ", + var->dim_size()); + + Tensor* output = this->Output(0); + output->ResizeLike(input); + + const index_t n = input->dim(0); + const index_t channel = input->dim(1); + const index_t sample_size = input->dim(2) * input->dim(3); + + const T* input_ptr = input->data(); + const T* scale_ptr = scale->data(); + const T* offset_ptr = offset->data(); + const T* mean_ptr = mean->data(); + const T* var_ptr = var->data(); + T* output_ptr = output->mutable_data(); + + functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, n, channel, + sample_size, output_ptr); + return true; + } + + private: + kernels::BatchNormFunctor functor_; }; -} // namespace mace +} // namespace mace -#endif // MACE_BATCH_NORM_H_ +#endif // MACE_BATCH_NORM_H_ diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 789934fb..f9de40c5 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -8,19 +8,19 @@ namespace mace { template -static void BatchNorm(int iters, int batch, int channels, int height, int width) { - +static void BatchNorm(int iters, int batch, int channels, int height, + int width) { mace::testing::StopTiming(); OpsTestNet net; OpDefBuilder("BatchNorm", "BatchNormBM") - .Input("Input") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .Output("Output") + .Finalize(net.operator_def()); // Add input data net.AddRandomInput("Input", {batch, channels, height, width}); @@ -35,23 +35,23 @@ static void BatchNorm(int iters, int batch, int channels, int height, int width) } mace::testing::StartTiming(); - while(iters--) { + while (iters--) { net.RunOp(D); } } -#define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE) \ - static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \ - BatchNorm(iters, N, C, H, W); \ - } \ +#define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE) \ + static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ + BatchNorm(iters, N, C, H, W); \ + } \ BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE) -#define BM_BATCH_NORM(N, C, H, W, TYPE) \ - BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \ +#define BM_BATCH_NORM(N, C, H, W, TYPE) \ + BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \ BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON); BM_BATCH_NORM(1, 1, 512, 512, float); @@ -65,4 +65,4 @@ BM_BATCH_NORM(1, 128, 256, 256, float); BM_BATCH_NORM(1, 128, 512, 512, float); BM_BATCH_NORM(32, 1, 256, 256, float); BM_BATCH_NORM(32, 3, 256, 256, float); -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index f4e07416..f963de21 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -13,17 +13,17 @@ TEST_F(BatchNormOpTest, SimpleCPU) { // Construct graph auto& net = test_net(); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("Input") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .Output("Output") + .Finalize(net.operator_def()); // Add input data net.AddInputFromArray("Input", {1, 1, 6, 2}, - {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); + {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); net.AddInputFromArray("Scale", {1}, {4.0f}); net.AddInputFromArray("Offset", {1}, {2.0}); net.AddInputFromArray("Mean", {1}, {10}); @@ -33,8 +33,8 @@ TEST_F(BatchNormOpTest, SimpleCPU) { net.RunOp(); // Check - auto expected = CreateTensor({1, 1, 6, 2}, - {-3.86, -3.86, -1.51, -1.51, 0.83, 0.83, + auto expected = + CreateTensor({1, 1, 6, 2}, {-3.86, -3.86, -1.51, -1.51, 0.83, 0.83, 3.17, 3.17, 5.51, 5.51, 7.86, 7.86}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.01); @@ -51,13 +51,13 @@ TEST_F(BatchNormOpTest, SimpleNeon) { // Construct graph auto& net = test_net(); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("Input") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .Output("Output") + .Finalize(net.operator_def()); // Add input data net.AddRandomInput("Input", {batch, channels, height, width}); @@ -77,5 +77,4 @@ TEST_F(BatchNormOpTest, SimpleNeon) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } - } diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index a236856b..33c60956 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -11,6 +11,6 @@ REGISTER_CPU_OPERATOR(Conv2d, Conv2dOp); #if __ARM_NEON REGISTER_NEON_OPERATOR(Conv2d, Conv2dOp); -#endif // __ARM_NEON +#endif // __ARM_NEON -} // namespace mace +} // namespace mace diff --git a/mace/ops/conv_2d.h b/mace/ops/conv_2d.h index 6ae1e06f..ad3206b0 100644 --- a/mace/ops/conv_2d.h +++ b/mace/ops/conv_2d.h @@ -13,11 +13,11 @@ namespace mace { -template +template class Conv2dOp : public ConvPool2dOpBase { public: Conv2dOp(const OperatorDef& op_def, Workspace* ws) - : ConvPool2dOpBase(op_def, ws) {}; + : ConvPool2dOpBase(op_def, ws){}; bool Run() override { const Tensor* input = this->Input(INPUT); @@ -27,21 +27,16 @@ class Conv2dOp : public ConvPool2dOpBase { std::vector output_shape(4); std::vector paddings(2); - kernels::CalcPaddingAndOutputSize(input->shape().data(), - filter->shape().data(), - this->dilations_.data(), - this->strides_.data(), - this->padding_, - output_shape.data(), - paddings.data()); + kernels::CalcPaddingAndOutputSize( + input->shape().data(), filter->shape().data(), this->dilations_.data(), + this->strides_.data(), this->padding_, output_shape.data(), + paddings.data()); output->Resize(output_shape); - auto conv2d = kernels::Conv2dFunctor(this->strides_.data(), - paddings.data(), - this->dilations_.data()); - conv2d(input->data(), input->shape().data(), - filter->data(), filter->shape().data(), - bias->data(), output->mutable_data(), + auto conv2d = kernels::Conv2dFunctor( + this->strides_.data(), paddings.data(), this->dilations_.data()); + conv2d(input->data(), input->shape().data(), filter->data(), + filter->shape().data(), bias->data(), output->mutable_data(), output->shape().data()); return true; @@ -52,6 +47,6 @@ class Conv2dOp : public ConvPool2dOpBase { OP_OUTPUT_TAGS(OUTPUT); }; -} // namespace mace +} // namespace mace -#endif // MACE_OPS_CONV_2D_H_ +#endif // MACE_OPS_CONV_2D_H_ diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index 96843971..e26f7ac8 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -13,17 +13,17 @@ namespace mace { template static void Conv2d(int iters, int batch, int channels, int height, int width, - int kernel_h, int kernel_w, int stride, - Padding padding, int output_channels) { + int kernel_h, int kernel_w, int stride, Padding padding, + int output_channels) { mace::testing::StopTiming(); OpsTestNet net; OpDefBuilder("Conv2d", "Conv2dTest") - .Input("Input") - .Input("Filter") - .Input("Bias") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("strides", {stride, stride}); @@ -32,7 +32,8 @@ static void Conv2d(int iters, int batch, int channels, int height, int width, // Add input data net.AddRandomInput("Input", {batch, channels, height, width}); - net.AddRandomInput("Filter", {output_channels, channels, kernel_h, kernel_w}); + net.AddRandomInput("Filter", + {output_channels, channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); // Warm-up @@ -41,27 +42,30 @@ static void Conv2d(int iters, int batch, int channels, int height, int width, } mace::testing::StartTiming(); - while(iters--) { + while (iters--) { net.RunOp(D); } } -#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ - static void BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \ - Conv2d(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, OC); \ - } \ - BENCHMARK(BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE) +#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ + static void \ + BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ + Conv2d(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \ + OC); \ + } \ + BENCHMARK( \ + BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE) -#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ - BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \ +#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \ + BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \ BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON); BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float); -BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments +BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float); BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float); BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float); @@ -71,4 +75,4 @@ BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float); BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float); BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float); -} // namespace mace +} // namespace mace diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index 4dbc5d34..db6f2b48 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -2,8 +2,8 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/core/operator.h" #include "mace/ops/conv_2d.h" +#include "mace/core/operator.h" #include "mace/ops/ops_test_util.h" using namespace mace; @@ -14,11 +14,11 @@ TEST_F(Conv2dOpTest, Simple_VALID) { // Construct graph auto& net = test_net(); OpDefBuilder("Conv2d", "Conv2dTest") - .Input("Input") - .Input("Filter") - .Input("Bias") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("strides", {1, 1}); @@ -26,17 +26,13 @@ TEST_F(Conv2dOpTest, Simple_VALID) { net.AddIntsArg("dilations", {1, 1}); // Add input data - net.AddInputFromArray("Input", {1, 2, 3, 3}, - {1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1}); - net.AddInputFromArray("Filter", {1, 2, 3, 3}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + net.AddInputFromArray( + "Input", {1, 2, 3, 3}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + net.AddInputFromArray( + "Filter", {1, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); net.AddInputFromArray("Bias", {1}, {0.1f}); // Run @@ -52,11 +48,11 @@ TEST_F(Conv2dOpTest, Simple_SAME) { // Construct graph auto& net = test_net(); OpDefBuilder("Conv2d", "Conv2dTest") - .Input("Input") - .Input("Filter") - .Input("Bias") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("strides", {1, 1}); @@ -64,27 +60,22 @@ TEST_F(Conv2dOpTest, Simple_SAME) { net.AddIntsArg("dilations", {1, 1}); // Add input data - net.AddInputFromArray("Input", {1, 2, 3, 3}, - {1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1, - 1, 1, 1}); - net.AddInputFromArray("Filter", {1, 2, 3, 3}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + net.AddInputFromArray( + "Input", {1, 2, 3, 3}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + net.AddInputFromArray( + "Filter", {1, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); net.AddInputFromArray("Bias", {1}, {0.1f}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 1, 3, 3}, - { 8.1f, 12.1f, 8.1f, - 12.1f, 18.1f, 12.1f, - 8.1f, 12.1f, 8.1f}); + auto expected = CreateTensor( + {1, 1, 3, 3}, + {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } @@ -93,11 +84,11 @@ TEST_F(Conv2dOpTest, Combined) { // Construct graph auto& net = test_net(); OpDefBuilder("Conv2d", "Conv2dTest") - .Input("Input") - .Input("Filter") - .Input("Bias") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("strides", {2, 2}); @@ -105,36 +96,24 @@ TEST_F(Conv2dOpTest, Combined) { net.AddIntsArg("dilations", {1, 1}); // Add input data - net.AddInputFromArray("Input", {1, 2, 5, 5}, - {1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1}); - net.AddInputFromArray("Filter", {2, 2, 3, 3}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); + net.AddInputFromArray( + "Input", {1, 2, 5, 5}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + net.AddInputFromArray( + "Filter", {2, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 2, 3, 3}, - { 8.1f, 12.1f, 8.1f, - 12.1f, 18.1f, 12.1f, - 8.1f, 12.1f, 8.1f, - 4.2f, 6.2f, 4.2f, - 6.2f, 9.2f, 6.2f, - 4.2f, 6.2f, 4.2f}); - + auto expected = CreateTensor( + {1, 2, 3, 3}, {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f, + 4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } @@ -143,11 +122,11 @@ TEST_F(Conv2dOpTest, Conv1x1) { // Construct graph auto& net = test_net(); OpDefBuilder("Conv2d", "Conv2dTest") - .Input("Input") - .Input("Filter") - .Input("Bias") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Input("Filter") + .Input("Bias") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("strides", {1, 1}); @@ -155,38 +134,32 @@ TEST_F(Conv2dOpTest, Conv1x1) { net.AddIntsArg("dilations", {1, 1}); // Add input data - net.AddInputFromArray("Input", {1, 5, 3, 10}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - net.AddInputFromArray("Filter", {2, 5, 1, 1}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}); + net.AddInputFromArray( + "Input", {1, 5, 3, 10}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + net.AddInputFromArray( + "Filter", {2, 5, 1, 1}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}); net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 2, 3, 10}, - {5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, - 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, - 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, - 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, - 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, - 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f}); + auto expected = CreateTensor( + {1, 2, 3, 10}, + {5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, + 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, + 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, + 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, + 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, + 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } @@ -194,8 +167,7 @@ TEST_F(Conv2dOpTest, Conv1x1) { // TODO we need more tests TEST_F(Conv2dOpTest, ConvNxNS12) { testing::internal::LogToStderr(); - auto func = [&](int kernel_h, int kernel_w, - int stride_h, int stride_w, + auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { srand(time(NULL)); @@ -206,7 +178,7 @@ TEST_F(Conv2dOpTest, ConvNxNS12) { index_t width = 7 + rand() % 100; index_t output_channels = 1 + rand() % 50; // Construct graph - auto &net = test_net(); + auto& net = test_net(); OpDefBuilder("Conv2d", "Conv2dTest") .Input("Input") .Input("Filter") @@ -221,8 +193,8 @@ TEST_F(Conv2dOpTest, ConvNxNS12) { // Add input data net.AddRandomInput("Input", {batch, input_channels, height, width}); - net.AddRandomInput("Filter", {output_channels, input_channels, - kernel_h, kernel_w}); + net.AddRandomInput( + "Filter", {output_channels, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); // run cpu net.RunOp(); diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h index a84e4152..a572b71e 100644 --- a/mace/ops/conv_pool_2d_base.h +++ b/mace/ops/conv_pool_2d_base.h @@ -10,16 +10,15 @@ namespace mace { -template +template class ConvPool2dOpBase : public Operator { public: ConvPool2dOpBase(const OperatorDef& op_def, Workspace* ws) - : Operator(op_def, ws), - strides_(OperatorBase::GetRepeatedArgument("strides")), - padding_(static_cast( - OperatorBase::GetSingleArgument("padding", - static_cast(SAME)))), - dilations_(OperatorBase::GetRepeatedArgument("dilations")) {} + : Operator(op_def, ws), + strides_(OperatorBase::GetRepeatedArgument("strides")), + padding_(static_cast(OperatorBase::GetSingleArgument( + "padding", static_cast(SAME)))), + dilations_(OperatorBase::GetRepeatedArgument("dilations")) {} protected: std::vector strides_; @@ -27,6 +26,6 @@ class ConvPool2dOpBase : public Operator { std::vector dilations_; }; -} // namespace mace +} // namespace mace -#endif // MACE_OPS_CONV_POOL_2D_BASE_H_ +#endif // MACE_OPS_CONV_POOL_2D_BASE_H_ diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index d2b9a2c1..0315a71e 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -43,31 +43,33 @@ class OpsTestNet { public: OpsTestNet() {} - template - void AddInputFromArray(const char *name, - const std::vector &shape, + template + void AddInputFromArray(const char *name, const std::vector &shape, const std::vector &data) { - Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); + Tensor *input = + ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); input->Resize(shape); T *input_data = input->mutable_data(); MACE_CHECK(input->size() == data.size()); memcpy(input_data, data.data(), data.size() * sizeof(T)); } - template - void AddRepeatedInput(const char *name, - const std::vector &shape, - const T data) { - Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); + template + void AddRepeatedInput(const char *name, const std::vector &shape, + const T data) { + Tensor *input = + ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); input->Resize(shape); T *input_data = input->mutable_data(); MACE_CHECK(input->size() == data.size()); std::fill(input_data, input_data + input->size(), data); } - template - void AddRandomInput(const char *name, const std::vector &shape, bool positive = false) { - Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); + template + void AddRandomInput(const char *name, const std::vector &shape, + bool positive = false) { + Tensor *input = + ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); input->Resize(shape); float *input_data = input->mutable_data(); @@ -76,12 +78,16 @@ class OpsTestNet { std::normal_distribution nd(0, 1); std::generate(input_data, input_data + input->size(), - [&gen, &nd, positive] { return positive ? std::abs(nd(gen)) : nd(gen); }); + [&gen, &nd, positive] { + return positive ? std::abs(nd(gen)) : nd(gen); + }); } - template - void AddFixedInput(const char *name, const std::vector &shape, T value) { - Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); + template + void AddFixedInput(const char *name, const std::vector &shape, + T value) { + Tensor *input = + ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum::v()); input->Resize(shape); float *input_data = input->mutable_data(); @@ -122,7 +128,8 @@ class OpsTestNet { } } - void AddStringsArg(const char *name, const std::vector &values) { + void AddStringsArg(const char *name, + const std::vector &values) { auto arg = op_def_.add_arg(); arg->set_name(name); for (auto value : values) { @@ -145,9 +152,7 @@ class OpsTestNet { return net_->Run(); } - bool RunOp() { - return RunOp(DeviceType::CPU); - } + bool RunOp() { return RunOp(DeviceType::CPU); } Tensor *GetOutput(const char *output_name) { return ws_.GetTensor(output_name); @@ -177,8 +182,9 @@ class OpsTestBase : public ::testing::Test { OpsTestNet test_net_; }; -template -unique_ptr CreateTensor(const std::vector &shape, const std::vector &data) { +template +unique_ptr CreateTensor(const std::vector &shape, + const std::vector &data) { unique_ptr res(new Tensor(cpu_allocator(), DataTypeToEnum::v())); res->Resize(shape); T *input_data = res->mutable_data(); @@ -209,40 +215,38 @@ inline std::string ShapeToString(const Tensor &x) { return std::string(stream.str()); } - -template +template struct is_floating_point_type { - static const bool value = std::is_same::value || - std::is_same::value; + static const bool value = + std::is_same::value || std::is_same::value; }; -template +template inline void ExpectEqual(const T &a, const T &b) { EXPECT_EQ(a, b); } -template<> +template <> inline void ExpectEqual(const float &a, const float &b) { EXPECT_FLOAT_EQ(a, b); } -template<> +template <> inline void ExpectEqual(const double &a, const double &b) { EXPECT_DOUBLE_EQ(a, b); } inline void AssertSameTypeDims(const Tensor &x, const Tensor &y) { ASSERT_EQ(x.dtype(), y.dtype()); - ASSERT_TRUE(IsSameSize(x, y)) - << "x.shape [" << ShapeToString(x) << "] vs " - << "y.shape [ " << ShapeToString(y) << "]"; + ASSERT_TRUE(IsSameSize(x, y)) << "x.shape [" << ShapeToString(x) << "] vs " + << "y.shape [ " << ShapeToString(y) << "]"; } -template::value> +template ::value> struct Expector; // Partial specialization for float and double. -template +template struct Expector { static void Equal(const T &a, const T &b) { ExpectEqual(a, b); } @@ -262,18 +266,19 @@ struct Expector { auto a = x.data(); auto b = y.data(); for (int i = 0; i < x.size(); ++i) { - EXPECT_NEAR(a[i], b[i], abs_err) - << "a = " << a << " b = " << b << " index = " << i; + EXPECT_NEAR(a[i], b[i], abs_err) << "a = " << a << " b = " << b + << " index = " << i; } } }; -template +template void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) { - static_assert(is_floating_point_type::value, "T is not a floating point type"); + static_assert(is_floating_point_type::value, + "T is not a floating point type"); Expector::Near(x, y, abs_err); } -} // namespace mace +} // namespace mace -#endif // MACE_OPS_TEST_UTIL_H_ +#endif // MACE_OPS_TEST_UTIL_H_ diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index cab59685..4b972647 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -2,7 +2,6 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // - #include "mace/ops/pooling.h" namespace mace { @@ -11,6 +10,6 @@ REGISTER_CPU_OPERATOR(Pooling, PoolingOp); #if __ARM_NEON REGISTER_NEON_OPERATOR(Pooling, PoolingOp); -#endif // __ARM_NEON +#endif // __ARM_NEON -} // namespace mace +} // namespace mace diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h index 4d0001df..597a4724 100644 --- a/mace/ops/pooling.h +++ b/mace/ops/pooling.h @@ -11,17 +11,17 @@ namespace mace { -template +template class PoolingOp : public ConvPool2dOpBase { -public: + public: PoolingOp(const OperatorDef& op_def, Workspace* ws) - : ConvPool2dOpBase(op_def, ws), - kernels_(OperatorBase::GetRepeatedArgument("kernels")), - pooling_type_(static_cast( - OperatorBase::GetSingleArgument( - "pooling_type", static_cast(AVG)))) {}; + : ConvPool2dOpBase(op_def, ws), + kernels_(OperatorBase::GetRepeatedArgument("kernels")), + pooling_type_( + static_cast(OperatorBase::GetSingleArgument( + "pooling_type", static_cast(AVG)))){}; - bool Run() override{ + bool Run() override { const Tensor* input = this->Input(INPUT); Tensor* output = this->Output(OUTPUT); std::vector in_shape = input->shape(); @@ -33,28 +33,21 @@ public: filter_shape[1] = in_shape[0]; filter_shape[2] = kernels_[0]; filter_shape[3] = kernels_[1]; - kernels::CalcPaddingAndOutputSize(in_shape.data(), - filter_shape.data(), + kernels::CalcPaddingAndOutputSize(in_shape.data(), filter_shape.data(), this->dilations_.data(), - this->strides_.data(), - this->padding_, - output_shape.data(), - paddings.data()); + this->strides_.data(), this->padding_, + output_shape.data(), paddings.data()); output->Resize(output_shape); - auto pooling_func = kernels::PoolingFunctor(pooling_type_, - kernels_.data(), - this->strides_.data(), - paddings.data(), - this->dilations_.data()); - pooling_func(input->data(), - in_shape.data(), - output->mutable_data(), - output->shape().data()); + auto pooling_func = kernels::PoolingFunctor( + pooling_type_, kernels_.data(), this->strides_.data(), paddings.data(), + this->dilations_.data()); + pooling_func(input->data(), in_shape.data(), + output->mutable_data(), output->shape().data()); return true; }; -protected: + protected: std::vector kernels_; PoolingType pooling_type_; @@ -62,6 +55,6 @@ protected: OP_OUTPUT_TAGS(OUTPUT); }; -} // namespace mace +} // namespace mace -#endif //MACE_OPS_POOLING_H_ +#endif // MACE_OPS_POOLING_H_ diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc index ccdcb206..aa2ae140 100644 --- a/mace/ops/pooling_benchmark.cc +++ b/mace/ops/pooling_benchmark.cc @@ -2,20 +2,19 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/core/testing/test_benchmark.h" -#include "mace/core/operator.h" #include "mace/kernels/pooling.h" +#include "mace/core/operator.h" +#include "mace/core/testing/test_benchmark.h" #include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" using namespace mace; using namespace mace::kernels; -template -static void Pooling(int iters, int batch, int channels, int height, - int width, int kernel, int stride, Padding padding, +template +static void Pooling(int iters, int batch, int channels, int height, int width, + int kernel, int stride, Padding padding, PoolingType pooling_type) { - mace::testing::StopTiming(); OpsTestNet net; @@ -45,18 +44,21 @@ static void Pooling(int iters, int batch, int channels, int height, } } -#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \ - static void BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot * (sizeof(float)));\ - Pooling(iters, N, C, H, W, KE, STRIDE, Padding::PA, PoolingType::PO); \ - } \ - BENCHMARK(BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE) +#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \ + static void \ + BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot*(sizeof(float))); \ + Pooling(iters, N, C, H, W, KE, STRIDE, Padding::PA, \ + PoolingType::PO); \ + } \ + BENCHMARK( \ + BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE) -#define BM_POOLING(N, C, H, W, K, S, PA, PO) \ - BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); \ +#define BM_POOLING(N, C, H, W, K, S, PA, PO) \ + BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); \ BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, NEON); BM_POOLING(1, 3, 129, 129, 2, 2, SAME, MAX); diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index 7ff8e351..7ca43f19 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -5,9 +5,9 @@ #include "gtest/gtest.h" #include "mace/core/operator.h" -#include "mace/ops/ops_test_util.h" -#include "mace/ops/conv_pool_2d_base.h" #include "mace/kernels/pooling.h" +#include "mace/ops/conv_pool_2d_base.h" +#include "mace/ops/ops_test_util.h" using namespace mace; @@ -17,9 +17,9 @@ TEST_F(PoolingOpTest, MAX_VALID) { // Construct graph auto& net = test_net(); OpDefBuilder("Pooling", "PoolingTest") - .Input("Input") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("kernels", {2, 2}); @@ -29,34 +29,28 @@ TEST_F(PoolingOpTest, MAX_VALID) { net.AddIntArg("pooling_type", PoolingType::MAX); // Add input data - net.AddInputFromArray("Input", {1, 2, 4, 4}, - {0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15, - 16, 17, 18, 19, - 20, 21, 22, 23, - 24, 25, 26, 27, - 28, 29, 30, 31}); + net.AddInputFromArray( + "Input", {1, 2, 4, 4}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 2, 2, 2}, - {5, 7, 13, 15, 21, 23, 29, 31}); + auto expected = + CreateTensor({1, 2, 2, 2}, {5, 7, 13, 15, 21, 23, 29, 31}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } - TEST_F(PoolingOpTest, AVG_VALID) { // Construct graph auto& net = test_net(); OpDefBuilder("Pooling", "PoolingTest") - .Input("Input") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("kernels", {2, 2}); @@ -66,22 +60,17 @@ TEST_F(PoolingOpTest, AVG_VALID) { net.AddIntArg("pooling_type", PoolingType::AVG); // Add input data - net.AddInputFromArray("Input", {1, 2, 4, 4}, - {0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15, - 16, 17, 18, 19, - 20, 21, 22, 23, - 24, 25, 26, 27, - 28, 29, 30, 31}); + net.AddInputFromArray( + "Input", {1, 2, 4, 4}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 2, 2, 2}, - {2.5, 4.5, 10.5, 12.5, 18.5, 20.5, 26.5, 28.5}); + auto expected = CreateTensor( + {1, 2, 2, 2}, {2.5, 4.5, 10.5, 12.5, 18.5, 20.5, 26.5, 28.5}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } @@ -90,9 +79,9 @@ TEST_F(PoolingOpTest, MAX_SAME) { // Construct graph auto& net = test_net(); OpDefBuilder("Pooling", "PoolingTest") - .Input("Input") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("kernels", {2, 2}); @@ -103,16 +92,13 @@ TEST_F(PoolingOpTest, MAX_SAME) { // Add input data net.AddInputFromArray("Input", {1, 1, 3, 3}, - {0, 1, 2, - 3, 4, 5, - 6, 7, 8}); + {0, 1, 2, 3, 4, 5, 6, 7, 8}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 1, 2, 2}, - {4, 5, 7, 8}); + auto expected = CreateTensor({1, 1, 2, 2}, {4, 5, 7, 8}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } @@ -121,9 +107,9 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) { // Construct graph auto& net = test_net(); OpDefBuilder("Pooling", "PoolingTest") - .Input("Input") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntsArg("kernels", {2, 2}); @@ -133,18 +119,15 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) { net.AddIntArg("pooling_type", PoolingType::MAX); // Add input data - net.AddInputFromArray("Input", {1, 1, 4, 4}, - {0, 1, 2, 3, - 4, 5, 6, 7, - 8, 9, 10, 11, - 12, 13, 14, 15}); + net.AddInputFromArray( + "Input", {1, 1, 4, 4}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); // Run net.RunOp(); // Check - auto expected = CreateTensor({1, 1, 2, 2}, - {10, 11, 14, 15}); + auto expected = CreateTensor({1, 1, 2, 2}, {10, 11, 14, 15}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } @@ -153,9 +136,9 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { // Construct graph auto& net = test_net(); OpDefBuilder("Pooling", "PoolingTest") - .Input("Input") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntArg("pooling_type", PoolingType::MAX); @@ -165,18 +148,14 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { net.AddIntsArg("dilations", {1, 1}); // Add input data - net.AddInputFromArray("Input", {1, 1, 4, 5}, - {0, 1, 2, 3, 4, - 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19}); + net.AddInputFromArray( + "Input", {1, 1, 4, 5}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); // Run net.RunOp(DeviceType::NEON); // Check - Tensor expected = CreateTensor({1, 1, 2, 3}, - {6, 8, 9, - 16, 18, 19}); + Tensor expected = CreateTensor({1, 1, 2, 3}, {6, 8, 9, 16, 18, 19}); ExpectTensorNear(expected, *net.GetOutput("Output"), 0.001); } @@ -185,9 +164,9 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) { // Construct graph auto& net = test_net(); OpDefBuilder("Pooling", "PoolingTest") - .Input("Input") - .Output("Output") - .Finalize(net.operator_def()); + .Input("Input") + .Output("Output") + .Finalize(net.operator_def()); // Add args net.AddIntArg("pooling_type", PoolingType::MAX); @@ -197,18 +176,14 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) { net.AddIntsArg("dilations", {1, 1}); // Add input data - net.AddInputFromArray("Input", {1, 1, 4, 5}, - {0, 1, 2, 3, 4, - 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19}); + net.AddInputFromArray( + "Input", {1, 1, 4, 5}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}); // Run net.RunOp(DeviceType::NEON); // Check - Tensor expected = CreateTensor({1, 1, 2, 3}, - {11, 13, 14, - 16, 18, 19}); + Tensor expected = CreateTensor({1, 1, 2, 3}, {11, 13, 14, 16, 18, 19}); ExpectTensorNear(expected, *net.GetOutput("Output"), 0.001); } diff --git a/mace/ops/relu.cc b/mace/ops/relu.cc index c2193080..8602f932 100644 --- a/mace/ops/relu.cc +++ b/mace/ops/relu.cc @@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(Relu, ReluOp); #if __ARM_NEON REGISTER_NEON_OPERATOR(Relu, ReluOp); -#endif // __ARM_NEON +#endif // __ARM_NEON -} // namespace mace +} // namespace mace diff --git a/mace/ops/relu.h b/mace/ops/relu.h index 166c7733..c195c78f 100644 --- a/mace/ops/relu.h +++ b/mace/ops/relu.h @@ -10,10 +10,10 @@ namespace mace { -template +template class ReluOp : public Operator { public: - ReluOp(const OperatorDef &operator_def, Workspace *ws) + ReluOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws) {} bool Run() override { const Tensor* input_tensor = this->inputs_[0]; @@ -31,6 +31,6 @@ class ReluOp : public Operator { kernels::ReluFunctor functor_; }; -} // namespace mace +} // namespace mace -#endif // MACE_OPS_RELU_H_ +#endif // MACE_OPS_RELU_H_ diff --git a/mace/ops/relu_benchmark.cc b/mace/ops/relu_benchmark.cc index 371c7eca..4605990e 100644 --- a/mace/ops/relu_benchmark.cc +++ b/mace/ops/relu_benchmark.cc @@ -10,7 +10,6 @@ namespace mace { template static void ReluBenchmark(int iters, int size) { - mace::testing::StopTiming(); OpsTestNet net; @@ -28,26 +27,25 @@ static void ReluBenchmark(int iters, int size) { } mace::testing::StartTiming(); - while(iters--) { + while (iters--) { net.RunOp(D); } } -#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \ - static void BM_RELU_##SIZE##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * SIZE; \ - mace::testing::ItemsProcessed(tot); \ - mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \ - ReluBenchmark(iters, SIZE); \ - } \ +#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \ + static void BM_RELU_##SIZE##_##TYPE##_##DEVICE(int iters) { \ + const int64_t tot = static_cast(iters) * SIZE; \ + mace::testing::ItemsProcessed(tot); \ + mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \ + ReluBenchmark(iters, SIZE); \ + } \ BENCHMARK(BM_RELU_##SIZE##_##TYPE##_##DEVICE) -#define BM_RELU(SIZE, TYPE) \ - BM_RELU_MACRO(SIZE, TYPE, CPU); \ +#define BM_RELU(SIZE, TYPE) \ + BM_RELU_MACRO(SIZE, TYPE, CPU); \ BM_RELU_MACRO(SIZE, TYPE, NEON); BM_RELU(1000, float); BM_RELU(100000, float); BM_RELU(10000000, float); -} // namespace mace \ No newline at end of file +} // namespace mace \ No newline at end of file diff --git a/mace/ops/relu_test.cc b/mace/ops/relu_test.cc index 6ca8f6e3..1277722c 100644 --- a/mace/ops/relu_test.cc +++ b/mace/ops/relu_test.cc @@ -32,4 +32,4 @@ TEST_F(ReluOpTest, ReluOp) { ExpectTensorNear(expected, *net.GetOutput("Output"), 0.01); } -} // namespace mace +} // namespace mace diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index 8e931cc9..a20c9f13 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -9,7 +9,8 @@ namespace mace { REGISTER_CPU_OPERATOR(ResizeBilinear, ResizeBilinearOp); #if __ARM_NEON -REGISTER_NEON_OPERATOR(ResizeBilinear, ResizeBilinearOp); -#endif // __ARM_NEON +REGISTER_NEON_OPERATOR(ResizeBilinear, + ResizeBilinearOp); +#endif // __ARM_NEON -} // namespace mace +} // namespace mace diff --git a/mace/ops/resize_bilinear.h b/mace/ops/resize_bilinear.h index 2d1b6f59..8daa3176 100644 --- a/mace/ops/resize_bilinear.h +++ b/mace/ops/resize_bilinear.h @@ -5,18 +5,18 @@ #ifndef MACE_RESIZE_BILINEAR_H #define MACE_RESIZE_BILINEAR_H - #include "mace/core/operator.h" #include "mace/kernels/resize_bilinear.h" namespace mace { -template +template class ResizeBilinearOp : public Operator { public: - ResizeBilinearOp(const OperatorDef &operator_def, Workspace *ws) + ResizeBilinearOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - functor_(OperatorBase::GetSingleArgument("align_corners", false)) {} + functor_( + OperatorBase::GetSingleArgument("align_corners", false)) {} bool Run() override { const Tensor* input = this->Input(0); @@ -24,8 +24,8 @@ class ResizeBilinearOp : public Operator { MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional.", input->dim_size()); - MACE_CHECK(resize_dims->dim_size() == 1, "resize dim must be 2-dimensional.", - resize_dims->dim_size()); + MACE_CHECK(resize_dims->dim_size() == 1, + "resize dim must be 2-dimensional.", resize_dims->dim_size()); Tensor* output = this->Output(0); @@ -35,7 +35,7 @@ class ResizeBilinearOp : public Operator { index_t in_width = input->dim(3); index_t out_height = resize_dims->data()[0]; index_t out_width = resize_dims->data()[1]; - vector out_shape {n, channels, out_height, out_width}; + vector out_shape{n, channels, out_height, out_width}; output->Resize(out_shape); const T* input_ptr = input->data(); @@ -45,10 +45,11 @@ class ResizeBilinearOp : public Operator { out_height, out_width); return true; } + private: kernels::ResizeBilinearFunctor functor_; }; -} // namespace mace +} // namespace mace -#endif // MACE_RESIZE_BILINEAR_H +#endif // MACE_RESIZE_BILINEAR_H diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc index 4887e136..333d32af 100644 --- a/mace/ops/resize_bilinear_test.cc +++ b/mace/ops/resize_bilinear_test.cc @@ -2,9 +2,9 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // +#include "mace/ops/resize_bilinear.h" #include "mace/core/operator.h" #include "mace/ops/ops_test_util.h" -#include "mace/ops/resize_bilinear.h" using namespace mace; -- GitLab