提交 8ae8f575 编写于 作者: L Liangliang He

Fix google coding style

上级 9c9af68e
......@@ -7,13 +7,9 @@
namespace mace {
static std::unique_ptr<CPUAllocator> g_cpu_allocator(new CPUAllocator());
// Returns a non-owning pointer to the CPU allocator currently held by
// g_cpu_allocator. Ownership stays with that unique_ptr, so callers must not
// delete the returned pointer.
CPUAllocator* cpu_allocator() {
return g_cpu_allocator.get();
}
CPUAllocator* cpu_allocator() { return g_cpu_allocator.get(); }
// Replaces the CPU allocator held by g_cpu_allocator with `alloc`.
// g_cpu_allocator is a std::unique_ptr, so reset() takes ownership of `alloc`
// and destroys the previously held allocator; pointers obtained earlier from
// cpu_allocator() are dangling after this call.
void SetCPUAllocator(CPUAllocator* alloc) {
g_cpu_allocator.reset(alloc);
}
void SetCPUAllocator(CPUAllocator* alloc) { g_cpu_allocator.reset(alloc); }
Allocator* GetDeviceAllocator(DeviceType type) {
switch (type) {
......@@ -26,4 +22,4 @@ Allocator* GetDeviceAllocator(DeviceType type) {
return nullptr;
}
} // namespace mace
} // namespace mace
......@@ -39,7 +39,7 @@ class Allocator {
}
};
class CPUAllocator: public Allocator {
class CPUAllocator : public Allocator {
public:
~CPUAllocator() override {}
void* New(size_t nbytes) override {
......@@ -55,9 +55,7 @@ class CPUAllocator: public Allocator {
return data;
}
// Releases `data` by passing it to free().
// NOTE(review): presumably `data` must have been returned by this allocator's
// New(); its body is not fully visible here, so confirm the allocation routine
// is one that free() may legally release.
void Delete(void* data) override {
free(data);
}
void Delete(void* data) override { free(data); }
void CopyBytes(void* dst, const void* src, size_t size) override {
memcpy(dst, src, size);
......@@ -85,6 +83,6 @@ struct DeviceContext<DeviceType::NEON> {
Allocator* GetDeviceAllocator(DeviceType type);
} // namespace mace
} // namespace mace
#endif // MACE_CORE_ALLOCATOR_H_
#endif // MACE_CORE_ALLOCATOR_H_
......@@ -5,12 +5,12 @@
#ifndef MACE_CORE_COMMON_H_
#define MACE_CORE_COMMON_H_
#include <set>
#include <algorithm>
#include <map>
#include <string>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <algorithm>
#include "mace/core/logging.h"
......@@ -24,9 +24,9 @@ typedef int64_t index_t;
// Disables the copy constructor and the copy assignment operator of a class
// by declaring both as deleted.
// The expansion begins with `private:`, so any member declared after the macro
// inside the class body becomes private; place it last in the class.
// The expansion does not end with a semicolon; the caller supplies it.
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname&) = delete; \
#define DISABLE_COPY_AND_ASSIGN(classname) \
private: \
classname(const classname&) = delete; \
classname& operator=(const classname&) = delete
#endif
......@@ -35,4 +35,4 @@ private: \
// TODO: need to fine tune this
#define kCostPerGroup 1024000000
#endif // MACE_CORE_COMMON_H_
#endif // MACE_CORE_COMMON_H_
......@@ -2,7 +2,6 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/logging.h"
#include <stdlib.h>
......@@ -62,11 +61,11 @@ void LogMessage::GenerateLogMessage() {
#else
// Writes the accumulated message to stderr in the form
// "<S> <file>:<line>] <text>", where <S> is the character of "IWEF" selected
// by severity_, and <text> is the stream content returned by str().
// NOTE(review): severity_ is used as a raw index into the 4-character literal;
// presumably it is always in the range 0..3 — confirm against the severity
// constants.
void LogMessage::GenerateLogMessage() {
fprintf(stderr, "%c %s:%d] %s\n", "IWEF"[severity_], fname_, line_, str().c_str());
fprintf(stderr, "%c %s:%d] %s\n", "IWEF"[severity_], fname_, line_,
str().c_str());
}
#endif
namespace {
// Parse log level (int64_t) from environment variable (char*)
......
......@@ -5,8 +5,8 @@
#ifndef MACE_CORE_LOGGING_H_
#define MACE_CORE_LOGGING_H_
#include <sstream>
#include <limits>
#include <sstream>
#include <string>
#undef ERROR
......@@ -30,8 +30,8 @@ inline void MakeStringInternal(std::stringstream& ss, const T& t) {
}
// Variadic case: hands the first value `t` to the single-argument overload,
// then recurses on the remaining `args`. The recursion ends when only one
// argument is left and the single-argument overload is selected directly.
template <typename T, typename... Args>
inline void
MakeStringInternal(std::stringstream& ss, const T& t, const Args&... args) {
inline void MakeStringInternal(std::stringstream& ss, const T& t,
const Args&... args) {
MakeStringInternal(ss, t);
MakeStringInternal(ss, args...);
}
......@@ -48,9 +48,7 @@ template <>
inline string MakeString(const string& str) {
return str;
}
// Overload for C strings: returns a `string` constructed from `c_str`.
// NOTE(review): assuming `string` is std::string, passing a null pointer here
// is undefined behavior — callers must pass a valid NUL-terminated string.
inline string MakeString(const char* c_str) {
return string(c_str);
}
inline string MakeString(const char* c_str) { return string(c_str); }
class LogMessage : public std::basic_ostringstream<char> {
public:
......@@ -85,8 +83,7 @@ class LogMessageFatal : public LogMessage {
::mace::internal::LogMessage(__FILE__, __LINE__, mace::WARNING)
#define _MACE_LOG_ERROR \
::mace::internal::LogMessage(__FILE__, __LINE__, mace::ERROR)
#define _MACE_LOG_FATAL \
::mace::internal::LogMessageFatal(__FILE__, __LINE__)
#define _MACE_LOG_FATAL ::mace::internal::LogMessageFatal(__FILE__, __LINE__)
#define _MACE_LOG_QFATAL _MACE_LOG_FATAL
......@@ -96,10 +93,10 @@ class LogMessageFatal : public LogMessage {
// Turn VLOG off when under mobile devices for considerations of binary size.
#define VLOG_IS_ON(lvl) ((lvl) <= 0)
#else
// Otherwise, Set MACE_CPP_MIN_VLOG_LEVEL environment to update minimum log level
// Otherwise, set the MACE_CPP_MIN_VLOG_LEVEL environment variable to update
// the minimum log level of VLOG.
#define VLOG_IS_ON(lvl) \
((lvl) <= ::mace::internal::LogMessage::MinVLogLevel())
#define VLOG_IS_ON(lvl) ((lvl) <= ::mace::internal::LogMessage::MinVLogLevel())
#endif
#define VLOG(lvl) \
......@@ -113,16 +110,16 @@ class LogMessageFatal : public LogMessage {
// MACE_CHECK(fp->Write(x) == 4)
// MACE_CHECK(fp->Write(x) == 4, "Write failed")
// which are not correct for MACE_ASSERT.
// MACE_CHECK(condition, extra...) logs "Check failed: <condition> " followed by
// the MakeString()-formatted extra arguments at FATAL severity when
// `condition` is false. It is active in every build configuration.
// NOTE: the expansion is a bare `if` without braces or an `else`, so using it
// as the un-braced body of an outer if/else makes the outer `else` bind to the
// macro's `if`. Wrap such uses in braces.
#define MACE_CHECK(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Check failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#define MACE_CHECK(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Check failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
// MACE_ASSERT is only active when NDEBUG is not defined: a false condition
// logs "Assert failed: <condition> " plus the MakeString()-formatted extra
// arguments at FATAL severity. When NDEBUG is defined it expands to ((void)0),
// so the condition is not evaluated at all and must not carry side effects
// the program depends on.
#ifndef NDEBUG
#define MACE_ASSERT(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Assert failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#define MACE_ASSERT(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Assert failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#else
#define MACE_ASSERT(condition, ...) ((void)0)
#endif
......@@ -135,9 +132,9 @@ T&& CheckNotNull(const char* file, int line, const char* exprtext, T&& t) {
return std::forward<T>(t);
}
// Passes `val` through ::mace::internal::CheckNotNull together with the call
// site's file, line and the message "'<val>' Must be non NULL". The macro is
// an expression and evaluates to whatever CheckNotNull returns, so it can be
// used inline around the checked value.
#define MACE_CHECK_NOTNULL(val) \
#define MACE_CHECK_NOTNULL(val) \
::mace::internal::CheckNotNull(__FILE__, __LINE__, \
"'" #val "' Must be non NULL", (val))
"'" #val "' Must be non NULL", (val))
} // namespace internal
} // namespace mace
......
......@@ -17,5 +17,4 @@
#define MACE_PREDICT_TRUE(x) (x)
#endif
#endif //MACE_CORE_MACROS_H_
#endif // MACE_CORE_MACROS_H_
......@@ -6,22 +6,19 @@
namespace mace {
// Initializes name_ from net_def->name().
// `net_def` is dereferenced unconditionally, so it must not be null.
// `ws` and `type` are accepted but not used by this base-class constructor.
NetBase::NetBase(const std::shared_ptr<const NetDef> &net_def,
Workspace *ws,
NetBase::NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
DeviceType type)
: name_(net_def->name()) {
}
: name_(net_def->name()) {}
SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def,
Workspace *ws,
DeviceType type) : NetBase(net_def, ws, type) {
SimpleNet::SimpleNet(const std::shared_ptr<const NetDef>& net_def,
Workspace* ws, DeviceType type)
: NetBase(net_def, ws, type) {
VLOG(1) << "Constructing SimpleNet " << net_def->name();
for (int idx = 0; idx < net_def->op_size(); ++idx) {
const auto& operator_def = net_def->op(idx);
VLOG(1) << "Creating operator " << operator_def.name() << ":"
<< operator_def.type();
std::unique_ptr<OperatorBase> op {nullptr};
std::unique_ptr<OperatorBase> op{nullptr};
OperatorDef temp_def(operator_def);
op = CreateOperator(temp_def, ws, type);
operators_.emplace_back(std::move(op));
......@@ -40,20 +37,16 @@ bool SimpleNet::Run() {
return true;
}
// Convenience overload: copies `net_def` into a heap-allocated NetDef owned by
// a shared_ptr and forwards to the shared_ptr overload of CreateNet, passing
// `ws` and `type` through unchanged.
unique_ptr<NetBase> CreateNet(const NetDef& net_def,
Workspace* ws,
unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
DeviceType type) {
std::shared_ptr<NetDef> tmp_net_def(new NetDef(net_def));
return CreateNet(tmp_net_def, ws, type);
}
// Constructs a SimpleNet from `net_def`, `ws` and `type` and returns it as a
// unique_ptr<NetBase>. SimpleNet is the only net implementation created here.
unique_ptr<NetBase> CreateNet(
const std::shared_ptr<const NetDef>& net_def,
Workspace* ws,
DeviceType type) {
unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
Workspace* ws, DeviceType type) {
unique_ptr<NetBase> net(new SimpleNet(net_def, ws, type));
return net;
}
} // namespace mace
} // namespace mace
......@@ -6,35 +6,31 @@
#define MACE_CORE_NET_H_
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
namespace mace {
// Abstract base class for executable nets: holds the net's name and declares
// the Run() entry point that concrete nets implement. Not copyable.
class NetBase {
public:
// Declared here, defined out of line.
NetBase(const std::shared_ptr<const NetDef> &net_def,
Workspace* ws,
NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
DeviceType type);
// Virtual so that nets can be destroyed through a NetBase pointer.
virtual ~NetBase() noexcept {}
// Executes the net; implemented by subclasses.
virtual bool Run() = 0;
// Returns a reference to the stored net name.
const string &Name() const {
return name_;
}
const string& Name() const { return name_; }
protected:
string name_;
// Expands to a `private:` section with deleted copy operations, so it is kept
// as the last declaration in the class.
DISABLE_COPY_AND_ASSIGN(NetBase);
DISABLE_COPY_AND_ASSIGN(NetBase);
};
class SimpleNet : public NetBase {
public:
SimpleNet(const std::shared_ptr<const NetDef>& net_def,
Workspace* ws,
SimpleNet(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
DeviceType type);
bool Run() override;
......@@ -42,17 +38,14 @@ class SimpleNet : public NetBase {
protected:
vector<unique_ptr<OperatorBase> > operators_;
DISABLE_COPY_AND_ASSIGN(SimpleNet);
DISABLE_COPY_AND_ASSIGN(SimpleNet);
};
// Creates a net from a NetDef passed by reference; the definition is copied.
unique_ptr<NetBase> CreateNet(const NetDef& net_def,
Workspace* ws,
unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
DeviceType type);
// Creates a net from a shared NetDef.
unique_ptr<NetBase> CreateNet(
const std::shared_ptr<const NetDef>& net_def,
Workspace* ws,
DeviceType type);
unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
Workspace* ws, DeviceType type);
} // namespace mace
} // namespace mace
#endif // MACE_CORE_NET_H_
#endif // MACE_CORE_NET_H_
......@@ -11,33 +11,22 @@ std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
return &g_device_type_registry;
}
// Defines the operator registry for CPU operators (creators take
// `const OperatorDef&, Workspace*`) and registers it under DeviceType::CPU.
MACE_DEFINE_REGISTRY(
CPUOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
MACE_DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef&,
Workspace*);
MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
// Same for NEON operators, registered under DeviceType::NEON.
MACE_DEFINE_REGISTRY(
NEONOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
MACE_DEFINE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef&,
Workspace*);
MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
// Looks up the operator registry stored for `type` in the device-type map and
// asks it to create the operator whose key is operator_def.type(), passing
// `operator_def` and `ws` to the creator.
// The lookup uses map::at(), which throws std::out_of_range when no registry
// has been registered for `type`.
// NOTE(review): the registry's Create() appears to return nullptr for an
// unregistered operator type, so callers should check the result — confirm.
unique_ptr<OperatorBase> CreateOperator(
const OperatorDef& operator_def,
Workspace* ws,
DeviceType type) {
unique_ptr<OperatorBase> CreateOperator(const OperatorDef& operator_def,
Workspace* ws, DeviceType type) {
OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
return registry->Create(operator_def.type(), operator_def, ws);
}
// Stores the workspace pointer (not owned here) and a heap-allocated copy of
// `operator_def` held through a shared_ptr.
OperatorBase::OperatorBase(const OperatorDef &operator_def, Workspace *ws)
OperatorBase::OperatorBase(const OperatorDef& operator_def, Workspace* ws)
: operator_ws_(ws),
operator_def_(std::make_shared<OperatorDef>(operator_def)) {
}
operator_def_(std::make_shared<OperatorDef>(operator_def)) {}
} // namespace mace
} // namespace mace
......@@ -5,12 +5,12 @@
#ifndef MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H
#include "mace/core/proto_utils.h"
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/tensor.h"
#include "mace/core/proto_utils.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
namespace mace {
......@@ -23,22 +23,21 @@ class OperatorBase {
MACE_CHECK(operator_def_, "operator_def was null!");
return ArgumentHelper::HasArgument(*operator_def_, name);
}
// Reads the argument `name` of this operator's definition as a T by delegating
// to ArgumentHelper::GetSingleArgument, with `default_value` as the fallback.
// Fails a fatal check if operator_def_ is null.
template<typename T>
template <typename T>
inline T GetSingleArgument(const string &name, const T &default_value) const {
MACE_CHECK(operator_def_, "operator_def was null!");
return ArgumentHelper::GetSingleArgument<OperatorDef, T>(
*operator_def_, name, default_value);
}
// Delegates to ArgumentHelper::HasSingleArgumentOfType<OperatorDef, T> for the
// argument `name` of this operator's definition.
// Fails a fatal check if operator_def_ is null.
template<typename T>
template <typename T>
inline bool HasSingleArgumentOfType(const string &name) const {
MACE_CHECK(operator_def_, "operator_def was null!");
return ArgumentHelper::HasSingleArgumentOfType<OperatorDef, T>(
*operator_def_, name);
}
template<typename T>
template <typename T>
inline vector<T> GetRepeatedArgument(
const string &name,
const vector<T> &default_value = {}) const {
const string &name, const vector<T> &default_value = {}) const {
MACE_CHECK(operator_def_, "operator_def was null!");
return ArgumentHelper::GetRepeatedArgument<OperatorDef, T>(
*operator_def_, name, default_value);
......@@ -49,9 +48,7 @@ class OperatorBase {
return inputs_[idx];
}
// Returns the output tensor pointer stored at position `idx` in outputs_.
// No bounds check is performed; `idx` must be in [0, OutputSize()).
inline Tensor *Output(int idx) {
return outputs_[idx];
}
inline Tensor *Output(int idx) { return outputs_[idx]; }
inline int InputSize() { return inputs_.size(); }
inline int OutputSize() { return outputs_.size(); }
......@@ -70,9 +67,7 @@ class OperatorBase {
operator_def_ = operator_def;
}
// True when an operator definition is currently attached (operator_def_ is
// non-null).
inline bool has_debug_def() const {
return operator_def_ != nullptr;
}
inline bool has_debug_def() const { return operator_def_ != nullptr; }
protected:
Workspace *operator_ws_;
......@@ -80,7 +75,7 @@ class OperatorBase {
vector<const Tensor *> inputs_;
vector<Tensor *> outputs_;
DISABLE_COPY_AND_ASSIGN(OperatorBase);
DISABLE_COPY_AND_ASSIGN(OperatorBase);
};
template <DeviceType D, class T>
......@@ -90,26 +85,22 @@ class Operator : public OperatorBase {
: OperatorBase(operator_def, ws) {
for (const string &input_str : operator_def.input()) {
const Tensor *tensor = ws->GetTensor(input_str);
MACE_CHECK(
tensor != nullptr,
"op ",
operator_def.type(),
": Encountered a non-existing input tensor: ",
input_str);
MACE_CHECK(tensor != nullptr, "op ", operator_def.type(),
": Encountered a non-existing input tensor: ", input_str);
inputs_.push_back(tensor);
}
for (const string &output_str : operator_def.output()) {
outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor(output_str,
DeviceContext<D>::allocator(),
DataTypeToEnum<T>::v())));
outputs_.push_back(MACE_CHECK_NOTNULL(ws->CreateTensor(
output_str, DeviceContext<D>::allocator(), DataTypeToEnum<T>::v())));
}
}
virtual bool Run() override = 0;
~Operator() noexcept override {}
};
// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of the
// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of
// the operator's inputs and outputs, in order to avoid confusion. For example,
// for a fully convolution layer that has input, weight and bias, you can define
// its input tags as:
......@@ -119,9 +110,9 @@ class Operator : public OperatorBase {
// you can now do
// auto& weight = Input(WEIGHT);
// to make it more clear.
#define OP_INPUT_TAGS(first_input, ...) \
#define OP_INPUT_TAGS(first_input, ...) \
enum _InputTags { first_input = 0, __VA_ARGS__ }
#define OP_OUTPUT_TAGS(first_input, ...) \
#define OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ }
typedef Registry<std::string, OperatorBase, const OperatorDef &, Workspace *>
......@@ -135,7 +126,7 @@ struct DeviceTypeRegisterer {
if (gDeviceTypeRegistry()->count(type)) {
LOG(ERROR) << "Device type " << type
<< "registered twice. This should not happen. Did you have "
"duplicated numbers assigned to different devices?";
"duplicated numbers assigned to different devices?";
std::exit(1);
}
// Calling the registry function to get the actual registry pointer.
......@@ -143,39 +134,31 @@ struct DeviceTypeRegisterer {
}
};
// Defines a static DeviceTypeRegisterer inside an anonymous namespace,
// constructed with `type` and the address of `registry_function`. The variable
// name is generated by MACE_ANONYMOUS_VARIABLE(DeviceType), and registration
// happens in the registerer's constructor during static initialization.
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE( \
DeviceType)(type, &registry_function); \
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \
type, &registry_function); \
}
MACE_DECLARE_REGISTRY(
CPUOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &,
Workspace *);
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CPU_OPERATOR(name, ...) \
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY(
NEONOperatorRegistry,
OperatorBase,
const OperatorDef&,
Workspace*);
MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &,
Workspace *);
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_NEON_OPERATOR(name, ...) \
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
// Creates the operator described by `operator_def` for device `type`, bound to
// workspace `ws`. Declared here, defined out of line.
unique_ptr<OperatorBase> CreateOperator(
const OperatorDef &operator_def,
Workspace *ws,
DeviceType type);
unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
Workspace *ws, DeviceType type);
} // namespace mace
} // namespace mace
#endif //MACE_CORE_OPERATOR_H
#endif // MACE_CORE_OPERATOR_H
......@@ -5,9 +5,9 @@
#include "mace/core/proto_utils.h"
#include <fcntl.h>
#include <unistd.h>
#include <cerrno>
#include <fstream>
#include <unistd.h>
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"
......@@ -82,13 +82,12 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
return proto->ParseFromCodedStream(&coded_stream);
}
// Placeholder: binary writing is not implemented in this build variant.
// Both parameters are ignored and every call logs a FATAL
// "Not implemented yet." message.
void WriteProtoToBinaryFile(
const MessageLite& /*proto*/,
const char* /*filename*/) {
void WriteProtoToBinaryFile(const MessageLite& /*proto*/,
const char* /*filename*/) {
LOG(FATAL) << "Not implemented yet.";
}
#else // MACE_USE_LITE_PROTO
#else // MACE_USE_LITE_PROTO
// Full protocol buffer.
......@@ -118,7 +117,7 @@ void WriteProtoToTextFile(const Message& proto, const char* filename) {
}
bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
#if defined (_MSC_VER) // for MSC compiler binary flag needs to be specified
#if defined(_MSC_VER) // for MSC compiler binary flag needs to be specified
int fd = open(filename, O_RDONLY | O_BINARY);
#else
int fd = open(filename, O_RDONLY);
......@@ -138,8 +137,8 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644);
MACE_CHECK(
fd != -1, "File cannot be created: ", filename, " error number: ", errno);
MACE_CHECK(fd != -1, "File cannot be created: ", filename, " error number: ",
errno);
std::unique_ptr<ZeroCopyOutputStream> raw_output(new FileOutputStream(fd));
std::unique_ptr<CodedOutputStream> coded_output(
new CodedOutputStream(raw_output.get()));
......@@ -151,18 +150,17 @@ void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
#endif // MACE_USE_LITE_PROTO
ArgumentHelper::ArgumentHelper(const OperatorDef &def) {
for (auto &arg : def.arg()) {
ArgumentHelper::ArgumentHelper(const OperatorDef& def) {
for (auto& arg : def.arg()) {
if (arg_map_.find(arg.name()) != arg_map_.end()) {
MACE_CHECK(
arg.SerializeAsString() == arg_map_[arg.name()].SerializeAsString(),
"Found argument of the same name '",
arg.name(),
"' but with different contents: ",
ProtoDebugString(def));
"Found argument of the same name '", arg.name(),
"' but with different contents: ", ProtoDebugString(def));
LOG(WARNING) << "Duplicated argument name found in operator def: "
<< ProtoDebugString(def) << ", arg: " << ProtoDebugString(arg);
<< ProtoDebugString(def)
<< ", arg: " << ProtoDebugString(arg);
}
arg_map_[arg.name()] = arg;
......@@ -171,10 +169,9 @@ ArgumentHelper::ArgumentHelper(const OperatorDef &def) {
// Indexes every argument of `netdef` by name in arg_map_.
// An argument name that is already present in arg_map_ fails MACE_CHECK
// (fatal), with the whole net definition included in the message.
ArgumentHelper::ArgumentHelper(const NetDef& netdef) {
for (auto& arg : netdef.arg()) {
MACE_CHECK(
arg_map_.count(arg.name()) == 0,
"Duplicated argument name found in net def: ",
ProtoDebugString(netdef));
MACE_CHECK(arg_map_.count(arg.name()) == 0,
"Duplicated argument name found in net def: ",
ProtoDebugString(netdef));
arg_map_[arg.name()] = arg;
}
}
......@@ -192,32 +189,24 @@ bool SupportsLosslessConversion(const InputType& value) {
}
}
#define INSTANTIATE_GET_SINGLE_ARGUMENT( \
T, fieldname, enforce_lossless_conversion) \
#define INSTANTIATE_GET_SINGLE_ARGUMENT(T, fieldname, \
enforce_lossless_conversion) \
template <> \
T ArgumentHelper::GetSingleArgument<T>( \
const string& name, const T& default_value) const { \
T ArgumentHelper::GetSingleArgument<T>(const string& name, \
const T& default_value) const { \
if (arg_map_.count(name) == 0) { \
VLOG(1) << "Using default parameter value " << default_value \
<< " for parameter " << name; \
return default_value; \
} \
MACE_CHECK( \
arg_map_.at(name).has_##fieldname(), \
"Argument ", \
name, \
" does not have the right field: expected field " #fieldname); \
MACE_CHECK(arg_map_.at(name).has_##fieldname(), "Argument ", name, \
" does not have the right field: expected field " #fieldname); \
auto value = arg_map_.at(name).fieldname(); \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(value), T>(value); \
MACE_CHECK( \
supportsConversion, \
"Value", \
value, \
" of argument ", \
name, \
"cannot be represented correctly in a target type"); \
MACE_CHECK(supportsConversion, "Value", value, " of argument ", name, \
"cannot be represented correctly in a target type"); \
} \
return value; \
} \
......@@ -242,30 +231,25 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(size_t, i, true)
INSTANTIATE_GET_SINGLE_ARGUMENT(string, s, false)
#undef INSTANTIATE_GET_SINGLE_ARGUMENT
// Generates the explicit specialization ArgumentHelper::GetRepeatedArgument<T>.
// The generated function returns `default_value` when `name` is absent from
// arg_map_; otherwise it copies every element of the argument's repeated field
// `fieldname` into a vector<T>. When enforce_lossless_conversion is true, each
// element is first checked with SupportsLosslessConversion and a failed check
// is fatal.
// NOTE(review): the message pieces "Value" and "cannot be represented ..."
// carry no separating spaces, so they appear to run into the adjacent values
// in the log output — confirm and adjust if unintended.
#define INSTANTIATE_GET_REPEATED_ARGUMENT( \
T, fieldname, enforce_lossless_conversion) \
template <> \
vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
const string& name, const std::vector<T>& default_value) const { \
if (arg_map_.count(name) == 0) { \
return default_value; \
} \
vector<T> values; \
for (const auto& v : arg_map_.at(name).fieldname()) { \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(v), T>(v); \
MACE_CHECK( \
supportsConversion, \
"Value", \
v, \
" of argument ", \
name, \
"cannot be represented correctly in a target type"); \
} \
values.push_back(v); \
} \
return values; \
#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname, \
enforce_lossless_conversion) \
template <> \
vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
const string& name, const std::vector<T>& default_value) const { \
if (arg_map_.count(name) == 0) { \
return default_value; \
} \
vector<T> values; \
for (const auto& v : arg_map_.at(name).fieldname()) { \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(v), T>(v); \
MACE_CHECK(supportsConversion, "Value", v, " of argument ", name, \
"cannot be represented correctly in a target type"); \
} \
values.push_back(v); \
} \
return values; \
}
INSTANTIATE_GET_REPEATED_ARGUMENT(float, floats, false)
......@@ -281,14 +265,14 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(size_t, ints, true)
INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings, false)
#undef INSTANTIATE_GET_REPEATED_ARGUMENT
// Generates the MakeArgument specialization for a scalar of type T: the
// resulting Argument has its name set to `name` and `value` stored through
// set_<fieldname>().
#define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname) \
template <> \
Argument MakeArgument(const string& name, const T& value) { \
Argument arg; \
arg.set_name(name); \
arg.set_##fieldname(value); \
return arg; \
}
#define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname) \
template <> \
Argument MakeArgument(const string& name, const T& value) { \
Argument arg; \
arg.set_name(name); \
arg.set_##fieldname(value); \
return arg; \
}
MACE_MAKE_SINGULAR_ARGUMENT(bool, i)
MACE_MAKE_SINGULAR_ARGUMENT(float, f)
......@@ -305,16 +289,16 @@ Argument MakeArgument(const string& name, const MessageLite& value) {
return arg;
}
// Generates the MakeArgument specialization for a vector<T>: the resulting
// Argument has its name set to `name` and every element of `value` appended,
// in order, through add_<fieldname>().
#define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname) \
template <> \
Argument MakeArgument(const string& name, const vector<T>& value) { \
Argument arg; \
arg.set_name(name); \
for (const auto& v : value) { \
arg.add_##fieldname(v); \
} \
return arg; \
}
#define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname) \
template <> \
Argument MakeArgument(const string& name, const vector<T>& value) { \
Argument arg; \
arg.set_name(name); \
for (const auto& v : value) { \
arg.add_##fieldname(v); \
} \
return arg; \
}
MACE_MAKE_REPEATED_ARGUMENT(float, floats)
MACE_MAKE_REPEATED_ARGUMENT(int, ints)
......@@ -328,31 +312,24 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) {
return arg;
}
}
MACE_CHECK(false,
"Argument named ",
name,
"does not exist in operator ",
ProtoDebugString(def));
MACE_CHECK(false, "Argument named ", name, "does not exist in operator ",
ProtoDebugString(def));
}
// Returns the `i` field of the first argument in `def` whose name equals
// `name`, converted to bool by the return type. Returns `def_value` when no
// argument has that name. A matching argument without the `i` field set fails
// MACE_CHECK (fatal).
bool GetFlagArgument(
const OperatorDef& def,
const string& name,
bool def_value) {
bool GetFlagArgument(const OperatorDef& def, const string& name,
bool def_value) {
for (const Argument& arg : def.arg()) {
if (arg.name() == name) {
MACE_CHECK(
arg.has_i(), "Can't parse argument as bool: ", ProtoDebugString(arg));
MACE_CHECK(arg.has_i(), "Can't parse argument as bool: ",
ProtoDebugString(arg));
return arg.i();
}
}
return def_value;
}
Argument* GetMutableArgument(
const string& name,
const bool create_if_missing,
OperatorDef* def) {
Argument* GetMutableArgument(const string& name, const bool create_if_missing,
OperatorDef* def) {
for (int i = 0; i < def->arg_size(); ++i) {
if (def->arg(i).name() == name) {
return def->mutable_arg(i);
......
......@@ -12,15 +12,14 @@
#include "google/protobuf/message.h"
#endif // !MACE_USE_LITE_PROTO
#include "mace/proto/mace.pb.h"
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
namespace mace {
using std::string;
using ::google::protobuf::MessageLite;
// Common interfaces that reads file contents into a string.
bool ReadStringFromFile(const char* filename, string* str);
bool WriteStringToFile(const string& str, const char* filename);
......@@ -46,22 +45,20 @@ inline string ProtoDebugString(const MessageLite& proto) {
// Text-format MessageLite wrappers: these functions exist only so that code
// using the text-format API still compiles against MessageLite. Calling one of
// them logs a FATAL message at runtime, because text format is not available
// in the lite version.
//
// Both parameters are ignored. The trailing `return false` only silences the
// compiler's missing-return warning.
inline bool ReadProtoFromTextFile(
const char* /*filename*/,
MessageLite* /*proto*/) {
inline bool ReadProtoFromTextFile(const char* /*filename*/,
MessageLite* /*proto*/) {
LOG(FATAL) << "If you are running lite version, you should not be "
<< "calling any text-format protobuffers.";
<< "calling any text-format protobuffers.";
return false; // Just to suppress compiler warning.
}
// string overload: forwards filename.c_str() to the const char* overload.
// NOTE(review): `filename` is taken by value, which copies the string on every
// call; a const reference would avoid that, but changing it alters the
// signature.
inline bool ReadProtoFromTextFile(const string filename, MessageLite* proto) {
return ReadProtoFromTextFile(filename.c_str(), proto);
}
// Stub for MessageLite: both parameters are ignored and every call logs a
// FATAL message saying text-format protobuffers are unavailable in the lite
// version.
inline void WriteProtoToTextFile(
const MessageLite& /*proto*/,
const char* /*filename*/) {
inline void WriteProtoToTextFile(const MessageLite& /*proto*/,
const char* /*filename*/) {
LOG(FATAL) << "If you are running lite version, you should not be "
<< "calling any text-format protobuffers.";
<< "calling any text-format protobuffers.";
}
inline void WriteProtoToTextFile(const MessageLite& proto,
const string& filename) {
......@@ -107,16 +104,13 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) {
#endif // MACE_USE_LITE_PROTO
template <
class IterableInputs = std::initializer_list<string>,
class IterableOutputs = std::initializer_list<string>,
class IterableArgs = std::initializer_list<Argument>>
OperatorDef CreateOperatorDef(
const string& type,
const string& name,
const IterableInputs& inputs,
const IterableOutputs& outputs,
const IterableArgs& args) {
template <class IterableInputs = std::initializer_list<string>,
class IterableOutputs = std::initializer_list<string>,
class IterableArgs = std::initializer_list<Argument>>
OperatorDef CreateOperatorDef(const string& type, const string& name,
const IterableInputs& inputs,
const IterableOutputs& outputs,
const IterableArgs& args) {
OperatorDef def;
def.set_type(type);
def.set_name(name);
......@@ -134,20 +128,13 @@ OperatorDef CreateOperatorDef(
// A simplified version of the full CreateOperatorDef for callers that do not
// need to specify args: forwards to the five-argument overload with an empty
// std::vector<Argument>.
template <
class IterableInputs = std::initializer_list<string>,
class IterableOutputs = std::initializer_list<string>>
inline OperatorDef CreateOperatorDef(
const string& type,
const string& name,
const IterableInputs& inputs,
const IterableOutputs& outputs) {
return CreateOperatorDef(
type,
name,
inputs,
outputs,
std::vector<Argument>());
template <class IterableInputs = std::initializer_list<string>,
class IterableOutputs = std::initializer_list<string>>
inline OperatorDef CreateOperatorDef(const string& type, const string& name,
const IterableInputs& inputs,
const IterableOutputs& outputs) {
return CreateOperatorDef(type, name, inputs, outputs,
std::vector<Argument>());
}
/**
......@@ -166,10 +153,8 @@ class ArgumentHelper {
}
// Static convenience wrapper: builds a temporary ArgumentHelper from `def` and
// calls its member GetSingleArgument<T>(name, default_value). A new helper
// (and its argument map) is constructed on every call.
template <typename Def, typename T>
static T GetSingleArgument(
const Def& def,
const string& name,
const T& default_value) {
static T GetSingleArgument(const Def& def, const string& name,
const T& default_value) {
return ArgumentHelper(def).GetSingleArgument<T>(name, default_value);
}
......@@ -180,8 +165,7 @@ class ArgumentHelper {
// Static convenience wrapper: builds a temporary ArgumentHelper from `def` and
// calls its member GetRepeatedArgument<T>(name, default_value).
// `default_value` defaults to an empty vector.
template <typename Def, typename T>
static vector<T> GetRepeatedArgument(
const Def& def,
const string& name,
const Def& def, const string& name,
const std::vector<T>& default_value = std::vector<T>()) {
return ArgumentHelper(def).GetRepeatedArgument<T>(name, default_value);
}
......@@ -192,9 +176,8 @@ class ArgumentHelper {
}
// Static convenience wrapper: builds a temporary ArgumentHelper from `def` and
// calls its member GetRepeatedMessageArgument<MessageType>(name).
template <typename Def, typename MessageType>
static vector<MessageType> GetRepeatedMessageArgument(
const Def& def,
const string& name) {
static vector<MessageType> GetRepeatedMessageArgument(const Def& def,
const string& name) {
return ArgumentHelper(def).GetRepeatedMessageArgument<MessageType>(name);
}
......@@ -216,9 +199,8 @@ class ArgumentHelper {
MACE_CHECK(arg_map_.count(name), "Cannot find parameter named " + name);
MessageType message;
if (arg_map_.at(name).has_s()) {
MACE_CHECK(
message.ParseFromString(arg_map_.at(name).s()),
"Faild to parse content from the string");
MACE_CHECK(message.ParseFromString(arg_map_.at(name).s()),
"Faild to parse content from the string");
} else {
VLOG(1) << "Return empty message for parameter " << name;
}
......@@ -230,9 +212,8 @@ class ArgumentHelper {
MACE_CHECK(arg_map_.count(name), "Cannot find parameter named " + name);
vector<MessageType> messages(arg_map_.at(name).strings_size());
for (int i = 0; i < messages.size(); ++i) {
MACE_CHECK(
messages[i].ParseFromString(arg_map_.at(name).strings(i)),
"Faild to parse content from the string");
MACE_CHECK(messages[i].ParseFromString(arg_map_.at(name).strings(i)),
"Faild to parse content from the string");
}
return messages;
}
......@@ -242,15 +223,11 @@ class ArgumentHelper {
};
// Returns a reference to the argument of `def` named `name`.
// NOTE(review): the definition appears to fail a fatal check when no such
// argument exists — confirm before relying on it.
const Argument& GetArgument(const OperatorDef& def, const string& name);
// Reads the argument `name` of `def` as a bool; `def_value` is returned when
// the argument is absent.
bool GetFlagArgument(
const OperatorDef& def,
const string& name,
bool def_value = false);
// Returns a mutable pointer to the argument of `def` named `name`.
// NOTE(review): behavior for a missing argument, with and without
// `create_if_missing`, is not visible here — check the definition.
Argument* GetMutableArgument(
const string& name,
const bool create_if_missing,
OperatorDef* def);
bool GetFlagArgument(const OperatorDef& def, const string& name,
bool def_value = false);
Argument* GetMutableArgument(const string& name, const bool create_if_missing,
OperatorDef* def);
template <typename T>
Argument MakeArgument(const string& name, const T& value);
......
......@@ -12,7 +12,7 @@ namespace mace {
template <class SrcType, class ObjectType, class... Args>
class Registry {
public:
typedef std::function<std::unique_ptr<ObjectType> (Args ...)> Creator;
typedef std::function<std::unique_ptr<ObjectType>(Args...)> Creator;
Registry() : registry_() {}
......@@ -24,7 +24,7 @@ class Registry {
inline bool Has(const SrcType& key) { return registry_.count(key) != 0; }
unique_ptr<ObjectType> Create(const SrcType& key, Args ... args) {
unique_ptr<ObjectType> Create(const SrcType& key, Args... args) {
if (registry_.count(key) == 0) {
VLOG(2) << "Key not registered: " << key;
return nullptr;
......@@ -60,7 +60,7 @@ class Registerer {
}
// Default factory: constructs a DerivedType from `args` with new and returns
// it wrapped in a unique_ptr<ObjectType>. `args` are received by value and
// passed on as lvalues (no perfect forwarding).
template <class DerivedType>
static unique_ptr<ObjectType> DefaultCreator(Args ... args) {
static unique_ptr<ObjectType> DefaultCreator(Args... args) {
return std::unique_ptr<ObjectType>(new DerivedType(args...));
}
};
......@@ -74,36 +74,35 @@ class Registerer {
#endif
// Declares the accessor function RegistryName(), returning a pointer to the
// Registry<SrcType, ObjectType, ...>, and the typedef
// Registerer<RegistryName> for the matching Registerer instantiation.
#define MACE_DECLARE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName(); \
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName(); \
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registerer##RegistryName;
// Defines the accessor function RegistryName(). The registry is a
// function-local static pointer initialized with `new` on first call and never
// deleted, so it remains valid for the rest of the program.
#define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName() { \
static Registry<SrcType, ObjectType, ##__VA_ARGS__>* registry = \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
return registry; \
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName() { \
static Registry<SrcType, ObjectType, ##__VA_ARGS__>* registry = \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
return registry; \
}
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY( \
RegistryName, std::string, ObjectType, ##__VA_ARGS__)
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY( \
RegistryName, std::string, ObjectType, ##__VA_ARGS__)
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
// Defines a file-local Registerer##RegistryName object that is constructed
// with (key, RegistryName(), <creator>), where the creator is whatever is
// passed in the variadic arguments. The object lives in an anonymous
// namespace; the macro both opens and closes that namespace so that code
// following the macro invocation is not swallowed by it.
#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...)                  \
  namespace {                                                                \
  static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
      key, RegistryName(), __VA_ARGS__);                                     \
  }
#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
namespace { \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, \
RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
key, RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
}
#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \
......@@ -112,6 +111,6 @@ class Registerer {
#define MACE_REGISTER_CLASS(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, #key, __VA_ARGS__)
} // namespace mace
} // namespace mace
#endif // MACE_CORE_REGISTRY_H_
#endif // MACE_CORE_REGISTRY_H_
......@@ -4,19 +4,18 @@
#include "mace/core/serializer.h"
namespace mace {
unique_ptr<TensorProto> Serializer::Serialize(const Tensor &tensor,
const string &name) {
const string &name) {
MACE_NOT_IMPLEMENTED;
return nullptr;
}
unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
DeviceType type) {
unique_ptr<Tensor> tensor(new Tensor(GetDeviceAllocator(type),
proto.data_type()));
unique_ptr<Tensor> tensor(
new Tensor(GetDeviceAllocator(type), proto.data_type()));
vector<index_t> dims;
for (const index_t d : proto.dims()) {
dims.push_back(d);
......@@ -25,8 +24,7 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
switch (proto.data_type()) {
case DT_FLOAT:
tensor->Copy<float>(proto.float_data().data(),
proto.float_data().size());
tensor->Copy<float>(proto.float_data().data(), proto.float_data().size());
break;
case DT_DOUBLE:
tensor->Copy<double>(proto.double_data().data(),
......@@ -34,39 +32,38 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
break;
case DT_INT32:
tensor->template Copy<int32_t>(proto.int32_data().data(),
proto.int32_data().size());
proto.int32_data().size());
break;
case DT_UINT8:
tensor->CopyWithCast<int32_t, uint8_t>(proto.int32_data().data(),
proto.int32_data().size());
proto.int32_data().size());
break;
case DT_INT16:
tensor->CopyWithCast<int32_t, int16_t>(proto.int32_data().data(),
proto.int32_data().size());
proto.int32_data().size());
break;
case DT_INT8:
tensor->CopyWithCast<int32_t, int8_t>(proto.int32_data().data(),
proto.int32_data().size());
proto.int32_data().size());
break;
case DT_INT64:
tensor->Copy<int64_t>(proto.int64_data().data(),
proto.int64_data().size());
proto.int64_data().size());
break;
case DT_UINT16:
tensor->CopyWithCast<int32_t, uint16_t>(proto.int32_data().data(),
proto.int32_data().size());
proto.int32_data().size());
break;
case DT_BOOL:
tensor->CopyWithCast<int32_t, bool>(proto.int32_data().data(),
proto.int32_data().size());
proto.int32_data().size());
break;
case DT_STRING: {
string *content = tensor->mutable_data<string>();
for (int i = 0; i < proto.string_data().size(); ++i) {
content[i] = proto.string_data(i);
}
}
break;
} break;
default:
MACE_NOT_IMPLEMENTED;
break;
......@@ -75,4 +72,4 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
return tensor;
}
} // namespace mace
\ No newline at end of file
} // namespace mace
\ No newline at end of file
......@@ -5,9 +5,9 @@
#ifndef MACE_CORE_SERIALIZER_H_
#define MACE_CORE_SERIALIZER_H_
#include "mace/proto/mace.pb.h"
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
namespace mace {
......@@ -20,9 +20,9 @@ class Serializer {
unique_ptr<Tensor> Deserialize(const TensorProto& proto, DeviceType type);
DISABLE_COPY_AND_ASSIGN(Serializer);
DISABLE_COPY_AND_ASSIGN(Serializer);
};
} // namespace mace
} // namespace mace
#endif // MACE_CORE_SERIALIZER_H_
#endif // MACE_CORE_SERIALIZER_H_
......@@ -5,11 +5,11 @@
#ifndef MACE_CORE_TENSOR_H_
#define MACE_CORE_TENSOR_H_
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/allocator.h"
#include "mace/core/types.h"
#include "mace/core/common.h"
#include "mace/core/logging.h"
#include "mace/core/types.h"
#include "mace/proto/mace.pb.h"
namespace mace {
......@@ -25,13 +25,13 @@ namespace mace {
switch (TYPE_ENUM) { \
CASE(float, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \
CASE(int32_t, SINGLE_ARG(STMTS)) \
CASE(uint8_t, SINGLE_ARG(STMTS)) \
CASE(uint16_t, SINGLE_ARG(STMTS)) \
CASE(int16_t, SINGLE_ARG(STMTS)) \
CASE(int8_t, SINGLE_ARG(STMTS)) \
CASE(int32_t, SINGLE_ARG(STMTS)) \
CASE(uint8_t, SINGLE_ARG(STMTS)) \
CASE(uint16_t, SINGLE_ARG(STMTS)) \
CASE(int16_t, SINGLE_ARG(STMTS)) \
CASE(int8_t, SINGLE_ARG(STMTS)) \
CASE(string, SINGLE_ARG(STMTS)) \
CASE(int64_t, SINGLE_ARG(STMTS)) \
CASE(int64_t, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \
case DT_INVALID: \
INVALID; \
......@@ -41,20 +41,17 @@ namespace mace {
break; \
}
#define CASES(TYPE_ENUM, STMTS) \
CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \
, LOG(FATAL) << "Unexpected type: " << TYPE_ENUM;)
class Tensor {
public:
Tensor()
: alloc_(cpu_allocator()),
size_(0), dtype_(DT_FLOAT), data_(nullptr) {};
: alloc_(cpu_allocator()), size_(0), dtype_(DT_FLOAT), data_(nullptr){};
Tensor(Allocator* a, DataType type)
: alloc_(a), size_(0), dtype_(type), data_(nullptr) {};
: alloc_(a), size_(0), dtype_(type), data_(nullptr){};
~Tensor() {
if (alloc_ && data_.get()) {
......@@ -92,9 +89,8 @@ class Tensor {
if (data_.get() || size_ == 0) {
return data_.get();
} else {
CASES(dtype_, data_.reset(alloc_->New(size_ * sizeof(T)), [this](void* ptr) {
alloc_->Delete(ptr);
}));
CASES(dtype_, data_.reset(alloc_->New(size_ * sizeof(T)),
[this](void* ptr) { alloc_->Delete(ptr); }));
return data_.get();
}
}
......@@ -116,13 +112,9 @@ class Tensor {
}
}
inline void ResizeLike(const Tensor& other) {
Resize(other.shape());
}
inline void ResizeLike(const Tensor& other) { Resize(other.shape()); }
inline void ResizeLike(const Tensor* other) {
Resize(other->shape());
}
inline void ResizeLike(const Tensor* other) { Resize(other->shape()); }
template <typename T>
inline void Copy(const T* src, index_t size) {
......@@ -132,7 +124,8 @@ class Tensor {
template <typename SrcType, typename DstType>
inline void CopyWithCast(const SrcType* src, size_t size) {
MACE_CHECK(static_cast<index_t>(size) == size_, "copy src and dst with different size.");
MACE_CHECK(static_cast<index_t>(size) == size_,
"copy src and dst with different size.");
unique_ptr<DstType[]> buffer(new DstType[size]);
for (size_t i = 0; i < size; ++i) {
buffer[i] = static_cast<DstType>(src[i]);
......@@ -146,10 +139,11 @@ class Tensor {
inline void DebugPrint() {
std::stringstream os;
for (int i: shape_) {
for (int i : shape_) {
os << i << ", ";
}
LOG(INFO) << "Tensor shape: " << os.str() << " type: " << DataType_Name(dtype_);
LOG(INFO) << "Tensor shape: " << os.str()
<< " type: " << DataType_Name(dtype_);
os.str("");
os.clear();
......@@ -175,7 +169,8 @@ class Tensor {
private:
// Returns the product of all entries of shape_ (1 when shape_ is empty).
inline int64_t NumElements() const {
  // The type of the initial value is the accumulator type of
  // std::accumulate. Seeding with a plain `1` would make it an int and
  // truncate every partial product, so seed with an int64_t instead.
  return std::accumulate(shape_.begin(), shape_.end(),
                         static_cast<int64_t>(1), std::multiplies<int64_t>());
}
Allocator* alloc_;
......@@ -184,9 +179,9 @@ class Tensor {
std::shared_ptr<void> data_;
vector<index_t> shape_;
DISABLE_COPY_AND_ASSIGN(Tensor);
DISABLE_COPY_AND_ASSIGN(Tensor);
};
} // namespace mace
} // namespace mace
#endif //MACE_CORE_TENSOR_H_
#endif // MACE_CORE_TENSOR_H_
......@@ -51,11 +51,8 @@ Benchmark* Benchmark::ArgPair(int x, int y) {
return this;
}
// Run all benchmarks
void Benchmark::Run() {
Run("all");
}
void Benchmark::Run() { Run("all"); }
void Benchmark::Run(const char* pattern) {
if (!all_benchmarks) return;
......@@ -113,8 +110,8 @@ void Benchmark::Run(const char* pattern) {
(items_processed * 1e-6) / seconds);
full_label += buf;
}
printf("%-*s %10.0f %10d\t%s\n", width, name,
seconds * 1e9 / iters, iters, full_label.c_str());
printf("%-*s %10.0f %10d\t%s\n", width, name, seconds * 1e9 / iters,
iters, full_label.c_str());
}
}
}
......
......@@ -12,9 +12,9 @@
#include "mace/core/types.h"
// Pastes three tokens into a single identifier. The extra level of
// indirection lets macro arguments such as __LINE__ expand before `##` is
// applied; pasting directly would embed the literal text "__LINE__".
#define MACE_BENCHMARK_CONCAT_IMPL(a, b, c) a##b##c
#define MACE_BENCHMARK_CONCAT(a, b, c) MACE_BENCHMARK_CONCAT_IMPL(a, b, c)

// Defines a file-local pointer, named after `n` and the current line, to a
// heap-allocated ::mace::testing::Benchmark constructed from the stringified
// name of `n` and `n` itself. This macro never deletes the object.
#define BENCHMARK(n)                                        \
  static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT( \
      __benchmark_, n, __LINE__) = (new ::mace::testing::Benchmark(#n, (n)))
namespace mace {
namespace testing {
......
......@@ -17,4 +17,3 @@ int main(int argc, char** argv) {
}
return 0;
}
......@@ -18,26 +18,25 @@ struct DataTypeToEnum {
static_assert(IsValidDataType<T>::value, "Specified Data Type not supported");
};
// EnumToDataType<VALUE>::Type is the type for DataType constant VALUE, e.g.
// EnumToDataType<DT_FLOAT>::Type is float.
template <DataType VALUE>
struct EnumToDataType {}; // Specializations below
// Template specialization for both DataTypeToEnum and EnumToDataType.
#define MATCH_TYPE_AND_ENUM(TYPE, ENUM) \
template <> \
struct DataTypeToEnum<TYPE> { \
static DataType v() { return ENUM; } \
static constexpr DataType value = ENUM; \
}; \
template <> \
struct IsValidDataType<TYPE> { \
static constexpr bool value = true; \
}; \
template <> \
struct EnumToDataType<ENUM> { \
typedef TYPE Type; \
#define MATCH_TYPE_AND_ENUM(TYPE, ENUM) \
template <> \
struct DataTypeToEnum<TYPE> { \
static DataType v() { return ENUM; } \
static constexpr DataType value = ENUM; \
}; \
template <> \
struct IsValidDataType<TYPE> { \
static constexpr bool value = true; \
}; \
template <> \
struct EnumToDataType<ENUM> { \
typedef TYPE Type; \
}
MATCH_TYPE_AND_ENUM(float, DT_FLOAT);
......@@ -53,6 +52,6 @@ MATCH_TYPE_AND_ENUM(bool, DT_BOOL);
static const int32_t kint32_tmax = ((int32_t)0x7FFFFFFF);
} // namespace mace
} // namespace mace
#endif // MACE_CORE_TYPES_H_
#endif // MACE_CORE_TYPES_H_
......@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/common.h"
#include "mace/core/workspace.h"
#include "mace/core/common.h"
#include "mace/core/serializer.h"
namespace mace {
......@@ -16,8 +16,7 @@ vector<string> Workspace::Tensors() const {
return names;
}
Tensor* Workspace::CreateTensor(const string& name,
Allocator* alloc,
Tensor* Workspace::CreateTensor(const string& name, Allocator* alloc,
DataType type) {
if (HasTensor(name)) {
VLOG(1) << "Tensor " << name << " already exists. Skipping.";
......@@ -46,14 +45,16 @@ const Tensor* Workspace::GetTensor(const string& name) const {
}
// Non-const lookup of the tensor stored under `name`.
// Forwards to the const overload and strips the const qualifier from its
// result, so both overloads share one lookup implementation.
Tensor* Workspace::GetTensor(const string& name) {
  const Workspace& self = *this;
  const Tensor* found = self.GetTensor(name);
  return const_cast<Tensor*>(found);
}
void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
void Workspace::LoadModelTensor(const NetDef& net_def, DeviceType type) {
Serializer serializer;
for (auto& tensor_proto: net_def.tensors()) {
tensor_map_[tensor_proto.name()] = serializer.Deserialize(tensor_proto, type);
for (auto& tensor_proto : net_def.tensors()) {
tensor_map_[tensor_proto.name()] =
serializer.Deserialize(tensor_proto, type);
}
}
} // namespace mace
\ No newline at end of file
} // namespace mace
\ No newline at end of file
......@@ -5,7 +5,6 @@
#ifndef MACE_CORE_WORKSPACE_H_
#define MACE_CORE_WORKSPACE_H_
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
......@@ -37,10 +36,9 @@ class Workspace {
private:
TensorMap tensor_map_;
DISABLE_COPY_AND_ASSIGN(Workspace);
DISABLE_COPY_AND_ASSIGN(Workspace);
};
} // namespace mace
} // namespace mace
#endif // MACE_CORE_WORKSPACE_H_
#endif // MACE_CORE_WORKSPACE_H_
......@@ -14,7 +14,7 @@ static void foo(int iters) {
float* out = new float[N];
while (iters--) {
for (int i=0; i < N; i++) {
for (int i = 0; i < N; i++) {
out[i] = inp[i] * 2.0;
}
}
......@@ -24,7 +24,6 @@ static void foo(int iters) {
BENCHMARK(foo);
static void bar(int iters, int n) {
const int64_t tot = static_cast<int64_t>(iters) * n;
mace::testing::ItemsProcessed(tot);
......@@ -34,7 +33,7 @@ static void bar(int iters, int n) {
float* out = new float[n];
while (iters--) {
for (int i=0; i < n; i++) {
for (int i = 0; i < n; i++) {
out[i] = inp[i] * 2.0;
}
}
......
......@@ -10,10 +10,9 @@
namespace mace {
namespace kernels {
template<DeviceType D, typename T>
template <DeviceType D, typename T>
struct AddNFunctor {
void operator()(const vector<const T*>& inputs,
T *output, index_t size) {
void operator()(const vector<const T*>& inputs, T* output, index_t size) {
memset(output, 0, size * sizeof(T));
int n = inputs.size();
for (int i = 0; i < n; ++i) {
......@@ -25,11 +24,10 @@ struct AddNFunctor {
};
template <>
void AddNFunctor<DeviceType::NEON, float>::operator()(const vector<const float*>& inputs,
float *output,
index_t size);
void AddNFunctor<DeviceType::NEON, float>::operator()(
const vector<const float*>& inputs, float* output, index_t size);
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_ADDN_H_
\ No newline at end of file
#endif // MACE_KERNELS_ADDN_H_
\ No newline at end of file
......@@ -11,26 +11,21 @@
namespace mace {
namespace kernels {
template<DeviceType D, typename T>
template <DeviceType D, typename T>
struct BatchNormFunctor {
float variance_epsilon_;
BatchNormFunctor(const float variance_epsilon)
: variance_epsilon_(variance_epsilon){}
: variance_epsilon_(variance_epsilon) {}
void operator()(const T* input,
const T* scale,
const T* offset,
const T* mean,
const T* var,
const index_t n,
const index_t channel,
const index_t sample_size,
T* output) {
void operator()(const T* input, const T* scale, const T* offset,
const T* mean, const T* var, const index_t n,
const index_t channel, const index_t sample_size, T* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} } )
// ( \offset - \frac { \scale * mean } {
// \sqrt{var+\variance_epsilon} } )
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
......@@ -53,18 +48,12 @@ struct BatchNormFunctor {
};
template <>
void BatchNormFunctor<DeviceType::NEON, float>::operator()(const float* input,
const float* scale,
const float* offset,
const float* mean,
const float* var,
const index_t n,
const index_t channel,
const index_t sample_size,
float* output);
void BatchNormFunctor<DeviceType::NEON, float>::operator()(
const float* input, const float* scale, const float* offset,
const float* mean, const float* var, const index_t n, const index_t channel,
const index_t sample_size, float* output);
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_BATCH_NORM_H_
#endif // MACE_KERNELS_BATCH_NORM_H_
......@@ -10,114 +10,103 @@
namespace mace {
namespace kernels {
template<DeviceType D, typename T>
template <DeviceType D, typename T>
class Conv2dFunctor {
public:
Conv2dFunctor(const int* strides,
const int* paddings,
const int* dilations) :
strides_(strides),
paddings_(paddings),
dilations_(dilations) {}
void operator()(const T* input, // NCHW
const index_t* input_shape,
const T* filter, // c_out, c_in, kernel_h, kernel_w
const index_t* filter_shape,
const T* bias, // c_out
T* output, // NCHW
const index_t* output_shape) {
MACE_CHECK_NOTNULL(output);
index_t batch = output_shape[0];
index_t channels = output_shape[1];
index_t height = output_shape[2];
index_t width = output_shape[3];
index_t input_batch = input_shape[0];
index_t input_channels = input_shape[1];
index_t input_height = input_shape[2];
index_t input_width = input_shape[3];
index_t kernel_h = filter_shape[2];
index_t kernel_w = filter_shape[3];
int stride_h = strides_[0];
int stride_w = strides_[1];
int dilation_h = dilations_[0];
int dilation_w = dilations_[1];
MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");
// The left-upper most offset of the padded input
int padded_h_start = 0 - paddings_[0] / 2;
int padded_w_start = 0 - paddings_[1] / 2;
index_t padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2;
index_t padded_w_stop = input_width + paddings_[1] - paddings_[1] / 2;
index_t kernel_size = input_channels * kernel_h * kernel_w;
public:
Conv2dFunctor(const int* strides, const int* paddings, const int* dilations)
: strides_(strides), paddings_(paddings), dilations_(dilations) {}
void operator()(const T* input, // NCHW
const index_t* input_shape,
const T* filter, // c_out, c_in, kernel_h, kernel_w
const index_t* filter_shape,
const T* bias, // c_out
T* output, // NCHW
const index_t* output_shape) {
MACE_CHECK_NOTNULL(output);
index_t batch = output_shape[0];
index_t channels = output_shape[1];
index_t height = output_shape[2];
index_t width = output_shape[3];
index_t input_batch = input_shape[0];
index_t input_channels = input_shape[1];
index_t input_height = input_shape[2];
index_t input_width = input_shape[3];
index_t kernel_h = filter_shape[2];
index_t kernel_w = filter_shape[3];
int stride_h = strides_[0];
int stride_w = strides_[1];
int dilation_h = dilations_[0];
int dilation_w = dilations_[1];
MACE_CHECK(batch == input_batch, "Input/Output batch size mismatch");
// The left-upper most offset of the padded input
int padded_h_start = 0 - paddings_[0] / 2;
int padded_w_start = 0 - paddings_[1] / 2;
index_t padded_h_stop = input_height + paddings_[0] - paddings_[0] / 2;
index_t padded_w_stop = input_width + paddings_[1] - paddings_[1] / 2;
index_t kernel_size = input_channels * kernel_h * kernel_w;
#pragma omp parallel for collapse(2)
for (int n = 0; n < batch; ++n) {
for (int c = 0; c < channels; ++c) {
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
index_t offset = n * channels * height * width +
c * height * width +
h * width + w;
T sum = 0;
const T* filter_ptr = filter + c * kernel_size;
for (int inc = 0; inc < input_channels; ++inc) {
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
int inh = padded_h_start + h * stride_h + dilation_h * kh;
int inw = padded_w_start + w * stride_w + dilation_w * kw;
if (inh < 0 || inh >= input_height ||
inw < 0 || inw >= input_width) {
MACE_CHECK(inh >= padded_h_start &&
inh < padded_h_stop &&
inw >= padded_w_start &&
inw < padded_w_stop,
"Out of range read from input: ",
inh, ", ", inw);
// else padding with 0:
// sum += 0;
} else {
index_t input_offset =
for (int n = 0; n < batch; ++n) {
for (int c = 0; c < channels; ++c) {
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
index_t offset = n * channels * height * width +
c * height * width + h * width + w;
T sum = 0;
const T* filter_ptr = filter + c * kernel_size;
for (int inc = 0; inc < input_channels; ++inc) {
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
int inh = padded_h_start + h * stride_h + dilation_h * kh;
int inw = padded_w_start + w * stride_w + dilation_w * kw;
if (inh < 0 || inh >= input_height || inw < 0 ||
inw >= input_width) {
MACE_CHECK(inh >= padded_h_start && inh < padded_h_stop &&
inw >= padded_w_start && inw < padded_w_stop,
"Out of range read from input: ", inh, ", ",
inw);
// else padding with 0:
// sum += 0;
} else {
index_t input_offset =
n * input_channels * input_height * input_width +
inc * input_height * input_width +
inh * input_width + inw;
sum += input[input_offset] * *filter_ptr;
}
++filter_ptr;
inc * input_height * input_width + inh * input_width +
inw;
sum += input[input_offset] * *filter_ptr;
}
++filter_ptr;
}
output[offset] = sum + bias[c];
}
output[offset] = sum + bias[c];
}
}
}
}
}
}
private:
const int* strides_; // [stride_h, stride_w]
const int* paddings_; // [padding_h, padding_w]
const int* dilations_; // [dilation_h, dilation_w]
private:
const int* strides_; // [stride_h, stride_w]
const int* paddings_; // [padding_h, padding_w]
const int* dilations_; // [dilation_h, dilation_w]
};
template <>
void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input,
const index_t* input_shape,
const float* filter,
const index_t* filter_shape,
const float* bias,
float* output,
const index_t* output_shape);
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_CONV_2D_H_
void Conv2dFunctor<DeviceType::NEON, float>::operator()(
const float* input, const index_t* input_shape, const float* filter,
const index_t* filter_shape, const float* bias, float* output,
const index_t* output_shape);
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_CONV_2D_H_
......@@ -7,12 +7,10 @@
namespace mace {
namespace kernels {
void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
const index_t *filter_shape, // OIHW
const int *dilations,
const int *strides,
Padding padding,
index_t *output_shape,
const int *dilations, const int *strides,
Padding padding, index_t *output_shape,
int *padding_size) {
MACE_CHECK(dilations[0] > 0 && dilations[1] > 0,
"Invalid dilations, must >= 1");
......@@ -43,14 +41,16 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
output_height = (input_shape[2] - k_extent_height) / strides[0] + 1;
output_width = (input_shape[3] - k_extent_width) / strides[1] + 1;
break;
case SAME:output_height = (input_shape[2] - 1) / strides[0] + 1;
case SAME:
output_height = (input_shape[2] - 1) / strides[0] + 1;
output_width = (input_shape[3] - 1) / strides[1] + 1;
break;
case FULL:
output_height = (input_shape[2] + k_extent_height - 2) / strides[0] + 1;
output_width = (input_shape[3] + k_extent_width - 2) / strides[1] + 1;
break;
default:MACE_CHECK(false, "Unsupported padding type: ", padding);
default:
MACE_CHECK(false, "Unsupported padding type: ", padding);
}
// Note: TensorFlow may pad one more on the right/bottom side
......@@ -58,10 +58,10 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
// utilize the more centered features. We need to benchmark
// based on the model accuracy.
padding_size[0] = (output_height - 1) * strides[0] +
k_extent_height - input_shape[2];
padding_size[1] = (output_width - 1) * strides[1] +
k_extent_width - input_shape[3];
padding_size[0] =
(output_height - 1) * strides[0] + k_extent_height - input_shape[2];
padding_size[1] =
(output_width - 1) * strides[1] + k_extent_width - input_shape[3];
output_shape[0] = input_shape[0];
output_shape[1] = output_channels;
......@@ -69,19 +69,15 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
output_shape[3] = output_width;
}
void ConstructInputWithPadding(const float *input,
const index_t *input_shape,
const int *paddings,
Tensor *output_tensor) {
void ConstructInputWithPadding(const float *input, const index_t *input_shape,
const int *paddings, Tensor *output_tensor) {
index_t batch = input_shape[0];
index_t channels = input_shape[1];
index_t height = input_shape[2];
index_t width = input_shape[3];
std::vector<index_t> output_shape({batch,
channels,
paddings[0] + height,
paddings[1] + width});
std::vector<index_t> output_shape(
{batch, channels, paddings[0] + height, paddings[1] + width});
const index_t output_width = output_shape[3];
const int padded_top = paddings[0] / 2;
......@@ -105,5 +101,5 @@ void ConstructInputWithPadding(const float *input,
}
}
}
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
......@@ -10,26 +10,22 @@
namespace mace {
enum Padding {
VALID = 0, // No padding
SAME = 1, // Pads with half the filter size (rounded down) on both sides
FULL = 2, // Pads with one less than the filter size on both sides
VALID = 0, // No padding
SAME = 1, // Pads with half the filter size (rounded down) on both sides
FULL = 2, // Pads with one less than the filter size on both sides
};
namespace kernels {
void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
const index_t *filter_shape, // OIHW
const int *dilations,
const int *strides,
Padding padding,
index_t *output_shape,
const int *dilations, const int *strides,
Padding padding, index_t *output_shape,
int *padding_size);
void ConstructInputWithPadding(const float *input,
const index_t *input_shape,
const int *paddings,
Tensor *output_tensor);
} // namespace kernels
} // namespace mace
void ConstructInputWithPadding(const float *input, const index_t *input_shape,
const int *paddings, Tensor *output_tensor);
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_CONV_POOL_2D_UTIL_H_
#endif // MACE_KERNELS_CONV_POOL_2D_UTIL_H_
......@@ -2,16 +2,15 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/addn.h"
#include <arm_neon.h>
namespace mace {
namespace kernels {
template <>
void AddNFunctor<DeviceType::NEON, float>::operator()(const vector<const float*>& inputs,
float *output,
index_t size) {
void AddNFunctor<DeviceType::NEON, float>::operator()(
const vector<const float *> &inputs, float *output, index_t size) {
// TODO: neon mem copy
memset(output, 0, size * sizeof(float));
int n = inputs.size();
......@@ -22,7 +21,7 @@ void AddNFunctor<DeviceType::NEON, float>::operator()(const vector<const float*>
}
int64_t element_per_group = size / groups;
#pragma omp parallel for num_threads(1) // no significant performance improve
#pragma omp parallel for num_threads(1) // no significant performance improve
for (int64_t i = 0; i < size; i += element_per_group) {
int64_t count = std::min(element_per_group, size - i);
int nn = count >> 2;
......@@ -48,5 +47,5 @@ void AddNFunctor<DeviceType::NEON, float>::operator()(const vector<const float*>
}
};
} // namespace kernels
} // namespace mace
\ No newline at end of file
} // namespace kernels
} // namespace mace
\ No newline at end of file
......@@ -2,29 +2,25 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/batch_norm.h"
#include <arm_neon.h>
namespace mace {
namespace kernels {
template <>
void BatchNormFunctor<DeviceType::NEON, float>::operator()(const float* input,
const float* scale,
const float* offset,
const float* mean,
const float* var,
const index_t n,
const index_t channel,
const index_t sample_size,
float* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} } )
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
void BatchNormFunctor<DeviceType::NEON, float>::operator()(
const float* input, const float* scale, const float* offset,
const float* mean, const float* var, const index_t n, const index_t channel,
const index_t sample_size, float* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon}
// } )
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
float new_scale, new_offset;
index_t count = sample_size >> 2;
index_t remain_count = sample_size - (count << 2);
......@@ -36,8 +32,8 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(const float* input,
float32x4_t new_scale_f = vdupq_n_f32(new_scale);
float32x4_t new_offset_f = vdupq_n_f32(new_offset);
for (index_t i = 0; i < n; ++i) {
const float *input_sample_ptr = input + pos;
float *output_sample_ptr = output + pos;
const float* input_sample_ptr = input + pos;
float* output_sample_ptr = output + pos;
for (index_t j = 0; j < count; ++j) {
float32x4_t input_f = vld1q_f32(input_sample_ptr);
......@@ -57,5 +53,5 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(const float* input,
}
};
} // namespace kernels
} // namespace mace
\ No newline at end of file
} // namespace kernels
} // namespace mace
\ No newline at end of file
......@@ -20,62 +20,39 @@ extern void Conv2dNeonK5x5S1(const float *input, const index_t *input_shape,
const float *filter, const float *bias,
float *output, const index_t *output_shape);
template<>
template <>
void Conv2dFunctor<DeviceType::NEON,
float>::operator()(const float *input, // NCHW
const index_t *input_shape,
const float *filter, // c_out, c_in, kernel_h, kernel_w
const index_t *filter_shape,
const float *bias, // c_out
float *output, // NCHW
const index_t *output_shape) {
typedef void (*Conv2dNeonFunction)(const float *input, // NCHW
const index_t *input_shape,
const float *filter, // c_out, c_in, kernel_h, kernel_w
const float *bias, // c_out
float *output, // NCHW
const index_t *output_shape);
float>::
operator()(const float *input, // NCHW
const index_t *input_shape,
const float *filter, // c_out, c_in, kernel_h, kernel_w
const index_t *filter_shape,
const float *bias, // c_out
float *output, // NCHW
const index_t *output_shape) {
typedef void (*Conv2dNeonFunction)(
const float *input, // NCHW
const index_t *input_shape,
const float *filter, // c_out, c_in, kernel_h, kernel_w
const float *bias, // c_out
float *output, // NCHW
const index_t *output_shape);
// Selection matrix: kernel_size x stride_size
static const Conv2dNeonFunction selector[5][2] = {
{
Conv2dNeonK1x1S1,
nullptr
},
{
nullptr,
nullptr
},
{
Conv2dNeonK3x3S1,
nullptr
},
{
nullptr,
nullptr
},
{
Conv2dNeonK5x5S1,
nullptr
}
};
{Conv2dNeonK1x1S1, nullptr},
{nullptr, nullptr},
{Conv2dNeonK3x3S1, nullptr},
{nullptr, nullptr},
{Conv2dNeonK5x5S1, nullptr}};
// not implemented yet
index_t kernel_h = filter_shape[2];
index_t kernel_w = filter_shape[3];
if (kernel_h != kernel_w || kernel_h > 5 ||
strides_[0] != strides_[1] || strides_[0] > 2 ||
dilations_[0] != 1 || dilations_[1] != 1 ||
if (kernel_h != kernel_w || kernel_h > 5 || strides_[0] != strides_[1] ||
strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1 ||
selector[kernel_h - 1][strides_[0] - 1] == nullptr) {
LOG(WARNING) << "NEON conv2d kernel not implementated, using slow vesion";
Conv2dFunctor<DeviceType::CPU, float>(strides_, paddings_, dilations_)(
input,
input_shape,
filter,
filter_shape,
bias,
output,
output_shape
);
input, input_shape, filter, filter_shape, bias, output, output_shape);
return;
}
......@@ -87,13 +64,8 @@ void Conv2dFunctor<DeviceType::NEON,
input_shape = padded_input.shape().data();
}
auto conv2d_neon_func = selector[kernel_h - 1][strides_[0] - 1];
conv2d_neon_func(input,
input_shape,
filter,
bias,
output,
output_shape);
conv2d_neon_func(input, input_shape, filter, bias, output, output_shape);
}
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
......@@ -8,25 +8,24 @@
namespace mace {
namespace kernels {
void Conv2dNeonK1x1S1(const float* input, // NCHW
void Conv2dNeonK1x1S1(const float* input, // NCHW
const index_t* input_shape,
const float* filter, // c_out, c_in, kernel_h, kernel_w
const float* bias, // c_out
float* output, // NCHW
const float* filter, // c_out, c_in, kernel_h, kernel_w
const float* bias, // c_out
float* output, // NCHW
const index_t* output_shape) {
const index_t batch = output_shape[0];
const index_t batch = output_shape[0];
const index_t channels = output_shape[1];
const index_t height = output_shape[2];
const index_t width = output_shape[3];
const index_t height = output_shape[2];
const index_t width = output_shape[3];
const index_t input_batch = input_shape[0];
const index_t input_batch = input_shape[0];
const index_t input_channels = input_shape[1];
const index_t input_height = input_shape[2];
const index_t input_width = input_shape[3];
const index_t input_height = input_shape[2];
const index_t input_width = input_shape[3];
MACE_CHECK(input_batch == batch &&
input_height == height &&
input_width == width);
MACE_CHECK(input_batch == batch && input_height == height &&
input_width == width);
const index_t total_pixels = height * width;
// Process 4 * 2 = 8 pixels for each innermost loop
......@@ -37,17 +36,18 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// benchmark omp collapsed(2)
for (index_t n = 0; n < batch; ++n) {
const float* filter_ptr = filter;
#pragma omp parallel for
#pragma omp parallel for
for (index_t c = 0; c < channels; ++c) {
// TODO Will GCC opt these out?
float* channel_output_start =
output + n * channels * height * width + c * height * width;
const float* input_ptr = input + n * input_channels * input_height * input_width;
output + n * channels * height * width + c * height * width;
const float* input_ptr =
input + n * input_channels * input_height * input_width;
// Fill with bias
float* output_ptr = channel_output_start;
for (index_t ptr = 0; ptr < total_pixels; ++ptr) {
output_ptr[ptr] = bias[c]; // TODO can we avoid this?
output_ptr[ptr] = bias[c]; // TODO can we avoid this?
}
index_t inc = 0;
......@@ -55,15 +55,14 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
for (; inc + 3 < input_channels; inc += 4) {
float* output_ptr = channel_output_start;
// The beginning of each input feature map channel
MACE_ASSERT(input_ptr == input + n * input_channels *
input_height * input_width +
inc * input_height * input_width);
MACE_ASSERT(input_ptr ==
input + n * input_channels * input_height * input_width +
inc * input_height * input_width);
const float* input_ptr1 = input_ptr + total_pixels;
const float* input_ptr1 = input_ptr + total_pixels;
const float* input_ptr2 = input_ptr1 + total_pixels;
const float* input_ptr3 = input_ptr2 + total_pixels;
// filter is in c_out, c_in, 1, 1 order
MACE_ASSERT(filter_ptr == filter + c * input_channels + inc);
const float k0 = filter_ptr[0];
......@@ -113,7 +112,7 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
vst1q_f32(output_ptr + 4, out4);
output_ptr += 8;
input_ptr += 8;
input_ptr += 8;
input_ptr1 += 8;
input_ptr2 += 8;
input_ptr3 += 8;
......@@ -121,7 +120,7 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// Process the remaining pixels
index_t remaining_pixels = loop_remaining;
for (; remaining_pixels > 0; --remaining_pixels) {
const float mul = *input_ptr * k0;
const float mul = *input_ptr * k0;
const float mul1 = *input_ptr1 * k1;
const float mul2 = *input_ptr2 * k2;
const float mul3 = *input_ptr3 * k3;
......@@ -141,9 +140,9 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// Process the remaining channels
for (; inc < input_channels; ++inc) {
float* output_ptr = channel_output_start;
MACE_ASSERT(input_ptr == input + n * input_channels *
input_height * input_width +
inc * input_height * input_width);
MACE_ASSERT(input_ptr ==
input + n * input_channels * input_height * input_width +
inc * input_height * input_width);
MACE_ASSERT(filter_ptr == filter + c * input_channels + inc);
const float k0 = filter_ptr[0];
......@@ -166,13 +165,13 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
vst1q_f32(output_ptr + 4, out4);
output_ptr += 8;
input_ptr += 8;
input_ptr += 8;
}
// Process the remaining pixels
index_t remaining_pixels = loop_remaining;
for (; remaining_pixels > 0; --remaining_pixels) {
const float mul = *input_ptr * k0;
*output_ptr += mul;
++output_ptr;
......@@ -183,5 +182,5 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
}
};
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
......@@ -10,78 +10,81 @@ namespace kernels {
static const int kRegisterSize = 4;
void Conv2dNeonK3x3S1(const float* input, // NCHW
const index_t* input_shape,
const float* filter, // c_out, c_in, kernel_h, kernel_w
const float* bias, // c_out
float* output, // NCHW
const index_t* output_shape) {
int batch = output_shape[0];
void Conv2dNeonK3x3S1(const float* input, // NCHW
const index_t* input_shape,
const float* filter, // c_out, c_in, kernel_h, kernel_w
const float* bias, // c_out
float* output, // NCHW
const index_t* output_shape) {
int batch = output_shape[0];
int channels = output_shape[1];
int height = output_shape[2];
int width = output_shape[3];
int height = output_shape[2];
int width = output_shape[3];
int input_batch = input_shape[0];
int input_batch = input_shape[0];
int input_channels = input_shape[1];
int input_height = input_shape[2];
int input_width = input_shape[3];
int input_height = input_shape[2];
int input_width = input_shape[3];
int kernel_h = 3;
int kernel_w = 3;
int kernel_w = 3;
int height_count = (height >> 1) << 1;
for (int b = 0; b < batch; ++b) {
float* output_ptr_base = output + b * channels * height * width;
for (int oc = 0; oc < channels; ++oc) {
const float* filter_ptr = filter + oc * input_channels * kernel_h * kernel_w;
const float* input_ptr = input + b * input_channels * input_height * input_width;
const float* filter_ptr =
filter + oc * input_channels * kernel_h * kernel_w;
const float* input_ptr =
input + b * input_channels * input_height * input_width;
float* output_ptr = output_ptr_base + oc * height * width;
std::fill(output_ptr, output_ptr + height * width, bias[oc]);
for (int ic = 0; ic < input_channels; ++ic) {
float32x4_t filter0 = vld1q_f32(filter_ptr);
float32x4_t filter3 = vld1q_f32(filter_ptr+3);
float32x4_t filter6 = vld1q_f32(filter_ptr+6);
float32x4_t filter3 = vld1q_f32(filter_ptr + 3);
float32x4_t filter6 = vld1q_f32(filter_ptr + 6);
const float* row[kRegisterSize] = {
input_ptr, input_ptr + input_width,
input_ptr + 2 * input_width, input_ptr + 3 * input_width
};
const float* row[kRegisterSize] = {input_ptr, input_ptr + input_width,
input_ptr + 2 * input_width,
input_ptr + 3 * input_width};
float* output_ptr1 = output_ptr;
float* output_ptr2 = output_ptr + width;
for (int h = 0; h < height_count; h += 2) {
int count = width >> 2;
int remain_count = width & 3;
for (; count > 0; --count) {
float32x4_t sum0 = vdupq_n_f32(.0f);
float32x4_t sum1 = vdupq_n_f32(.0f);
float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123
float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); //4567
float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
float32x4_t row0_ext_0 = vld1q_f32(row[0]); // 0123
float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); // 4567
float32x4_t row0_ext_1 =
vextq_f32(row0_ext_0, row0_latter, 1); // 1234
float32x4_t row0_ext_2 =
vextq_f32(row0_ext_0, row0_latter, 2); // 2345
sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter0, 0);
sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter0, 1);
sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123
float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); //4567
float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234
float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345
float32x4_t row1_ext_0 = vld1q_f32(row[1]); // 0123
float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); // 4567
float32x4_t row1_ext_1 =
vextq_f32(row1_ext_0, row1_latter, 1); // 1234
float32x4_t row1_ext_2 =
vextq_f32(row1_ext_0, row1_latter, 2); // 2345
sum0 = vfmaq_laneq_f32(sum0, row1_ext_0, filter3, 0);
sum0 = vfmaq_laneq_f32(sum0, row1_ext_1, filter3, 1);
sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2);
row0_ext_0 = vld1q_f32(row[2]); //0123
row0_latter = vld1q_f32(row[2] + kRegisterSize); //4567
row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
row0_ext_0 = vld1q_f32(row[2]); // 0123
row0_latter = vld1q_f32(row[2] + kRegisterSize); // 4567
row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); // 1234
row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); // 2345
sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter6, 0);
sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter6, 1);
......@@ -96,10 +99,10 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum1 = vfmaq_laneq_f32(sum1, row0_ext_1, filter3, 1);
sum1 = vfmaq_laneq_f32(sum1, row0_ext_2, filter3, 2);
row1_ext_0 = vld1q_f32(row[3]); //0123
row1_latter = vld1q_f32(row[3] + kRegisterSize); //4567
row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234
row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345
row1_ext_0 = vld1q_f32(row[3]); // 0123
row1_latter = vld1q_f32(row[3] + kRegisterSize); // 4567
row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); // 1234
row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); // 2345
sum1 = vfmaq_laneq_f32(sum1, row1_ext_0, filter6, 0);
sum1 = vfmaq_laneq_f32(sum1, row1_ext_1, filter6, 1);
......@@ -114,15 +117,15 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
output_ptr1 += kRegisterSize;
output_ptr2 += kRegisterSize;
for(int i = 0; i < kRegisterSize; ++i) {
for (int i = 0; i < kRegisterSize; ++i) {
row[i] += kRegisterSize;
}
}
for (; remain_count > 0; --remain_count) {
float32x4_t row0 = vld1q_f32(row[0]); //0123
float32x4_t row1 = vld1q_f32(row[1]); //0123
float32x4_t row2 = vld1q_f32(row[2]); //0123
float32x4_t row3 = vld1q_f32(row[3]); //0123
float32x4_t row0 = vld1q_f32(row[0]); // 0123
float32x4_t row1 = vld1q_f32(row[1]); // 0123
float32x4_t row2 = vld1q_f32(row[2]); // 0123
float32x4_t row3 = vld1q_f32(row[3]); // 0123
float32x4_t sum = vmulq_f32(row0, filter0);
sum = vmlaq_f32(sum, row1, filter3);
......@@ -138,13 +141,13 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
++output_ptr1;
++output_ptr2;
for(int i = 0; i < kRegisterSize; ++i) {
for (int i = 0; i < kRegisterSize; ++i) {
row[i] += 1;
}
}
output_ptr1 += width;
output_ptr2 += width;
for(int i = 0; i < kRegisterSize; ++i) {
for (int i = 0; i < kRegisterSize; ++i) {
row[i] += 2 + input_width;
}
}
......@@ -152,30 +155,34 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
if (height != height_count) {
int count = width >> 2;
int remain_count = width & 3;
for(; count > 0; --count) {
for (; count > 0; --count) {
float32x4_t sum0 = vdupq_n_f32(.0f);
float32x4_t row0_ext_0 = vld1q_f32(row[0]); //0123
float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); //4567
float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
float32x4_t row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
float32x4_t row0_ext_0 = vld1q_f32(row[0]); // 0123
float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize); // 4567
float32x4_t row0_ext_1 =
vextq_f32(row0_ext_0, row0_latter, 1); // 1234
float32x4_t row0_ext_2 =
vextq_f32(row0_ext_0, row0_latter, 2); // 2345
sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter0, 0);
sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter0, 1);
sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
float32x4_t row1_ext_0 = vld1q_f32(row[1]); //0123
float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); //4567
float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1); //1234
float32x4_t row1_ext_2 = vextq_f32(row1_ext_0, row1_latter, 2); //2345
float32x4_t row1_ext_0 = vld1q_f32(row[1]); // 0123
float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize); // 4567
float32x4_t row1_ext_1 =
vextq_f32(row1_ext_0, row1_latter, 1); // 1234
float32x4_t row1_ext_2 =
vextq_f32(row1_ext_0, row1_latter, 2); // 2345
sum0 = vfmaq_laneq_f32(sum0, row1_ext_0, filter3, 0);
sum0 = vfmaq_laneq_f32(sum0, row1_ext_1, filter3, 1);
sum0 = vfmaq_laneq_f32(sum0, row1_ext_2, filter3, 2);
row0_ext_0 = vld1q_f32(row[2]); //0123
row0_latter = vld1q_f32(row[2] + kRegisterSize); //4567
row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); //1234
row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); //2345
row0_ext_0 = vld1q_f32(row[2]); // 0123
row0_latter = vld1q_f32(row[2] + kRegisterSize); // 4567
row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1); // 1234
row0_ext_2 = vextq_f32(row0_ext_0, row0_latter, 2); // 2345
sum0 = vfmaq_laneq_f32(sum0, row0_ext_0, filter6, 0);
sum0 = vfmaq_laneq_f32(sum0, row0_ext_1, filter6, 1);
......@@ -185,14 +192,14 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
output_row0 = vaddq_f32(output_row0, sum0);
vst1q_f32(output_ptr1, output_row0);
output_ptr1 += kRegisterSize;
for(int i = 0; i < 3; ++i) {
for (int i = 0; i < 3; ++i) {
row[i] += kRegisterSize;
}
}
for (; remain_count > 0; --remain_count) {
float32x4_t row0 = vld1q_f32(row[0]); //0123
float32x4_t row1 = vld1q_f32(row[1]); //0123
float32x4_t row2 = vld1q_f32(row[2]); //0123
float32x4_t row0 = vld1q_f32(row[0]); // 0123
float32x4_t row1 = vld1q_f32(row[1]); // 0123
float32x4_t row2 = vld1q_f32(row[2]); // 0123
float32x4_t sum = vmulq_f32(row0, filter0);
sum = vmlaq_f32(sum, row1, filter3);
......@@ -201,7 +208,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
*output_ptr1 = vaddvq_f32(sum);
++output_ptr1;
for(int i = 0; i < 3; ++i) {
for (int i = 0; i < 3; ++i) {
row[i] += 1;
}
}
......@@ -213,5 +220,5 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
}
}
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
......@@ -10,11 +10,11 @@
namespace mace {
namespace kernels {
void Conv2dNeonK5x5S1(const float* input, // NCHW
void Conv2dNeonK5x5S1(const float* input, // NCHW
const index_t* input_shape,
const float* filter, // c_out, c_in, kernel_h, kernel_w
const float* bias, // c_out
float* output, // NCHW
const float* filter, // c_out, c_in, kernel_h, kernel_w
const float* bias, // c_out
float* output, // NCHW
const index_t* output_shape) {
const index_t batch = output_shape[0];
const index_t channels = output_shape[1];
......@@ -30,17 +30,17 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
const index_t input_total_pixels_per_channel = input_height * input_width;
const index_t output_total_pixels_per_channel = height * width;
const index_t input_total_pixels_per_batch = input_total_pixels_per_channel
* input_channels;
const index_t output_total_pixels_per_batch = output_total_pixels_per_channel
* channels;
const index_t input_total_pixels_per_batch =
input_total_pixels_per_channel * input_channels;
const index_t output_total_pixels_per_batch =
output_total_pixels_per_channel * channels;
const index_t patch_size = input_channels * 25;
#pragma omp parallel for collapse(2)
for (index_t n = 0; n < batch; ++n) {
for (index_t c = 0; c < channels; ++c) {
float* output_ptr = output + n * output_total_pixels_per_batch
+ c * output_total_pixels_per_channel;
float* output_ptr = output + n * output_total_pixels_per_batch +
c * output_total_pixels_per_channel;
const float* input_ptr = input + n * input_total_pixels_per_batch;
// Fill with bias
......@@ -53,7 +53,7 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
float* outptr2 = outptr + width;
const float* inptr = input_ptr + inc * input_total_pixels_per_channel;
const float* filter_ptr = filter + c * patch_size + inc * 25;
const float* filter_ptr = filter + c * patch_size + inc * 25;
const float* r0 = inptr;
const float* r1 = inptr + input_width;
......@@ -246,8 +246,8 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
sum2 = r5[4] * k4[4];
float32x2_t _ss = vadd_f32(vget_low_f32(_sum), vget_high_f32(_sum));
float32x2_t
_ss2 = vadd_f32(vget_low_f32(_sum2), vget_high_f32(_sum2));
float32x2_t _ss2 =
vadd_f32(vget_low_f32(_sum2), vget_high_f32(_sum2));
float32x2_t _ss_ss2 = vpadd_f32(_ss, _ss2);
sum += vget_lane_f32(_ss_ss2, 0);
......@@ -414,7 +414,7 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
}
}
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_NEON_CONV_2D_NEON_5X5_H_
#endif // MACE_KERNELS_NEON_CONV_2D_NEON_5X5_H_
......@@ -2,19 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include <arm_neon.h>
#include "mace/core/common.h"
namespace mace {
namespace kernels {
void PoolingMaxNeonK2x2S2x2(const float *input,
const index_t *in_shape,
float *output,
const index_t *out_shape,
void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
float *output, const index_t *out_shape,
const int *paddings) {
index_t batch = in_shape[0];
index_t channels = in_shape[1];
......@@ -44,7 +42,7 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
int w = 0;
int num_vectors = 0;
if (!((h == 0 && padding_top > 0) ||
(h == out_height - 1 && padding_bottom > 0))) {
(h == out_height - 1 && padding_bottom > 0))) {
r0 = input + input_offset + (h * 2 - padding_top) * in_width;
r1 = r0 + in_width;
if (padding_left > 0) {
......@@ -86,8 +84,7 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
for (int kw = 0; kw < 2; ++kw) {
int inh = h * 2 - padding_top + kh;
int inw = w * 2 - padding_left + kw;
if (inh >= 0 && inh < in_height &&
inw >= 0 && inw < in_width) {
if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) {
max = std::max(max, input[input_offset + inh * in_width + inw]);
}
}
......@@ -104,10 +101,8 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
}
// assume the input has already been padded
void PoolingMaxNeonK2x2S2x2Padded(const float *input,
const index_t *in_shape,
float *output,
const index_t *out_shape) {
void PoolingMaxNeonK2x2S2x2Padded(const float *input, const index_t *in_shape,
float *output, const index_t *out_shape) {
index_t batch = in_shape[0];
index_t channels = in_shape[1];
index_t in_height = in_shape[2];
......
......@@ -2,19 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include <arm_neon.h>
#include "mace/core/common.h"
namespace mace {
namespace kernels {
void PoolingMaxNeonK3x3S2x2(const float *input,
const index_t *in_shape,
float *output,
const index_t *out_shape,
void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
float *output, const index_t *out_shape,
const int *paddings) {
index_t batch = in_shape[0];
index_t channels = in_shape[1];
......@@ -44,7 +42,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
int num_vectors = 0;
const float *r0, *r1, *r2;
if (!((h == 0 && padding_top > 0) ||
(h == out_height - 1 && padding_bottom > 0))) {
(h == out_height - 1 && padding_bottom > 0))) {
r0 = input + input_offset + (h * 2 - padding_top) * in_width;
r1 = r0 + in_width;
r2 = r1 + in_width;
......@@ -112,8 +110,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
for (int kw = 0; kw < 3; ++kw) {
int inh = h * 2 - padding_top + kh;
int inw = w * 2 - padding_left + kw;
if (inh >= 0 && inh < in_height &&
inw >= 0 && inw < in_width) {
if (inh >= 0 && inh < in_height && inw >= 0 && inw < in_width) {
max = std::max(max, input[input_offset + inh * in_width + inw]);
}
}
......@@ -130,10 +127,8 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
}
// assume the input has already been padded
void PoolingMaxNeonK3x3S2x2Padded(const float *input,
const index_t *in_shape,
float *output,
const index_t *out_shape) {
void PoolingMaxNeonK3x3S2x2Padded(const float *input, const index_t *in_shape,
float *output, const index_t *out_shape) {
index_t batch = in_shape[0];
index_t channels = in_shape[1];
index_t in_height = in_shape[2];
......@@ -218,5 +213,5 @@ void PoolingMaxNeonK3x3S2x2Padded(const float *input,
}
}
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
......@@ -2,45 +2,36 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/pooling.h"
#include <arm_neon.h>
#include "mace/kernels/conv_pool_2d_util.h"
namespace mace {
namespace kernels {
// NEON max-pooling kernels implemented in other source files. The name
// suffix encodes the kernel size (K<h>x<w>) and the stride (S<h>x<w>).
extern void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
                                   float *output, const index_t *out_shape,
                                   const int *paddings);

extern void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
                                   float *output, const index_t *out_shape,
                                   const int *paddings);

#ifdef __COPY_MAKE_PADDING
// Variants that assume the input has already been padded, so they take no
// paddings argument.
extern void PoolingMaxNeonK2x2S2x2Padded(const float *input,
                                         const index_t *in_shape, float *output,
                                         const index_t *out_shape);
extern void PoolingMaxNeonK3x3S2x2Padded(const float *input,
                                         const index_t *in_shape, float *output,
                                         const index_t *out_shape);
#endif
template<>
template <>
void PoolingFunctor<DeviceType::NEON, float>::operator()(
const float *input,
const index_t *input_shape,
float *output,
const float *input, const index_t *input_shape, float *output,
const index_t *output_shape) {
if (kernels_[0] == 2 && kernels_[1] == 2 &&
strides_[0] == 2 && strides_[1] == 2 &&
pooling_type_ == MAX) {
if (kernels_[0] == 2 && kernels_[1] == 2 && strides_[0] == 2 &&
strides_[1] == 2 && pooling_type_ == MAX) {
#ifdef __COPY_MAKE_PADDING
Tensor padded_input;
ConstructInputWithPadding(input, input_shape, paddings_, &padded_input);
......@@ -50,9 +41,8 @@ void PoolingFunctor<DeviceType::NEON, float>::operator()(
#else
PoolingMaxNeonK2x2S2x2(input, input_shape, output, output_shape, paddings_);
#endif
} else if (kernels_[0] == 3 && kernels_[1] == 3 &&
strides_[0] == 2 && strides_[1] == 2 &&
pooling_type_ == MAX) {
} else if (kernels_[0] == 3 && kernels_[1] == 3 && strides_[0] == 2 &&
strides_[1] == 2 && pooling_type_ == MAX) {
#ifdef __COPY_MAKE_PADDING
Tensor padded_input;
ConstructInputWithPadding(input, input_shape, paddings_, &padded_input);
......@@ -65,13 +55,9 @@ void PoolingFunctor<DeviceType::NEON, float>::operator()(
} else { // not implement yet
PoolingFunctor<DeviceType::CPU, float>(pooling_type_, kernels_, strides_,
paddings_, dilations_)(
input,
input_shape,
output,
output_shape
);
input, input_shape, output, output_shape);
}
}
} // namespace kernels
} // namespace mace
\ No newline at end of file
} // namespace kernels
} // namespace mace
\ No newline at end of file
......@@ -2,17 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/relu.h"
#include <arm_neon.h>
namespace mace {
namespace kernels {
template <>
void ReluFunctor<DeviceType::NEON, float>::operator()(const float *input,
float *output,
index_t size) {
#pragma omp parallel for num_threads(1) // no significant performance improve
float *output,
index_t size) {
#pragma omp parallel for num_threads(1) // no significant performance improve
for (int64_t i = 0; i < size; i += kCostPerGroup) {
int64_t count = std::min(static_cast<int64_t>(kCostPerGroup), size - i);
int nn = count >> 2;
......@@ -36,6 +36,5 @@ void ReluFunctor<DeviceType::NEON, float>::operator()(const float *input,
}
};
} // namespace kernels
} // namespace mace
\ No newline at end of file
} // namespace kernels
} // namespace mace
\ No newline at end of file
......@@ -11,29 +11,24 @@
namespace mace {
// Selects the reduction a pooling operator performs over each window:
// averaging (AVG) or taking the maximum (MAX).
enum PoolingType {
  AVG = 1,  // avg_pool
  MAX = 2,  // max_pool
};
namespace kernels {
template<DeviceType D, typename T>
template <DeviceType D, typename T>
class PoolingFunctor {
public:
PoolingFunctor(const PoolingType pooling_type,
const int *kernels,
const int *strides,
const int *paddings,
const int *dilations)
PoolingFunctor(const PoolingType pooling_type, const int *kernels,
const int *strides, const int *paddings, const int *dilations)
: pooling_type_(pooling_type),
kernels_(kernels),
strides_(strides),
paddings_(paddings),
dilations_(dilations) {}
void operator()(const T *input,
const index_t *input_shape,
T *output,
void operator()(const T *input, const index_t *input_shape, T *output,
const index_t *output_shape) {
index_t batch = output_shape[0];
index_t channels = output_shape[1];
......@@ -60,32 +55,31 @@ class PoolingFunctor {
#pragma omp parallel for collapse(2)
for (int n = 0; n < batch; ++n) {
for (int c = 0; c < channels; ++c) {
index_t out_offset = n * channels * height * width +
c * height * width;
index_t out_offset = n * channels * height * width + c * height * width;
index_t in_offset = n * input_channels * input_height * input_width +
c * input_height * input_width;
c * input_height * input_width;
for (int h = 0; h < height; ++h) {
for (int w = 0; w < width; ++w) {
T sum_or_max = 0;
switch (pooling_type_) {
case AVG:break;
case MAX:sum_or_max = std::numeric_limits<T>::lowest();
case AVG:
break;
case MAX:
sum_or_max = std::numeric_limits<T>::lowest();
break;
default:
MACE_CHECK(false,
"Unsupported pooling type: ",
pooling_type_);
MACE_CHECK(false, "Unsupported pooling type: ", pooling_type_);
}
for (int kh = 0; kh < kernel_h; ++kh) {
for (int kw = 0; kw < kernel_w; ++kw) {
int inh = padded_h_start + h * stride_h + dilation_h * kh;
int inw = padded_w_start + w * stride_w + dilation_w * kw;
if (inh >= 0 && inh < input_height &&
inw >= 0 && inw < input_width) {
index_t input_offset = in_offset +
inh * input_width + inw;
if (inh >= 0 && inh < input_height && inw >= 0 &&
inw < input_width) {
index_t input_offset = in_offset + inh * input_width + inw;
switch (pooling_type_) {
case AVG:sum_or_max += input[input_offset];
case AVG:
sum_or_max += input[input_offset];
break;
case MAX:
sum_or_max = std::max(sum_or_max, input[input_offset]);
......@@ -98,14 +92,14 @@ class PoolingFunctor {
}
}
switch (pooling_type_) {
case AVG:output[out_offset] = sum_or_max / (kernel_h * kernel_w);
case AVG:
output[out_offset] = sum_or_max / (kernel_h * kernel_w);
break;
case MAX:output[out_offset] = sum_or_max;
case MAX:
output[out_offset] = sum_or_max;
break;
default:
MACE_CHECK(false,
"Unsupported pooling type: ",
pooling_type_);
MACE_CHECK(false, "Unsupported pooling type: ", pooling_type_);
}
out_offset += 1;
}
......@@ -122,14 +116,12 @@ class PoolingFunctor {
const int *dilations_;
};
template<>
template <>
void PoolingFunctor<DeviceType::NEON, float>::operator()(
const float *input,
const index_t *input_shape,
float *output,
const float *input, const index_t *input_shape, float *output,
const index_t *output_shape);
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
#endif //MACE_KERNELS_POOLING_H
#endif // MACE_KERNELS_POOLING_H
......@@ -10,7 +10,7 @@
namespace mace {
namespace kernels {
template<DeviceType D, typename T>
template <DeviceType D, typename T>
struct ReluFunctor {
void operator()(const T *input, T *output, index_t size) {
for (index_t i = 0; i < size; ++i) {
......@@ -24,7 +24,7 @@ void ReluFunctor<DeviceType::NEON, float>::operator()(const float *input,
float *output,
index_t size);
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_RELU_H_
\ No newline at end of file
#endif // MACE_KERNELS_RELU_H_
\ No newline at end of file
......@@ -22,8 +22,8 @@ struct CachedInterpolation {
// Returns the ratio between input and output extent along one dimension.
// When align_corners is set and out_size > 1 the ratio is
// (in_size - 1) / (out_size - 1); otherwise it is in_size / out_size.
// The division is carried out in float.
inline float CalculateResizeScale(index_t in_size, index_t out_size,
                                  bool align_corners) {
  return (align_corners && out_size > 1)
             ? (in_size - 1) / static_cast<float>(out_size - 1)
             : in_size / static_cast<float>(out_size);
}
inline void ComputeInterpolationWeights(const index_t out_size,
......@@ -41,21 +41,20 @@ inline void ComputeInterpolationWeights(const index_t out_size,
}
// Bilinear interpolation between four corner samples.
// x_lerp blends left -> right along both the top and the bottom edge;
// y_lerp then blends the resulting top value toward the bottom value.
// A lerp factor of 0 selects the left/top sample, 1 the right/bottom one.
inline float ComputeLerp(const float top_left, const float top_right,
                         const float bottom_left, const float bottom_right,
                         const float x_lerp, const float y_lerp) {
  const float top = top_left + (top_right - top_left) * x_lerp;
  const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
  return top + (bottom - top) * y_lerp;
}
template<typename T>
void ResizeImage(const T *images,
const index_t batch_size, const index_t in_height,
const index_t in_width, const index_t out_height,
const index_t out_width, const index_t channels,
template <typename T>
void ResizeImage(const T *images, const index_t batch_size,
const index_t in_height, const index_t in_width,
const index_t out_height, const index_t out_width,
const index_t channels,
const std::vector<CachedInterpolation> &xs_vec,
const std::vector<CachedInterpolation> &ys,
float *output) {
const std::vector<CachedInterpolation> &ys, float *output) {
const index_t in_channel_size = in_height * in_width;
const index_t in_batch_num_values = channels * in_channel_size;
const index_t out_channel_size = out_height * out_width;
......@@ -65,10 +64,10 @@ void ResizeImage(const T *images,
#pragma omp parallel for collapse(2)
for (index_t b = 0; b < batch_size; ++b) {
for (index_t c = 0; c < channels; ++c) {
const T* input_ptr = images + in_batch_num_values * b
+ in_channel_size * c;
float *output_ptr = output + out_batch_num_values * b
+ out_channel_size * c;
const T *input_ptr =
images + in_batch_num_values * b + in_channel_size * c;
float *output_ptr =
output + out_batch_num_values * b + out_channel_size * c;
for (index_t y = 0; y < out_height; ++y) {
const T *ys_input_lower_ptr = input_ptr + ys[y].lower * in_width;
const T *ys_input_upper_ptr = input_ptr + ys[y].upper * in_width;
......@@ -83,9 +82,8 @@ void ResizeImage(const T *images,
const float bottom_left = ys_input_upper_ptr[xs_lower];
const float bottom_right = ys_input_upper_ptr[xs_upper];
output_ptr[x] =
ComputeLerp(top_left, top_right, bottom_left, bottom_right,
xs_lerp, ys_lerp);
output_ptr[x] = ComputeLerp(top_left, top_right, bottom_left,
bottom_right, xs_lerp, ys_lerp);
}
output_ptr += out_width;
}
......@@ -94,16 +92,15 @@ void ResizeImage(const T *images,
}
}
template<DeviceType D, typename T>
template <DeviceType D, typename T>
struct ResizeBilinearFunctor {
bool align_corners_;
ResizeBilinearFunctor(bool align_corners)
: align_corners_(align_corners) {}
ResizeBilinearFunctor(bool align_corners) : align_corners_(align_corners) {}
void operator()(const T *input, T *output,
index_t n, index_t channels, index_t in_height,
index_t in_width, index_t out_height, index_t out_width) {
void operator()(const T *input, T *output, index_t n, index_t channels,
index_t in_height, index_t in_width, index_t out_height,
index_t out_width) {
if (out_height == in_height && out_width == in_width) {
std::copy(input, input + channels * in_height * in_width, output);
return;
......@@ -111,8 +108,8 @@ struct ResizeBilinearFunctor {
float height_scale =
CalculateResizeScale(in_height, out_height, align_corners_);
float
width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
float width_scale =
CalculateResizeScale(in_width, out_width, align_corners_);
std::vector<CachedInterpolation> ys(out_height + 1);
std::vector<CachedInterpolation> xs(out_width + 1);
......@@ -121,12 +118,12 @@ struct ResizeBilinearFunctor {
ComputeInterpolationWeights(out_height, in_height, height_scale, ys.data());
ComputeInterpolationWeights(out_width, in_width, width_scale, xs.data());
ResizeImage(input, n, in_height, in_width, out_height, out_width,
channels, xs, ys, output);
ResizeImage(input, n, in_height, in_width, out_height, out_width, channels,
xs, ys, output);
}
};
} // namespace kernels
} // namespace mace
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_RESIZE_BILINEAR_H_
#endif // MACE_KERNELS_RESIZE_BILINEAR_H_
......@@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(AddN, AddNOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR(AddN, AddNOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // __ARM_NEON
} // namespace mace
} // namespace mace
......@@ -10,10 +10,10 @@
namespace mace {
template<DeviceType D, class T>
template <DeviceType D, class T>
class AddNOp : public Operator<D, T> {
public:
AddNOp(const OperatorDef &operator_def, Workspace *ws)
AddNOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<D, T>(operator_def, ws) {}
bool Run() override {
......@@ -36,6 +36,6 @@ class AddNOp : public Operator<D, T> {
kernels::AddNFunctor<D, T> functor_;
};
} // namespace mace
} // namespace mace
#endif // MACE_OPS_ADDN_H_
#endif // MACE_OPS_ADDN_H_
......@@ -10,7 +10,6 @@
namespace mace {
template <DeviceType D, typename T>
static void AddNBenchmark(int iters, int n, int size) {
mace::testing::StopTiming();
OpsTestNet net;
......@@ -18,8 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) {
for (int i = 0; i < n; ++i) {
op_def_builder.Input(internal::MakeString("Input", i).c_str());
}
op_def_builder.Output("Output")
.Finalize(net.operator_def());
op_def_builder.Output("Output").Finalize(net.operator_def());
// Add input data
for (int i = 0; i < n; ++i) {
......@@ -32,27 +30,26 @@ static void AddNBenchmark(int iters, int n, int size) {
}
mace::testing::StartTiming();
while(iters--) {
while (iters--) {
net.RunOp(D);
}
}
// Defines and registers a single AddN benchmark.
//
//   N      - number of input tensors handed to AddNBenchmark.
//   SIZE   - size argument handed to AddNBenchmark.
//   TYPE   - element type; also used to compute the processed byte count.
//   DEVICE - device enumerator used as AddNBenchmark's first template argument.
//
// The generated function reports iters * N * SIZE processed items (and that
// count times sizeof(TYPE) bytes) before running the benchmark body.
#define BM_ADDN_MACRO(N, SIZE, TYPE, DEVICE) \
  static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE(int iters) { \
    const int64_t tot = static_cast<int64_t>(iters) * N * SIZE; \
    mace::testing::ItemsProcessed(tot); \
    mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
    AddNBenchmark<DEVICE, TYPE>(iters, N, SIZE); \
  } \
  BENCHMARK(BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE)
// Instantiates one AddN benchmark per device (CPU and NEON) for the given
// input count, size and element type.
#define BM_ADDN(N, SIZE, TYPE) \
  BM_ADDN_MACRO(N, SIZE, TYPE, CPU); \
  BM_ADDN_MACRO(N, SIZE, TYPE, NEON);

BM_ADDN(10, 1000, float);
BM_ADDN(10, 10000, float);
BM_ADDN(100, 1000, float);
BM_ADDN(100, 10000, float);
} // namespace mace
\ No newline at end of file
} // namespace mace
\ No newline at end of file
......@@ -36,4 +36,4 @@ TEST_F(AddnOpTest, AddnOp) {
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.01);
}
} // namespace mace
} // namespace mace
......@@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(BatchNorm, BatchNormOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR(BatchNorm, BatchNormOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // __ARM_NEON
} // namespace mace
\ No newline at end of file
} // namespace mace
\ No newline at end of file
......@@ -10,50 +10,55 @@
namespace mace {
template<DeviceType D, class T>
template <DeviceType D, class T>
class BatchNormOp : public Operator<D, T> {
public:
BatchNormOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws),
functor_(OperatorBase::GetSingleArgument<float>("variance_epsilon", 1e-4)){}
bool Run() override {
const Tensor* input = this->Input(0);
const Tensor* scale = this->Input(1);
const Tensor* offset = this->Input(2);
const Tensor* mean = this->Input(3);
const Tensor* var = this->Input(4);
MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size());
MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ", scale->dim_size());
MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ", offset->dim_size());
MACE_CHECK(mean->dim_size() == 1, "mean must be 1-dimensional. ", mean->dim_size());
MACE_CHECK(var->dim_size() == 1, "var must be 1-dimensional. ", var->dim_size());
Tensor* output = this->Output(0);
output->ResizeLike(input);
const index_t n = input->dim(0);
const index_t channel = input->dim(1);
const index_t sample_size = input->dim(2) * input->dim(3);
const T* input_ptr = input->data<T>();
const T* scale_ptr = scale->data<T>();
const T* offset_ptr = offset->data<T>();
const T* mean_ptr = mean->data<T>();
const T* var_ptr = var->data<T>();
T* output_ptr = output->mutable_data<T>();
functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr,
n, channel, sample_size,
output_ptr);
return true;
}
private:
kernels::BatchNormFunctor<D, T> functor_;
public:
BatchNormOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<D, T>(operator_def, ws),
functor_(
OperatorBase::GetSingleArgument<float>("variance_epsilon", 1e-4)) {}
bool Run() override {
const Tensor* input = this->Input(0);
const Tensor* scale = this->Input(1);
const Tensor* offset = this->Input(2);
const Tensor* mean = this->Input(3);
const Tensor* var = this->Input(4);
MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ",
input->dim_size());
MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ",
scale->dim_size());
MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ",
offset->dim_size());
MACE_CHECK(mean->dim_size() == 1, "mean must be 1-dimensional. ",
mean->dim_size());
MACE_CHECK(var->dim_size() == 1, "var must be 1-dimensional. ",
var->dim_size());
Tensor* output = this->Output(0);
output->ResizeLike(input);
const index_t n = input->dim(0);
const index_t channel = input->dim(1);
const index_t sample_size = input->dim(2) * input->dim(3);
const T* input_ptr = input->data<T>();
const T* scale_ptr = scale->data<T>();
const T* offset_ptr = offset->data<T>();
const T* mean_ptr = mean->data<T>();
const T* var_ptr = var->data<T>();
T* output_ptr = output->mutable_data<T>();
functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, n, channel,
sample_size, output_ptr);
return true;
}
private:
kernels::BatchNormFunctor<D, T> functor_;
};
} // namespace mace
} // namespace mace
#endif // MACE_BATCH_NORM_H_
#endif // MACE_BATCH_NORM_H_
......@@ -8,19 +8,19 @@
namespace mace {
template <DeviceType D, typename T>
static void BatchNorm(int iters, int batch, int channels, int height, int width) {
static void BatchNorm(int iters, int batch, int channels, int height,
int width) {
mace::testing::StopTiming();
OpsTestNet net;
OpDefBuilder("BatchNorm", "BatchNormBM")
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
// Add input data
net.AddRandomInput<T>("Input", {batch, channels, height, width});
......@@ -35,23 +35,23 @@ static void BatchNorm(int iters, int batch, int channels, int height, int width)
}
mace::testing::StartTiming();
while(iters--) {
while (iters--) {
net.RunOp(D);
}
}
// Defines and registers a single BatchNorm benchmark.
//
//   N, C, H, W - batch, channels, height and width forwarded to BatchNorm<>.
//   TYPE       - element type; also used to compute the processed byte count.
//   DEVICE     - device enumerator used as BatchNorm's first template argument.
//
// The generated function reports iters * N * C * H * W processed items (and
// that count times sizeof(TYPE) bytes) before running the benchmark body.
#define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE) \
  static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
      int iters) { \
    const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
    mace::testing::ItemsProcessed(tot); \
    mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
    BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
  } \
  BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
// Instantiates one BatchNorm benchmark per device (CPU and NEON) for the
// given shape and element type.
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
  BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
  BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);

BM_BATCH_NORM(1, 1, 512, 512, float);
......@@ -65,4 +65,4 @@ BM_BATCH_NORM(1, 128, 256, 256, float);
BM_BATCH_NORM(1, 128, 512, 512, float);
BM_BATCH_NORM(32, 1, 256, 256, float);
BM_BATCH_NORM(32, 3, 256, 256, float);
} // namespace mace
\ No newline at end of file
} // namespace mace
\ No newline at end of file
......@@ -13,17 +13,17 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
// Construct graph
auto& net = test_net();
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
// Add input data
net.AddInputFromArray<float>("Input", {1, 1, 6, 2},
{5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15});
{5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15});
net.AddInputFromArray<float>("Scale", {1}, {4.0f});
net.AddInputFromArray<float>("Offset", {1}, {2.0});
net.AddInputFromArray<float>("Mean", {1}, {10});
......@@ -33,8 +33,8 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
net.RunOp();
// Check
auto expected = CreateTensor<float>({1, 1, 6, 2},
{-3.86, -3.86, -1.51, -1.51, 0.83, 0.83,
auto expected =
CreateTensor<float>({1, 1, 6, 2}, {-3.86, -3.86, -1.51, -1.51, 0.83, 0.83,
3.17, 3.17, 5.51, 5.51, 7.86, 7.86});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.01);
......@@ -51,13 +51,13 @@ TEST_F(BatchNormOpTest, SimpleNeon) {
// Construct graph
auto& net = test_net();
OpDefBuilder("BatchNorm", "BatchNormTest")
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Scale")
.Input("Offset")
.Input("Mean")
.Input("Var")
.Output("Output")
.Finalize(net.operator_def());
// Add input data
net.AddRandomInput<float>("Input", {batch, channels, height, width});
......@@ -77,5 +77,4 @@ TEST_F(BatchNormOpTest, SimpleNeon) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
}
......@@ -11,6 +11,6 @@ REGISTER_CPU_OPERATOR(Conv2d, Conv2dOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR(Conv2d, Conv2dOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // __ARM_NEON
} // namespace mace
} // namespace mace
......@@ -13,11 +13,11 @@
namespace mace {
template<DeviceType D, typename T>
template <DeviceType D, typename T>
class Conv2dOp : public ConvPool2dOpBase<D, T> {
public:
Conv2dOp(const OperatorDef& op_def, Workspace* ws)
: ConvPool2dOpBase<D, T>(op_def, ws) {};
: ConvPool2dOpBase<D, T>(op_def, ws){};
bool Run() override {
const Tensor* input = this->Input(INPUT);
......@@ -27,21 +27,16 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
std::vector<index_t> output_shape(4);
std::vector<int> paddings(2);
kernels::CalcPaddingAndOutputSize(input->shape().data(),
filter->shape().data(),
this->dilations_.data(),
this->strides_.data(),
this->padding_,
output_shape.data(),
paddings.data());
kernels::CalcPaddingAndOutputSize(
input->shape().data(), filter->shape().data(), this->dilations_.data(),
this->strides_.data(), this->padding_, output_shape.data(),
paddings.data());
output->Resize(output_shape);
auto conv2d = kernels::Conv2dFunctor<D, T>(this->strides_.data(),
paddings.data(),
this->dilations_.data());
conv2d(input->data<T>(), input->shape().data(),
filter->data<T>(), filter->shape().data(),
bias->data<T>(), output->mutable_data<T>(),
auto conv2d = kernels::Conv2dFunctor<D, T>(
this->strides_.data(), paddings.data(), this->dilations_.data());
conv2d(input->data<T>(), input->shape().data(), filter->data<T>(),
filter->shape().data(), bias->data<T>(), output->mutable_data<T>(),
output->shape().data());
return true;
......@@ -52,6 +47,6 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace mace
} // namespace mace
#endif // MACE_OPS_CONV_2D_H_
#endif // MACE_OPS_CONV_2D_H_
......@@ -13,17 +13,17 @@ namespace mace {
template <DeviceType D, typename T>
static void Conv2d(int iters, int batch, int channels, int height, int width,
int kernel_h, int kernel_w, int stride,
Padding padding, int output_channels) {
int kernel_h, int kernel_w, int stride, Padding padding,
int output_channels) {
mace::testing::StopTiming();
OpsTestNet net;
OpDefBuilder("Conv2d", "Conv2dTest")
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {stride, stride});
......@@ -32,7 +32,8 @@ static void Conv2d(int iters, int batch, int channels, int height, int width,
// Add input data
net.AddRandomInput<float>("Input", {batch, channels, height, width});
net.AddRandomInput<float>("Filter", {output_channels, channels, kernel_h, kernel_w});
net.AddRandomInput<float>("Filter",
{output_channels, channels, kernel_h, kernel_w});
net.AddRandomInput<float>("Bias", {output_channels});
// Warm-up
......@@ -41,27 +42,30 @@ static void Conv2d(int iters, int batch, int channels, int height, int width,
}
mace::testing::StartTiming();
while(iters--) {
while (iters--) {
net.RunOp(D);
}
}
// Defines and registers a single Conv2d benchmark.
//
//   N, C, H, W - input batch, channels, height and width.
//   KH, KW     - kernel height and width.
//   STRIDE     - stride forwarded to Conv2d<>.
//   P          - mace::Padding enumerator name (e.g. VALID, SAME).
//   OC         - number of output channels.
//   TYPE       - element type; also used to compute the processed byte count.
//   DEVICE     - device enumerator used as Conv2d's first template argument.
//
// The generated function reports iters * N * C * H * W processed items (and
// that count times sizeof(TYPE) bytes) before running the benchmark body.
//
// NOTE: the name is built with `_OC##OC##_` so that the OC argument is pasted
// into the identifier. Writing `_OC##_` pastes the literal token `_OC`
// instead, which makes two benchmarks that differ only in OC expand to the
// same function name.
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
  static void \
      BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##OC##_##TYPE##_##DEVICE( \
          int iters) { \
    const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
    mace::testing::ItemsProcessed(tot); \
    mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
    Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
                         OC); \
  } \
  BENCHMARK( \
      BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##OC##_##TYPE##_##DEVICE)
// Instantiates one Conv2d benchmark per device (CPU and NEON) for the given
// input shape, kernel, stride, padding, output channels and element type.
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
  BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
  BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);

BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float);  // Test bad alignments
BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
......@@ -71,4 +75,4 @@ BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
} // namespace mace
} // namespace mace
......@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/conv_2d.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
using namespace mace;
......@@ -14,11 +14,11 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Conv2d", "Conv2dTest")
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {1, 1});
......@@ -26,17 +26,13 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddInputFromArray<float>("Input", {1, 2, 3, 3},
{1, 1, 1,
1, 1, 1,
1, 1, 1,
1, 1, 1,
1, 1, 1,
1, 1, 1});
net.AddInputFromArray<float>("Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
net.AddInputFromArray<float>(
"Input", {1, 2, 3, 3},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
net.AddInputFromArray<float>(
"Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
net.AddInputFromArray<float>("Bias", {1}, {0.1f});
// Run
......@@ -52,11 +48,11 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Conv2d", "Conv2dTest")
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {1, 1});
......@@ -64,27 +60,22 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddInputFromArray<float>("Input", {1, 2, 3, 3},
{1, 1, 1,
1, 1, 1,
1, 1, 1,
1, 1, 1,
1, 1, 1,
1, 1, 1});
net.AddInputFromArray<float>("Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
net.AddInputFromArray<float>(
"Input", {1, 2, 3, 3},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
net.AddInputFromArray<float>(
"Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f});
net.AddInputFromArray<float>("Bias", {1}, {0.1f});
// Run
net.RunOp();
// Check
auto expected = CreateTensor<float>({1, 1, 3, 3},
{ 8.1f, 12.1f, 8.1f,
12.1f, 18.1f, 12.1f,
8.1f, 12.1f, 8.1f});
auto expected = CreateTensor<float>(
{1, 1, 3, 3},
{8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
......@@ -93,11 +84,11 @@ TEST_F(Conv2dOpTest, Combined) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Conv2d", "Conv2dTest")
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {2, 2});
......@@ -105,36 +96,24 @@ TEST_F(Conv2dOpTest, Combined) {
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddInputFromArray<float>("Input", {1, 2, 5, 5},
{1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1,
1, 1, 1, 1, 1});
net.AddInputFromArray<float>("Filter", {2, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f});
net.AddInputFromArray<float>(
"Input", {1, 2, 5, 5}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
net.AddInputFromArray<float>(
"Filter", {2, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f,
0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f});
net.AddInputFromArray<float>("Bias", {2}, {0.1f, 0.2f});
// Run
net.RunOp();
// Check
auto expected = CreateTensor<float>({1, 2, 3, 3},
{ 8.1f, 12.1f, 8.1f,
12.1f, 18.1f, 12.1f,
8.1f, 12.1f, 8.1f,
4.2f, 6.2f, 4.2f,
6.2f, 9.2f, 6.2f,
4.2f, 6.2f, 4.2f});
auto expected = CreateTensor<float>(
{1, 2, 3, 3}, {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f,
4.2f, 6.2f, 4.2f, 6.2f, 9.2f, 6.2f, 4.2f, 6.2f, 4.2f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
......@@ -143,11 +122,11 @@ TEST_F(Conv2dOpTest, Conv1x1) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Conv2d", "Conv2dTest")
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {1, 1});
......@@ -155,38 +134,32 @@ TEST_F(Conv2dOpTest, Conv1x1) {
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddInputFromArray<float>("Input", {1, 5, 3, 10},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
net.AddInputFromArray<float>("Filter", {2, 5, 1, 1},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
2.0f, 2.0f, 2.0f, 2.0f, 2.0f});
net.AddInputFromArray<float>(
"Input", {1, 5, 3, 10},
{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
net.AddInputFromArray<float>(
"Filter", {2, 5, 1, 1},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f});
net.AddInputFromArray<float>("Bias", {2}, {0.1f, 0.2f});
// Run
net.RunOp();
// Check
auto expected = CreateTensor<float>({1, 2, 3, 10},
{5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f,
5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f,
5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f,
10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f,
10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f,
10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f});
auto expected = CreateTensor<float>(
{1, 2, 3, 10},
{5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f,
5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f,
5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f, 5.1f,
10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f,
10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f,
10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f, 10.2f});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
......@@ -194,8 +167,7 @@ TEST_F(Conv2dOpTest, Conv1x1) {
// TODO we need more tests
TEST_F(Conv2dOpTest, ConvNxNS12) {
testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w,
int stride_h, int stride_w,
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
Padding type) {
srand(time(NULL));
......@@ -206,7 +178,7 @@ TEST_F(Conv2dOpTest, ConvNxNS12) {
index_t width = 7 + rand() % 100;
index_t output_channels = 1 + rand() % 50;
// Construct graph
auto &net = test_net();
auto& net = test_net();
OpDefBuilder("Conv2d", "Conv2dTest")
.Input("Input")
.Input("Filter")
......@@ -221,8 +193,8 @@ TEST_F(Conv2dOpTest, ConvNxNS12) {
// Add input data
net.AddRandomInput<float>("Input", {batch, input_channels, height, width});
net.AddRandomInput<float>("Filter", {output_channels, input_channels,
kernel_h, kernel_w});
net.AddRandomInput<float>(
"Filter", {output_channels, input_channels, kernel_h, kernel_w});
net.AddRandomInput<float>("Bias", {output_channels});
// run cpu
net.RunOp();
......
......@@ -10,16 +10,15 @@
namespace mace {
template<DeviceType D, class T>
template <DeviceType D, class T>
class ConvPool2dOpBase : public Operator<D, T> {
public:
ConvPool2dOpBase(const OperatorDef& op_def, Workspace* ws)
: Operator<D, T>(op_def, ws),
strides_(OperatorBase::GetRepeatedArgument<int>("strides")),
padding_(static_cast<Padding>(
OperatorBase::GetSingleArgument<int>("padding",
static_cast<int>(SAME)))),
dilations_(OperatorBase::GetRepeatedArgument<int>("dilations")) {}
: Operator<D, T>(op_def, ws),
strides_(OperatorBase::GetRepeatedArgument<int>("strides")),
padding_(static_cast<Padding>(OperatorBase::GetSingleArgument<int>(
"padding", static_cast<int>(SAME)))),
dilations_(OperatorBase::GetRepeatedArgument<int>("dilations")) {}
protected:
std::vector<int> strides_;
......@@ -27,6 +26,6 @@ class ConvPool2dOpBase : public Operator<D, T> {
std::vector<int> dilations_;
};
} // namespace mace
} // namespace mace
#endif // MACE_OPS_CONV_POOL_2D_BASE_H_
#endif // MACE_OPS_CONV_POOL_2D_BASE_H_
......@@ -43,31 +43,33 @@ class OpsTestNet {
public:
OpsTestNet() {}
template<typename T>
void AddInputFromArray(const char *name,
const std::vector<index_t> &shape,
template <typename T>
void AddInputFromArray(const char *name, const std::vector<index_t> &shape,
const std::vector<T> &data) {
Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
Tensor *input =
ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape);
T *input_data = input->mutable_data<T>();
MACE_CHECK(input->size() == data.size());
memcpy(input_data, data.data(), data.size() * sizeof(T));
}
template<typename T>
void AddRepeatedInput(const char *name,
const std::vector<index_t> &shape,
const T data) {
Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
template <typename T>
void AddRepeatedInput(const char *name, const std::vector<index_t> &shape,
const T data) {
Tensor *input =
ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape);
T *input_data = input->mutable_data<T>();
MACE_CHECK(input->size() == data.size());
std::fill(input_data, input_data + input->size(), data);
}
template<typename T>
void AddRandomInput(const char *name, const std::vector<index_t> &shape, bool positive = false) {
Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
template <typename T>
void AddRandomInput(const char *name, const std::vector<index_t> &shape,
bool positive = false) {
Tensor *input =
ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape);
float *input_data = input->mutable_data<T>();
......@@ -76,12 +78,16 @@ class OpsTestNet {
std::normal_distribution<T> nd(0, 1);
std::generate(input_data, input_data + input->size(),
[&gen, &nd, positive] { return positive ? std::abs(nd(gen)) : nd(gen); });
[&gen, &nd, positive] {
return positive ? std::abs(nd(gen)) : nd(gen);
});
}
template<typename T>
void AddFixedInput(const char *name, const std::vector<index_t> &shape, T value) {
Tensor *input = ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
template <typename T>
void AddFixedInput(const char *name, const std::vector<index_t> &shape,
T value) {
Tensor *input =
ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
input->Resize(shape);
float *input_data = input->mutable_data<T>();
......@@ -122,7 +128,8 @@ class OpsTestNet {
}
}
void AddStringsArg(const char *name, const std::vector<const char *> &values) {
void AddStringsArg(const char *name,
const std::vector<const char *> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
......@@ -145,9 +152,7 @@ class OpsTestNet {
return net_->Run();
}
// Convenience overload: runs the operator on DeviceType::CPU and returns the
// result of RunOp(DeviceType).
bool RunOp() { return RunOp(DeviceType::CPU); }
Tensor *GetOutput(const char *output_name) {
return ws_.GetTensor(output_name);
......@@ -177,8 +182,9 @@ class OpsTestBase : public ::testing::Test {
OpsTestNet test_net_;
};
template<typename T>
unique_ptr<Tensor> CreateTensor(const std::vector<index_t> &shape, const std::vector<T> &data) {
template <typename T>
unique_ptr<Tensor> CreateTensor(const std::vector<index_t> &shape,
const std::vector<T> &data) {
unique_ptr<Tensor> res(new Tensor(cpu_allocator(), DataTypeToEnum<T>::v()));
res->Resize(shape);
T *input_data = res->mutable_data<T>();
......@@ -209,40 +215,38 @@ inline std::string ShapeToString(const Tensor &x) {
return std::string(stream.str());
}
// Compile-time trait: `value` is true only when T is exactly `float` or
// exactly `double` (as decided by std::is_same), and false for every other
// type.
template <typename T>
struct is_floating_point_type {
  static const bool value =
      std::is_same<T, float>::value || std::is_same<T, double>::value;
};
template<typename T>
template <typename T>
inline void ExpectEqual(const T &a, const T &b) {
EXPECT_EQ(a, b);
}
template<>
template <>
inline void ExpectEqual<float>(const float &a, const float &b) {
EXPECT_FLOAT_EQ(a, b);
}
template<>
template <>
inline void ExpectEqual<double>(const double &a, const double &b) {
EXPECT_DOUBLE_EQ(a, b);
}
// Asserts (fatally, via gtest ASSERT_*) that `x` and `y` have the same dtype
// and that IsSameSize(x, y) holds; on a size mismatch both shapes are printed.
inline void AssertSameTypeDims(const Tensor &x, const Tensor &y) {
  ASSERT_EQ(x.dtype(), y.dtype());
  ASSERT_TRUE(IsSameSize(x, y)) << "x.shape [" << ShapeToString(x) << "] vs "
                                << "y.shape [ " << ShapeToString(y) << "]";
}
// Primary template, declared only. The second parameter defaults to
// is_floating_point_type<T>::value so that specializations can be selected on
// whether T is float/double.
template <typename T, bool is_fp = is_floating_point_type<T>::value>
struct Expector;
// Partial specialization for float and double.
template<typename T>
template <typename T>
struct Expector<T, true> {
static void Equal(const T &a, const T &b) { ExpectEqual(a, b); }
......@@ -262,18 +266,19 @@ struct Expector<T, true> {
auto a = x.data<T>();
auto b = y.data<T>();
for (int i = 0; i < x.size(); ++i) {
EXPECT_NEAR(a[i], b[i], abs_err)
<< "a = " << a << " b = " << b << " index = " << i;
EXPECT_NEAR(a[i], b[i], abs_err) << "a = " << a << " b = " << b
<< " index = " << i;
}
}
};
template<typename T>
template <typename T>
void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
static_assert(is_floating_point_type<T>::value, "T is not a floating point type");
static_assert(is_floating_point_type<T>::value,
"T is not a floating point type");
Expector<T>::Near(x, y, abs_err);
}
} // namespace mace
} // namespace mace
#endif // MACE_OPS_TEST_UTIL_H_
#endif // MACE_OPS_TEST_UTIL_H_
......@@ -2,7 +2,6 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/pooling.h"
namespace mace {
......@@ -11,6 +10,6 @@ REGISTER_CPU_OPERATOR(Pooling, PoolingOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR(Pooling, PoolingOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // __ARM_NEON
} // namespace mace
} // namespace mace
......@@ -11,17 +11,17 @@
namespace mace {
// Pooling operator, templated on device type D and element type T, derived
// from ConvPool2dOpBase<D, T>.
// NOTE(review): this text appears to be a rendered diff whose +/- markers were
// lost; where a line (or group of lines) appears twice below, the second copy
// looks like the reformatted version of the first.
template<DeviceType D, class T>
template <DeviceType D, class T>
class PoolingOp : public ConvPool2dOpBase<D, T> {
public:
public:
// Forwards op_def/ws to the base class, then reads the repeated int argument
// "kernels" and the single int argument "pooling_type" (AVG is passed as the
// second value, presumably the default when the argument is absent).
// NOTE(review): the constructor body is followed by a stray ';' ("{};").
PoolingOp(const OperatorDef& op_def, Workspace* ws)
: ConvPool2dOpBase<D, T>(op_def, ws),
kernels_(OperatorBase::GetRepeatedArgument<int>("kernels")),
pooling_type_(static_cast<PoolingType>(
OperatorBase::GetSingleArgument<int>(
"pooling_type", static_cast<int>(AVG)))) {};
: ConvPool2dOpBase<D, T>(op_def, ws),
kernels_(OperatorBase::GetRepeatedArgument<int>("kernels")),
pooling_type_(
static_cast<PoolingType>(OperatorBase::GetSingleArgument<int>(
"pooling_type", static_cast<int>(AVG)))){};
// Fetches the INPUT/OUTPUT tensors, derives the padding and output shape via
// kernels::CalcPaddingAndOutputSize, resizes the output, and applies a
// kernels::PoolingFunctor<D, T> to the raw buffers. Returns true on every
// path visible here.
bool Run() override{
bool Run() override {
const Tensor* input = this->Input(INPUT);
Tensor* output = this->Output(OUTPUT);
std::vector<index_t> in_shape = input->shape();
......@@ -33,28 +33,21 @@ public:
filter_shape[1] = in_shape[0];
filter_shape[2] = kernels_[0];
filter_shape[3] = kernels_[1];
// The declarations of filter_shape, output_shape and paddings (and the
// assignment of filter_shape[0]) are in lines elided by the hunk boundary.
kernels::CalcPaddingAndOutputSize(in_shape.data(),
filter_shape.data(),
kernels::CalcPaddingAndOutputSize(in_shape.data(), filter_shape.data(),
this->dilations_.data(),
this->strides_.data(),
this->padding_,
output_shape.data(),
paddings.data());
this->strides_.data(), this->padding_,
output_shape.data(), paddings.data());
output->Resize(output_shape);
// NOTE(review): the functor is PoolingFunctor<D, T>, but the buffers below
// are fetched with data<float>() / mutable_data<float>() rather than T.
auto pooling_func = kernels::PoolingFunctor<D, T>(pooling_type_,
kernels_.data(),
this->strides_.data(),
paddings.data(),
this->dilations_.data());
pooling_func(input->data<float>(),
in_shape.data(),
output->mutable_data<float>(),
output->shape().data());
auto pooling_func = kernels::PoolingFunctor<D, T>(
pooling_type_, kernels_.data(), this->strides_.data(), paddings.data(),
this->dilations_.data());
pooling_func(input->data<float>(), in_shape.data(),
output->mutable_data<float>(), output->shape().data());
return true;
// NOTE(review): the closing brace of Run() is followed by a stray ';'.
};
protected:
protected:
// Pooling window sizes; Run() uses elements [0] and [1].
std::vector<int> kernels_;
// Pooling mode handed to the functor.
PoolingType pooling_type_;
......@@ -62,6 +55,6 @@ protected:
OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace mace
} // namespace mace
#endif //MACE_OPS_POOLING_H_
#endif // MACE_OPS_POOLING_H_
......@@ -2,20 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/testing/test_benchmark.h"
#include "mace/core/operator.h"
#include "mace/kernels/pooling.h"
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h"
using namespace mace;
using namespace mace::kernels;
// Benchmark driver for the Pooling op on device D. The visible part pauses
// the benchmark timer and creates an OpsTestNet; the rest of the body lies in
// lines elided by the hunk boundary below.
// (The signature appears twice: original wrapping, then reformatted wrapping.)
template<DeviceType D>
static void Pooling(int iters, int batch, int channels, int height,
int width, int kernel, int stride, Padding padding,
template <DeviceType D>
static void Pooling(int iters, int batch, int channels, int height, int width,
int kernel, int stride, Padding padding,
PoolingType pooling_type) {
// Timing is stopped first, so the setup that follows is not measured.
mace::testing::StopTiming();
OpsTestNet net;
......@@ -45,18 +44,21 @@ static void Pooling(int iters, int batch, int channels, int height,
}
}
// Defines and registers one pooling benchmark. The generated static function
// is named from all macro parameters, reports iters * N * C * H * W as the
// item count and that count times sizeof(float) as the byte count, then calls
// Pooling<DEVICE> with Padding::PA and PoolingType::PO. The trailing
// BENCHMARK(...) has no ';', so the user of the macro supplies it.
#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \
static void BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(float)));\
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, PoolingType::PO); \
} \
BENCHMARK(BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE)
// Reformatted copy of the same macro (same tokens, different line wrapping).
#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \
static void \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(float))); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
PoolingType::PO); \
} \
BENCHMARK( \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE)
// Instantiates BM_POOLING_MACRO twice for one configuration: once for CPU and
// once for NEON. (The #define line and its first continuation appear twice:
// original spacing, then realigned spacing.)
// NOTE(review): the expansion already ends with ';', and the invocation in
// this file adds its own ';', which leaves an empty declaration behind.
#define BM_POOLING(N, C, H, W, K, S, PA, PO) \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); \
#define BM_POOLING(N, C, H, W, K, S, PA, PO) \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, NEON);
BM_POOLING(1, 3, 129, 129, 2, 2, SAME, MAX);
......
......@@ -5,9 +5,9 @@
#include "gtest/gtest.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/kernels/pooling.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/ops/ops_test_util.h"
using namespace mace;
......@@ -17,9 +17,9 @@ TEST_F(PoolingOpTest, MAX_VALID) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
......@@ -29,34 +29,28 @@ TEST_F(PoolingOpTest, MAX_VALID) {
net.AddIntArg("pooling_type", PoolingType::MAX);
// Add input data
net.AddInputFromArray<float>("Input", {1, 2, 4, 4},
{0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
16, 17, 18, 19,
20, 21, 22, 23,
24, 25, 26, 27,
28, 29, 30, 31});
net.AddInputFromArray<float>(
"Input", {1, 2, 4, 4},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31});
// Run
net.RunOp();
// Check
auto expected = CreateTensor<float>({1, 2, 2, 2},
{5, 7, 13, 15, 21, 23, 29, 31});
auto expected =
CreateTensor<float>({1, 2, 2, 2}, {5, 7, 13, 15, 21, 23, 29, 31});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
// Average-pooling test: feeds a {1, 2, 4, 4} tensor holding 0..31 through a
// "Pooling" op with 2x2 kernels and pooling_type AVG, and expects a
// {1, 2, 2, 2} result within 0.001.
// (Several statements appear twice: original layout, then reformatted layout.)
TEST_F(PoolingOpTest, AVG_VALID) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
......@@ -66,22 +60,17 @@ TEST_F(PoolingOpTest, AVG_VALID) {
net.AddIntArg("pooling_type", PoolingType::AVG);
// Add input data
net.AddInputFromArray<float>("Input", {1, 2, 4, 4},
{0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15,
16, 17, 18, 19,
20, 21, 22, 23,
24, 25, 26, 27,
28, 29, 30, 31});
net.AddInputFromArray<float>(
"Input", {1, 2, 4, 4},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31});
// Run
net.RunOp();
// Check
// The expected values match the means of non-overlapping 2x2 windows, e.g.
// (0 + 1 + 4 + 5) / 4 = 2.5 -- presumably stride 2 with VALID padding; those
// arguments are set in the lines elided above.
auto expected = CreateTensor<float>({1, 2, 2, 2},
{2.5, 4.5, 10.5, 12.5, 18.5, 20.5, 26.5, 28.5});
auto expected = CreateTensor<float>(
{1, 2, 2, 2}, {2.5, 4.5, 10.5, 12.5, 18.5, 20.5, 26.5, 28.5});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
......@@ -90,9 +79,9 @@ TEST_F(PoolingOpTest, MAX_SAME) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
......@@ -103,16 +92,13 @@ TEST_F(PoolingOpTest, MAX_SAME) {
// Add input data
net.AddInputFromArray<float>("Input", {1, 1, 3, 3},
{0, 1, 2,
3, 4, 5,
6, 7, 8});
{0, 1, 2, 3, 4, 5, 6, 7, 8});
// Run
net.RunOp();
// Check
auto expected = CreateTensor<float>({1, 1, 2, 2},
{4, 5, 7, 8});
auto expected = CreateTensor<float>({1, 1, 2, 2}, {4, 5, 7, 8});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
......@@ -121,9 +107,9 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
......@@ -133,18 +119,15 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
net.AddIntArg("pooling_type", PoolingType::MAX);
// Add input data
net.AddInputFromArray<float>("Input", {1, 1, 4, 4},
{0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15});
net.AddInputFromArray<float>(
"Input", {1, 1, 4, 4},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15});
// Run
net.RunOp();
// Check
auto expected = CreateTensor<float>({1, 1, 2, 2},
{10, 11, 14, 15});
auto expected = CreateTensor<float>({1, 1, 2, 2}, {10, 11, 14, 15});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
}
......@@ -153,9 +136,9 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntArg("pooling_type", PoolingType::MAX);
......@@ -165,18 +148,14 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddInputFromArray<float>("Input", {1, 1, 4, 5},
{0, 1, 2, 3, 4,
5, 6, 7, 8, 9,
10, 11, 12, 13, 14,
15, 16, 17, 18, 19});
net.AddInputFromArray<float>(
"Input", {1, 1, 4, 5},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
// Run
net.RunOp(DeviceType::NEON);
// Check
Tensor expected = CreateTensor<float>({1, 1, 2, 3},
{6, 8, 9,
16, 18, 19});
Tensor expected = CreateTensor<float>({1, 1, 2, 3}, {6, 8, 9, 16, 18, 19});
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.001);
}
......@@ -185,9 +164,9 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
// Construct graph
auto& net = test_net();
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntArg("pooling_type", PoolingType::MAX);
......@@ -197,18 +176,14 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddInputFromArray<float>("Input", {1, 1, 4, 5},
{0, 1, 2, 3, 4,
5, 6, 7, 8, 9,
10, 11, 12, 13, 14,
15, 16, 17, 18, 19});
net.AddInputFromArray<float>(
"Input", {1, 1, 4, 5},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19});
// Run
net.RunOp(DeviceType::NEON);
// Check
Tensor expected = CreateTensor<float>({1, 1, 2, 3},
{11, 13, 14,
16, 18, 19});
Tensor expected = CreateTensor<float>({1, 1, 2, 3}, {11, 13, 14, 16, 18, 19});
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.001);
}
......@@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR(Relu, ReluOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
#endif // __ARM_NEON
} // namespace mace
} // namespace mace
......@@ -10,10 +10,10 @@
namespace mace {
// ReLU operator, templated on device type D and element type T; owns a
// kernels::ReluFunctor<D, T>.
// (The template line and the constructor signature appear twice: original
// spacing, then reformatted spacing.)
template<DeviceType D, class T>
template <DeviceType D, class T>
class ReluOp : public Operator<D, T> {
public:
// Forwards the operator definition and workspace to Operator<D, T>.
ReluOp(const OperatorDef &operator_def, Workspace *ws)
ReluOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<D, T>(operator_def, ws) {}
// Reads the first input tensor; the rest of the body is elided by the hunk
// boundary below.
bool Run() override {
const Tensor* input_tensor = this->inputs_[0];
......@@ -31,6 +31,6 @@ class ReluOp : public Operator<D, T> {
kernels::ReluFunctor<D, T> functor_;
};
} // namespace mace
} // namespace mace
#endif // MACE_OPS_RELU_H_
#endif // MACE_OPS_RELU_H_
......@@ -10,7 +10,6 @@
namespace mace {
// Benchmark driver for the Relu op on device D. Timing is paused while the
// test net is set up (setup lines are elided by the hunk boundary), then
// resumed for the measured loop.
template <DeviceType D, typename T>
static void ReluBenchmark(int iters, int size) {
mace::testing::StopTiming();
OpsTestNet net;
......@@ -28,26 +27,25 @@ static void ReluBenchmark(int iters, int size) {
}
mace::testing::StartTiming();
// Measured section: runs the op once per requested iteration.
// (The loop header appears twice: original spacing, then reformatted.)
while(iters--) {
while (iters--) {
net.RunOp(D);
}
}
// Defines and registers one Relu benchmark. The generated static function
// BM_RELU_<SIZE>_<TYPE>_<DEVICE> reports iters * SIZE as the item count and
// that count times sizeof(TYPE) as the byte count, then calls
// ReluBenchmark<DEVICE, TYPE>(iters, SIZE). The trailing BENCHMARK(...) has no
// ';', so the user of the macro supplies it.
// (The macro text appears twice back to back: original layout, then the
// reformatted layout; both share the final BENCHMARK line.)
#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \
static void BM_RELU_##SIZE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, SIZE); \
} \
#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \
static void BM_RELU_##SIZE##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, SIZE); \
} \
BENCHMARK(BM_RELU_##SIZE##_##TYPE##_##DEVICE)
// Instantiates BM_RELU_MACRO twice for one size/type pair: once for CPU and
// once for NEON. (The #define line and its first continuation appear twice:
// original spacing, then realigned spacing.)
// NOTE(review): the expansion already ends with ';', and the invocations in
// this file add their own ';', which leaves an empty declaration behind.
#define BM_RELU(SIZE, TYPE) \
BM_RELU_MACRO(SIZE, TYPE, CPU); \
#define BM_RELU(SIZE, TYPE) \
BM_RELU_MACRO(SIZE, TYPE, CPU); \
BM_RELU_MACRO(SIZE, TYPE, NEON);
BM_RELU(1000, float);
BM_RELU(100000, float);
BM_RELU(10000000, float);
} // namespace mace
\ No newline at end of file
} // namespace mace
\ No newline at end of file
......@@ -32,4 +32,4 @@ TEST_F(ReluOpTest, ReluOp) {
ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.01);
}
} // namespace mace
} // namespace mace
......@@ -9,7 +9,8 @@ namespace mace {
REGISTER_CPU_OPERATOR(ResizeBilinear, ResizeBilinearOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR(ResizeBilinear, ResizeBilinearOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
REGISTER_NEON_OPERATOR(ResizeBilinear,
ResizeBilinearOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
} // namespace mace
} // namespace mace
......@@ -5,18 +5,18 @@
#ifndef MACE_RESIZE_BILINEAR_H
#define MACE_RESIZE_BILINEAR_H
#include "mace/core/operator.h"
#include "mace/kernels/resize_bilinear.h"
namespace mace {
// Bilinear-resize operator, templated on device type D and element type T;
// owns a kernels::ResizeBilinearFunctor<D, T>.
// (Changed lines appear twice: original formatting, then reformatted.)
template<DeviceType D, class T>
template <DeviceType D, class T>
class ResizeBilinearOp : public Operator<D, T> {
public:
// Builds the functor from the bool argument "align_corners" (false is passed
// as the second value, presumably the default when the argument is absent).
ResizeBilinearOp(const OperatorDef &operator_def, Workspace *ws)
ResizeBilinearOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<D, T>(operator_def, ws),
functor_(OperatorBase::GetSingleArgument<bool>("align_corners", false)) {}
functor_(
OperatorBase::GetSingleArgument<bool>("align_corners", false)) {}
// Validates the inputs, reads the target height/width from the resize_dims
// tensor, resizes the output to {n, channels, out_height, out_width} and
// returns true. Parts of the body are elided by the hunk boundaries below.
bool Run() override {
const Tensor* input = this->Input(0);
......@@ -24,8 +24,8 @@ class ResizeBilinearOp : public Operator<D, T> {
MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional.",
input->dim_size());
// NOTE(review): the condition requires resize_dims to have dim_size() == 1,
// yet the message says "2-dimensional"; the message presumably means a 1-D
// tensor holding two values (elements [0] and [1] are read below) -- confirm
// and reword.
MACE_CHECK(resize_dims->dim_size() == 1, "resize dim must be 2-dimensional.",
resize_dims->dim_size());
MACE_CHECK(resize_dims->dim_size() == 1,
"resize dim must be 2-dimensional.", resize_dims->dim_size());
Tensor* output = this->Output(0);
......@@ -35,7 +35,7 @@ class ResizeBilinearOp : public Operator<D, T> {
index_t in_width = input->dim(3);
// Target spatial size: element 0 is the output height, element 1 the width.
index_t out_height = resize_dims->data<index_t>()[0];
index_t out_width = resize_dims->data<index_t>()[1];
// NOTE(review): 'vector' is unqualified here; it presumably resolves through
// a using-declaration outside this view -- confirm.
vector<index_t> out_shape {n, channels, out_height, out_width};
vector<index_t> out_shape{n, channels, out_height, out_width};
output->Resize(out_shape);
const T* input_ptr = input->data<T>();
......@@ -45,10 +45,11 @@ class ResizeBilinearOp : public Operator<D, T> {
out_height, out_width);
return true;
}
private:
// Resize kernel configured with align_corners in the constructor.
kernels::ResizeBilinearFunctor<D, T> functor_;
};
} // namespace mace
} // namespace mace
#endif // MACE_RESIZE_BILINEAR_H
#endif // MACE_RESIZE_BILINEAR_H
......@@ -2,9 +2,9 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/resize_bilinear.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/resize_bilinear.h"
using namespace mace;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册