Commit 291a5ee6 authored by Liangliang He

set clang-format to BinPackParameters=false
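
The option named in the commit message controls whether clang-format may pack ("bin") several parameters onto one continuation line. A minimal sketch of the relevant .clang-format entry (the Google base style is assumed here; the project's real file may set additional keys):

    # illustrative .clang-format snippet, not the project's full config
    BasedOnStyle: Google
    BinPackParameters: false

With BinPackParameters set to false, any declaration whose parameters no longer fit on a single line is broken with one parameter per line, which is exactly the reflow seen in every hunk below; the formatting is typically reapplied in place with clang-format -i <file>.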

Parent 578b382a
@@ -30,7 +30,8 @@ inline void MakeStringInternal(std::stringstream& ss, const T& t) {
 }
 
 template <typename T, typename... Args>
-inline void MakeStringInternal(std::stringstream& ss, const T& t,
+inline void MakeStringInternal(std::stringstream& ss,
+                               const T& t,
                                const Args&... args) {
   MakeStringInternal(ss, t);
   MakeStringInternal(ss, args...);
...
@@ -6,12 +6,14 @@
 namespace mace {
 
-NetBase::NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+NetBase::NetBase(const std::shared_ptr<const NetDef>& net_def,
+                 Workspace* ws,
                  DeviceType type)
     : name_(net_def->name()) {}
 
 SimpleNet::SimpleNet(const std::shared_ptr<const NetDef>& net_def,
-                     Workspace* ws, DeviceType type)
+                     Workspace* ws,
+                     DeviceType type)
     : NetBase(net_def, ws, type) {
   VLOG(1) << "Constructing SimpleNet " << net_def->name();
   for (int idx = 0; idx < net_def->op_size(); ++idx) {
@@ -37,14 +39,16 @@ bool SimpleNet::Run() {
   return true;
 }
 
-unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
+unique_ptr<NetBase> CreateNet(const NetDef& net_def,
+                              Workspace* ws,
                               DeviceType type) {
   std::shared_ptr<NetDef> tmp_net_def(new NetDef(net_def));
   return CreateNet(tmp_net_def, ws, type);
 }
 
 unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
-                              Workspace* ws, DeviceType type) {
+                              Workspace* ws,
+                              DeviceType type) {
   unique_ptr<NetBase> net(new SimpleNet(net_def, ws, type));
   return net;
 }
...
@@ -14,7 +14,8 @@ namespace mace {
 class NetBase {
  public:
-  NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+  NetBase(const std::shared_ptr<const NetDef>& net_def,
+          Workspace* ws,
           DeviceType type);
 
   virtual ~NetBase() noexcept {}
@@ -30,7 +31,8 @@ class NetBase {
 class SimpleNet : public NetBase {
  public:
-  SimpleNet(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+  SimpleNet(const std::shared_ptr<const NetDef>& net_def,
+            Workspace* ws,
             DeviceType type);
 
   bool Run() override;
@@ -41,10 +43,12 @@ class SimpleNet : public NetBase {
   DISABLE_COPY_AND_ASSIGN(SimpleNet);
 };
 
-unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
+unique_ptr<NetBase> CreateNet(const NetDef& net_def,
+                              Workspace* ws,
                               DeviceType type);
 
 unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
-                              Workspace* ws, DeviceType type);
+                              Workspace* ws,
+                              DeviceType type);
 
 }  // namespace mace
...
@@ -11,16 +11,21 @@ std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
   return &g_device_type_registry;
 }
 
-MACE_DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef&,
+MACE_DEFINE_REGISTRY(CPUOperatorRegistry,
+                     OperatorBase,
+                     const OperatorDef&,
                      Workspace*);
 MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
 
-MACE_DEFINE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef&,
+MACE_DEFINE_REGISTRY(NEONOperatorRegistry,
+                     OperatorBase,
+                     const OperatorDef&,
                      Workspace*);
 MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
 
 unique_ptr<OperatorBase> CreateOperator(const OperatorDef& operator_def,
-                                        Workspace* ws, DeviceType type) {
+                                        Workspace* ws,
+                                        DeviceType type) {
   OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
   return registry->Create(operator_def.type(), operator_def, ws);
 }
...
@@ -140,7 +140,9 @@ struct DeviceTypeRegisterer {
       type, &registry_function);        \
   }
 
-MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &,
+MACE_DECLARE_REGISTRY(CPUOperatorRegistry,
+                      OperatorBase,
+                      const OperatorDef &,
                       Workspace *);
 
 #define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
@@ -148,7 +150,9 @@ MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &,
 #define REGISTER_CPU_OPERATOR(name, ...) \
   MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
 
-MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &,
+MACE_DECLARE_REGISTRY(NEONOperatorRegistry,
+                      OperatorBase,
+                      const OperatorDef &,
                       Workspace *);
 
 #define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
@@ -157,7 +161,8 @@ MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &,
   MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
 
 unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
-                                        Workspace *ws, DeviceType type);
+                                        Workspace *ws,
+                                        DeviceType type);
 
 }  // namespace mace
...
@@ -316,7 +316,8 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) {
              ProtoDebugString(def));
 }
 
-bool GetFlagArgument(const OperatorDef& def, const string& name,
+bool GetFlagArgument(const OperatorDef& def,
+                     const string& name,
                      bool def_value) {
   for (const Argument& arg : def.arg()) {
     if (arg.name() == name) {
@@ -328,7 +329,8 @@ bool GetFlagArgument(const OperatorDef& def, const string& name,
   return def_value;
 }
 
-Argument* GetMutableArgument(const string& name, const bool create_if_missing,
+Argument* GetMutableArgument(const string& name,
+                             const bool create_if_missing,
                              OperatorDef* def) {
   for (int i = 0; i < def->arg_size(); ++i) {
     if (def->arg(i).name() == name) {
...
@@ -107,7 +107,8 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) {
 template <class IterableInputs = std::initializer_list<string>,
           class IterableOutputs = std::initializer_list<string>,
           class IterableArgs = std::initializer_list<Argument>>
-OperatorDef CreateOperatorDef(const string& type, const string& name,
+OperatorDef CreateOperatorDef(const string& type,
+                              const string& name,
                               const IterableInputs& inputs,
                               const IterableOutputs& outputs,
                               const IterableArgs& args) {
@@ -130,7 +131,8 @@ OperatorDef CreateOperatorDef(const string& type, const string& name,
 // to specify args.
 template <class IterableInputs = std::initializer_list<string>,
           class IterableOutputs = std::initializer_list<string>>
-inline OperatorDef CreateOperatorDef(const string& type, const string& name,
+inline OperatorDef CreateOperatorDef(const string& type,
+                                     const string& name,
                                      const IterableInputs& inputs,
                                      const IterableOutputs& outputs) {
   return CreateOperatorDef(type, name, inputs, outputs,
@@ -153,7 +155,8 @@ class ArgumentHelper {
   }
 
   template <typename Def, typename T>
-  static T GetSingleArgument(const Def& def, const string& name,
+  static T GetSingleArgument(const Def& def,
+                             const string& name,
                              const T& default_value) {
     return ArgumentHelper(def).GetSingleArgument<T>(name, default_value);
   }
@@ -165,7 +168,8 @@ class ArgumentHelper {
   template <typename Def, typename T>
   static vector<T> GetRepeatedArgument(
-      const Def& def, const string& name,
+      const Def& def,
+      const string& name,
       const std::vector<T>& default_value = std::vector<T>()) {
     return ArgumentHelper(def).GetRepeatedArgument<T>(name, default_value);
   }
 
@@ -223,10 +227,12 @@ class ArgumentHelper {
 };
 
 const Argument& GetArgument(const OperatorDef& def, const string& name);
-bool GetFlagArgument(const OperatorDef& def, const string& name,
+bool GetFlagArgument(const OperatorDef& def,
+                     const string& name,
                      bool def_value = false);
 
-Argument* GetMutableArgument(const string& name, const bool create_if_missing,
+Argument* GetMutableArgument(const string& name,
+                             const bool create_if_missing,
                              OperatorDef* def);
 
 template <typename T>
...
@@ -101,7 +101,8 @@ class Registerer {
 #define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...)                    \
   namespace {                                                                \
   static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
-      key, RegistryName(),                                                   \
+      key,                                                                   \
+      RegistryName(),                                                        \
       Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);                \
   }
...
@@ -16,7 +16,8 @@ vector<string> Workspace::Tensors() const {
   return names;
 }
 
-Tensor* Workspace::CreateTensor(const string& name, Allocator* alloc,
+Tensor* Workspace::CreateTensor(const string& name,
+                                Allocator* alloc,
                                 DataType type) {
   if (HasTensor(name)) {
     VLOG(1) << "Tensor " << name << " already exists. Skipping.";
...
@@ -18,9 +18,15 @@ struct BatchNormFunctor {
   BatchNormFunctor(const float variance_epsilon)
       : variance_epsilon_(variance_epsilon) {}
 
-  void operator()(const T* input, const T* scale, const T* offset,
-                  const T* mean, const T* var, const index_t n,
-                  const index_t channel, const index_t sample_size, T* output) {
+  void operator()(const T* input,
+                  const T* scale,
+                  const T* offset,
+                  const T* mean,
+                  const T* var,
+                  const index_t n,
+                  const index_t channel,
+                  const index_t sample_size,
+                  T* output) {
     // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
     // The calculation formula for inference is
     // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
@@ -49,9 +55,15 @@ struct BatchNormFunctor {
 template <>
 void BatchNormFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const float* scale, const float* offset,
-    const float* mean, const float* var, const index_t n, const index_t channel,
-    const index_t sample_size, float* output);
+    const float* input,
+    const float* scale,
+    const float* offset,
+    const float* mean,
+    const float* var,
+    const index_t n,
+    const index_t channel,
+    const index_t sample_size,
+    float* output);
 
 }  // namespace kernels
 }  // namespace mace
...
@@ -102,8 +102,12 @@ class Conv2dFunctor {
 template <>
 void Conv2dFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const index_t* input_shape, const float* filter,
-    const index_t* filter_shape, const float* bias, float* output,
+    const float* input,
+    const index_t* input_shape,
+    const float* filter,
+    const index_t* filter_shape,
+    const float* bias,
+    float* output,
     const index_t* output_shape);
 
 }  // namespace kernels
...
@@ -9,8 +9,10 @@ namespace kernels {
 void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
                               const index_t *filter_shape,  // OIHW
-                              const int *dilations, const int *strides,
-                              Padding padding, index_t *output_shape,
+                              const int *dilations,
+                              const int *strides,
+                              Padding padding,
+                              index_t *output_shape,
                               int *padding_size) {
   MACE_CHECK(dilations[0] > 0 && dilations[1] > 0,
              "Invalid dilations, must >= 1");
@@ -69,8 +71,10 @@ void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
   output_shape[3] = output_width;
 }
 
-void ConstructInputWithPadding(const float *input, const index_t *input_shape,
-                               const int *paddings, Tensor *output_tensor) {
+void ConstructInputWithPadding(const float *input,
+                               const index_t *input_shape,
+                               const int *paddings,
+                               Tensor *output_tensor) {
   index_t batch = input_shape[0];
   index_t channels = input_shape[1];
   index_t height = input_shape[2];
...
@@ -19,12 +19,16 @@ namespace kernels {
 void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
                               const index_t *filter_shape,  // OIHW
-                              const int *dilations, const int *strides,
-                              Padding padding, index_t *output_shape,
+                              const int *dilations,
+                              const int *strides,
+                              Padding padding,
+                              index_t *output_shape,
                               int *padding_size);
 
-void ConstructInputWithPadding(const float *input, const index_t *input_shape,
-                               const int *paddings, Tensor *output_tensor);
+void ConstructInputWithPadding(const float *input,
+                               const index_t *input_shape,
+                               const int *paddings,
+                               Tensor *output_tensor);
 
 }  // namespace kernels
 }  // namespace mace
...
@@ -10,9 +10,15 @@ namespace kernels {
 template <>
 void BatchNormFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const float* scale, const float* offset,
-    const float* mean, const float* var, const index_t n, const index_t channel,
-    const index_t sample_size, float* output) {
+    const float* input,
+    const float* scale,
+    const float* offset,
+    const float* mean,
+    const float* var,
+    const index_t n,
+    const index_t channel,
+    const index_t sample_size,
+    float* output) {
   // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
   // The calculation formula for inference is
   // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
...
@@ -8,17 +8,26 @@
 namespace mace {
 namespace kernels {
 
-extern void Conv2dNeonK1x1S1(const float *input, const index_t *input_shape,
-                             const float *filter, const float *bias,
-                             float *output, const index_t *output_shape);
+extern void Conv2dNeonK1x1S1(const float *input,
+                             const index_t *input_shape,
+                             const float *filter,
+                             const float *bias,
+                             float *output,
+                             const index_t *output_shape);
 
-extern void Conv2dNeonK3x3S1(const float *input, const index_t *input_shape,
-                             const float *filter, const float *bias,
-                             float *output, const index_t *output_shape);
+extern void Conv2dNeonK3x3S1(const float *input,
+                             const index_t *input_shape,
+                             const float *filter,
+                             const float *bias,
+                             float *output,
+                             const index_t *output_shape);
 
-extern void Conv2dNeonK5x5S1(const float *input, const index_t *input_shape,
-                             const float *filter, const float *bias,
-                             float *output, const index_t *output_shape);
+extern void Conv2dNeonK5x5S1(const float *input,
+                             const index_t *input_shape,
+                             const float *filter,
+                             const float *bias,
+                             float *output,
+                             const index_t *output_shape);
 
 template <>
 void Conv2dFunctor<DeviceType::NEON,
...
@@ -60,7 +60,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
         float32x4_t sum0 = vdupq_n_f32(.0f);
         float32x4_t sum1 = vdupq_n_f32(.0f);
         float32x4_t row0_ext_0 = vld1q_f32(row[0]);  // 0123
-        float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize);  // 4567
+        float32x4_t row0_latter =
+            vld1q_f32(row[0] + kRegisterSize);  // 4567
         float32x4_t row0_ext_1 =
             vextq_f32(row0_ext_0, row0_latter, 1);  // 1234
         float32x4_t row0_ext_2 =
@@ -71,7 +72,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
         sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
 
         float32x4_t row1_ext_0 = vld1q_f32(row[1]);  // 0123
-        float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize);  // 4567
+        float32x4_t row1_latter =
+            vld1q_f32(row[1] + kRegisterSize);  // 4567
         float32x4_t row1_ext_1 =
             vextq_f32(row1_ext_0, row1_latter, 1);  // 1234
         float32x4_t row1_ext_2 =
@@ -158,7 +160,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
       for (; count > 0; --count) {
         float32x4_t sum0 = vdupq_n_f32(.0f);
         float32x4_t row0_ext_0 = vld1q_f32(row[0]);  // 0123
-        float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize);  // 4567
+        float32x4_t row0_latter =
+            vld1q_f32(row[0] + kRegisterSize);  // 4567
         float32x4_t row0_ext_1 =
             vextq_f32(row0_ext_0, row0_latter, 1);  // 1234
         float32x4_t row0_ext_2 =
@@ -169,7 +172,8 @@ void Conv2dNeonK3x3S1(const float* input,  // NCHW
         sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
 
         float32x4_t row1_ext_0 = vld1q_f32(row[1]);  // 0123
-        float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize);  // 4567
+        float32x4_t row1_latter =
+            vld1q_f32(row[1] + kRegisterSize);  // 4567
         float32x4_t row1_ext_1 =
             vextq_f32(row1_ext_0, row1_latter, 1);  // 1234
         float32x4_t row1_ext_2 =
...
@@ -11,8 +11,10 @@
 namespace mace {
 namespace kernels {
 
-void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
-                            float *output, const index_t *out_shape,
+void PoolingMaxNeonK2x2S2x2(const float *input,
+                            const index_t *in_shape,
+                            float *output,
+                            const index_t *out_shape,
                             const int *paddings) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
@@ -101,8 +103,10 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
 }
 
 // assume the input has already been padded
-void PoolingMaxNeonK2x2S2x2Padded(const float *input, const index_t *in_shape,
-                                  float *output, const index_t *out_shape) {
+void PoolingMaxNeonK2x2S2x2Padded(const float *input,
+                                  const index_t *in_shape,
+                                  float *output,
+                                  const index_t *out_shape) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
   index_t in_height = in_shape[2];
...
@@ -11,8 +11,10 @@
 namespace mace {
 namespace kernels {
 
-void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
-                            float *output, const index_t *out_shape,
+void PoolingMaxNeonK3x3S2x2(const float *input,
+                            const index_t *in_shape,
+                            float *output,
+                            const index_t *out_shape,
                             const int *paddings) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
@@ -127,8 +129,10 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
 }
 
 // assume the input has already been padded
-void PoolingMaxNeonK3x3S2x2Padded(const float *input, const index_t *in_shape,
-                                  float *output, const index_t *out_shape) {
+void PoolingMaxNeonK3x3S2x2Padded(const float *input,
+                                  const index_t *in_shape,
+                                  float *output,
+                                  const index_t *out_shape) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
   index_t in_height = in_shape[2];
...
@@ -9,26 +9,34 @@
 namespace mace {
 namespace kernels {
 
-extern void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
-                                   float *output, const index_t *out_shape,
+extern void PoolingMaxNeonK2x2S2x2(const float *input,
+                                   const index_t *in_shape,
+                                   float *output,
+                                   const index_t *out_shape,
                                    const int *paddings);
 
-extern void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
-                                   float *output, const index_t *out_shape,
+extern void PoolingMaxNeonK3x3S2x2(const float *input,
+                                   const index_t *in_shape,
+                                   float *output,
+                                   const index_t *out_shape,
                                    const int *paddings);
 
 #ifdef __COPY_MAKE_PADDING
 extern void PoolingMaxNeonK2x2S2x2Padded(const float *input,
-                                         const index_t *in_shape, float *output,
+                                         const index_t *in_shape,
+                                         float *output,
                                          const index_t *out_shape);
 extern void PoolingMaxNeonK3x3S2x2Padded(const float *input,
-                                         const index_t *in_shape, float *output,
+                                         const index_t *in_shape,
+                                         float *output,
                                          const index_t *out_shape);
 #endif
 
 template <>
 void PoolingFunctor<DeviceType::NEON, float>::operator()(
-    const float *input, const index_t *input_shape, float *output,
+    const float *input,
+    const index_t *input_shape,
+    float *output,
     const index_t *output_shape) {
   if (kernels_[0] == 2 && kernels_[1] == 2 && strides_[0] == 2 &&
       strides_[1] == 2 && pooling_type_ == MAX) {
...
@@ -20,15 +20,20 @@ namespace kernels {
 template <DeviceType D, typename T>
 class PoolingFunctor {
  public:
-  PoolingFunctor(const PoolingType pooling_type, const int *kernels,
-                 const int *strides, const int *paddings, const int *dilations)
+  PoolingFunctor(const PoolingType pooling_type,
+                 const int *kernels,
+                 const int *strides,
+                 const int *paddings,
+                 const int *dilations)
       : pooling_type_(pooling_type),
        kernels_(kernels),
        strides_(strides),
        paddings_(paddings),
        dilations_(dilations) {}
 
-  void operator()(const T *input, const index_t *input_shape, T *output,
+  void operator()(const T *input,
+                  const index_t *input_shape,
+                  T *output,
                   const index_t *output_shape) {
     index_t batch = output_shape[0];
     index_t channels = output_shape[1];
@@ -118,7 +123,9 @@ class PoolingFunctor {
 template <>
 void PoolingFunctor<DeviceType::NEON, float>::operator()(
-    const float *input, const index_t *input_shape, float *output,
+    const float *input,
+    const index_t *input_shape,
+    float *output,
     const index_t *output_shape);
 
 }  // namespace kernels
...
@@ -19,7 +19,8 @@ struct CachedInterpolation {
   float lerp;
 };
 
-inline float CalculateResizeScale(index_t in_size, index_t out_size,
+inline float CalculateResizeScale(index_t in_size,
+                                  index_t out_size,
                                   bool align_corners) {
   return (align_corners && out_size > 1)
              ? (in_size - 1) / static_cast<float>(out_size - 1)
@@ -40,21 +41,28 @@ inline void ComputeInterpolationWeights(const index_t out_size,
   }
 }
 
-inline float ComputeLerp(const float top_left, const float top_right,
-                         const float bottom_left, const float bottom_right,
-                         const float x_lerp, const float y_lerp) {
+inline float ComputeLerp(const float top_left,
+                         const float top_right,
+                         const float bottom_left,
+                         const float bottom_right,
+                         const float x_lerp,
+                         const float y_lerp) {
   const float top = top_left + (top_right - top_left) * x_lerp;
   const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
   return top + (bottom - top) * y_lerp;
 }
 
 template <typename T>
-void ResizeImage(const T *images, const index_t batch_size,
-                 const index_t in_height, const index_t in_width,
-                 const index_t out_height, const index_t out_width,
+void ResizeImage(const T *images,
+                 const index_t batch_size,
+                 const index_t in_height,
+                 const index_t in_width,
+                 const index_t out_height,
+                 const index_t out_width,
                  const index_t channels,
                  const std::vector<CachedInterpolation> &xs_vec,
-                 const std::vector<CachedInterpolation> &ys, float *output) {
+                 const std::vector<CachedInterpolation> &ys,
+                 float *output) {
   const index_t in_channel_size = in_height * in_width;
   const index_t in_batch_num_values = channels * in_channel_size;
   const index_t out_channel_size = out_height * out_width;
@@ -98,8 +106,13 @@ struct ResizeBilinearFunctor {
   ResizeBilinearFunctor(bool align_corners) : align_corners_(align_corners) {}
 
-  void operator()(const T *input, T *output, index_t n, index_t channels,
-                  index_t in_height, index_t in_width, index_t out_height,
+  void operator()(const T *input,
+                  T *output,
+                  index_t n,
+                  index_t channels,
+                  index_t in_height,
+                  index_t in_width,
+                  index_t out_height,
                   index_t out_width) {
     if (out_height == in_height && out_width == in_width) {
       std::copy(input, input + channels * in_height * in_width, output);
...
@@ -8,8 +8,8 @@
 namespace mace {
 
 template <DeviceType D, typename T>
-static void BatchNorm(int iters, int batch, int channels, int height,
-                      int width) {
+static void BatchNorm(
+    int iters, int batch, int channels, int height, int width) {
   mace::testing::StopTiming();
 
   OpsTestNet net;
...
@@ -12,8 +12,15 @@
 namespace mace {
 
 template <DeviceType D, typename T>
-static void Conv2d(int iters, int batch, int channels, int height, int width,
-                   int kernel_h, int kernel_w, int stride, Padding padding,
+static void Conv2d(int iters,
+                   int batch,
+                   int channels,
+                   int height,
+                   int width,
+                   int kernel_h,
+                   int kernel_w,
+                   int stride,
+                   Padding padding,
                    int output_channels) {
   mace::testing::StopTiming();
...
@@ -44,7 +44,8 @@ class OpsTestNet {
   OpsTestNet() {}
 
   template <typename T>
-  void AddInputFromArray(const char *name, const std::vector<index_t> &shape,
+  void AddInputFromArray(const char *name,
+                         const std::vector<index_t> &shape,
                          const std::vector<T> &data) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -55,7 +56,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddRepeatedInput(const char *name, const std::vector<index_t> &shape,
+  void AddRepeatedInput(const char *name,
+                        const std::vector<index_t> &shape,
                         const T data) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -66,7 +68,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddRandomInput(const char *name, const std::vector<index_t> &shape,
+  void AddRandomInput(const char *name,
+                      const std::vector<index_t> &shape,
                       bool positive = false) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -84,7 +87,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddFixedInput(const char *name, const std::vector<index_t> &shape,
+  void AddFixedInput(const char *name,
+                     const std::vector<index_t> &shape,
                      T value) {
     Tensor *input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
...
@@ -12,8 +12,14 @@ using namespace mace;
 using namespace mace::kernels;
 
 template <DeviceType D>
-static void Pooling(int iters, int batch, int channels, int height, int width,
-                    int kernel, int stride, Padding padding,
+static void Pooling(int iters,
+                    int batch,
+                    int channels,
+                    int height,
+                    int width,
+                    int kernel,
+                    int stride,
+                    Padding padding,
                     PoolingType pooling_type) {
   mace::testing::StopTiming();
...