From 303311afb03eceed6f528c8e87a0650a14dcca83 Mon Sep 17 00:00:00 2001 From: liuqi Date: Wed, 6 Sep 2017 18:08:09 +0800 Subject: [PATCH] Change index type to TIndex and Add member variable to batch_norm kernel functor. --- mace/core/operator.h | 4 +- mace/core/tensor.h | 15 ++----- mace/kernels/batch_norm.h | 61 +++++++++++++--------------- mace/kernels/neon/batch_norm_neon.cc | 38 +++++++++-------- mace/ops/batch_norm.cc | 1 - mace/ops/batch_norm.h | 17 ++++---- 6 files changed, 64 insertions(+), 72 deletions(-) diff --git a/mace/core/operator.h b/mace/core/operator.h index ddf8fb2e..970404f6 100644 --- a/mace/core/operator.h +++ b/mace/core/operator.h @@ -44,8 +44,8 @@ class OperatorBase { *operator_def_, name, default_value); } - inline const Tensor *Input(int idx) { - MACE_CHECK(static_cast<size_t>(idx) < inputs_.size()); + inline const Tensor *Input(TIndex idx) { + MACE_CHECK(idx >= 0 && static_cast<size_t>(idx) < inputs_.size()); return inputs_[idx]; } diff --git a/mace/core/tensor.h b/mace/core/tensor.h index 2c45255d..77a44615 100644 --- a/mace/core/tensor.h +++ b/mace/core/tensor.h @@ -68,15 +68,8 @@ class Tensor { inline TIndex dim_size() const { return shape_.size(); } - inline int dim32(int index) const { - MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit"); - MACE_CHECK(index >= 0, "Cannot have negative dimension index"); - MACE_CHECK(shape_[index], std::numeric_limits::max()); - return static_cast(shape_[index]); - } - - inline TIndex dim(int index) const { - MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit"); + inline TIndex dim(TIndex index) const { + MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit"); MACE_CHECK(index >= 0, "Cannot have negative dimension index"); return shape_[index]; } @@ -133,8 +126,8 @@ class Tensor { } template - inline void Copy(const T* src, size_t size) { - MACE_CHECK(static_cast(size) == size_, "copy src and dst with different size."); + inline void Copy(const T* src, TIndex size) { + MACE_CHECK(size == size_, "copy 
src and dst with different size."); CopyBytes(static_cast(src), sizeof(T) * size); } diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index fd405fa5..d81fdae9 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -11,33 +11,29 @@ namespace mace { namespace kernels { +template +struct BatchNormFunctorBase { + BatchNormFunctorBase(const float variance_epsilon) + :variance_epsilon_(variance_epsilon){} -template -struct BatchNormFunctor { - void operator()(const float* input, - const float* scale, - const float* offset, - const float* mean, - const float* var, - const int n, - const int channel, - const int sample_size, - const float variance_epsilon, - float* output) ; + float variance_epsilon_; }; -template<> -struct BatchNormFunctor { - void operator()(const float* input, - const float* scale, - const float* offset, - const float* mean, - const float* var, - const int n, - const int channel, - const int sample_size, - const float variance_epsilon, - float* output) { + +template +struct BatchNormFunctor : public BatchNormFunctorBase { + BatchNormFunctor(const float variance_epsilon) + :BatchNormFunctorBase(variance_epsilon){} + + void operator()(const T* input, + const T* scale, + const T* offset, + const T* mean, + const T* var, + const TIndex n, + const TIndex channel, + const TIndex sample_size, + T* output) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . 
// The calculation formula for inference is // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + @@ -45,21 +41,22 @@ struct BatchNormFunctor { // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} } // new_offset = \offset - mean * common_val; // Y = new_scale * X + new_offset; - float new_scale, new_offset; - for (int c = 0; c < channel; ++c) { - new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon); + T new_scale, new_offset; + for (TIndex c = 0; c < channel; ++c) { + new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_); new_offset = offset[c] - mean[c] * new_scale; - for (int i = 0; i < n; ++i) { - int pos = i * channel * sample_size + c * sample_size; - const float* input_sample_ptr = input + pos; - float* output_sample_ptr = output + pos; - for (int j = 0; j < sample_size; ++j) { + for (TIndex i = 0; i < n; ++i) { + TIndex pos = i * channel * sample_size + c * sample_size; + const T* input_sample_ptr = input + pos; + T* output_sample_ptr = output + pos; + for (TIndex j = 0; j < sample_size; ++j) { output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset; } } } } + }; } // namepsace kernels diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 2fbf6ece..d307173f 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -2,25 +2,27 @@ // Copyright (c) 2017 XiaoMi All rights reserved. 
// -#if __ARM_NEON +#if __ARM_NEON #include <arm_neon.h> #include "mace/kernels/batch_norm.h" namespace mace { namespace kernels { -template<> -struct BatchNormFunctor { - void operator()(const float* input, - const float* scale, - const float* offset, - const float* mean, - const float* var, +template +struct BatchNormFunctor : public BatchNormFunctorBase { + BatchNormFunctor(const float variance_epsilon) + :BatchNormFunctorBase(variance_epsilon){} + + void operator()(const T* input, + const T* scale, + const T* offset, + const T* mean, + const T* var, const int n, const int channel, const int sample_size, - const float variance_epsilon, - float* output) { + T* output) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // The calculation formula for inference is @@ -29,21 +31,21 @@ struct BatchNormFunctor { // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} } // new_offset = \offset - mean * common_val; // Y = new_scale * X + new_offset; - float new_scale, new_offset; + T new_scale, new_offset; int count = sample_size >> 2; int remain_count = sample_size - count; - for (int c = 0; c < channel; ++c) { - new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon); + for (TIndex c = 0; c < channel; ++c) { + new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_); new_offset = offset[c] - mean[c] * new_scale; float32x4_t new_scale_f = vdupq_n_f32(new_scale); float32x4_t new_offset_f = vdupq_n_f32(new_offset); - for (int i = 0; i < n; ++i) { - int pos = i * channel * sample_size + c * sample_size; + for (TIndex i = 0; i < n; ++i) { + TIndex pos = i * channel * sample_size + c * sample_size; const float* input_sample_ptr = input + pos; float* output_sample_ptr = output + pos; - for(int j = 0; j < count; ++j) { + for(TIndex j = 0; j < count; ++j) { float32x4_t input_f = vld1q_f32(input_sample_ptr); float32x4_t output_f = new_offset_f; output_f = vfmaq_f32(output_f, input_f, new_scale_f); @@ -51,7 +53,7 @@ struct BatchNormFunctor { 
input_sample_ptr += 4; output_sample_ptr += 4; } - for(int j = 0; j < remain_count; ++j) { + for(TIndex j = 0; j < remain_count; ++j) { *output_sample_ptr = new_scale * *input_sample_ptr + new_offset; ++output_sample_ptr; ++input_sample_ptr; @@ -63,4 +65,4 @@ struct BatchNormFunctor { } // namespace kernels } // namespace mace -#endif // __ARM_NEON +#endif // __ARM_NEON diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index 09d0e300..9a48b669 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -3,7 +3,6 @@ // #include "mace/ops/batch_norm.h" -#include "mace/proto/mace.pb.h" namespace mace { diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index 2b4fad42..8a3c01b4 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -14,7 +14,8 @@ template class BatchNormOp : public Operator { public: BatchNormOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws) {} + : Operator(operator_def, ws), + functor_(OperatorBase::GetSingleArgument<float>("variance_epsilon", 1e-4)){} bool Run() override { const Tensor* input = this->Input(0); @@ -23,8 +24,6 @@ class BatchNormOp : public Operator { const Tensor* mean = this->Input(3); const Tensor* var = this->Input(4); - const float variance_epsilon = this->template GetSingleArgument<float>("variance_epsilon", 1e-4); - MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size()); MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ", scale->dim_size()); MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. 
", offset->dim_size()); @@ -34,9 +33,9 @@ class BatchNormOp : public Operator { Tensor* output = this->Output(0); output->ResizeLike(input); - const int n = input->dim32(0); - const int channel = input->dim32(1); - const int sample_size = input->dim32(2) * input->dim32(3); + const TIndex n = input->dim(0); + const TIndex channel = input->dim(1); + const TIndex sample_size = input->dim(2) * input->dim(3); const float* input_ptr = input->data(); const float* scale_ptr = scale->data(); @@ -45,11 +44,13 @@ class BatchNormOp : public Operator { const float* var_ptr = var->data(); float* output_ptr = output->mutable_data(); - kernels::BatchNormFunctor()(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, + functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, n, channel, sample_size, - variance_epsilon, output_ptr); + output_ptr); return true; } + private: + kernels::BatchNormFunctor functor_; }; -- GitLab