提交 303311af 编写于 作者: L liuqi

Change index type to TIndex and add a member variable to the batch_norm kernel functor.

上级 9aabdccc
...@@ -44,8 +44,8 @@ class OperatorBase { ...@@ -44,8 +44,8 @@ class OperatorBase {
*operator_def_, name, default_value); *operator_def_, name, default_value);
} }
inline const Tensor *Input(int idx) { inline const Tensor *Input(TIndex idx) {
MACE_CHECK(static_cast<size_t>(idx) < inputs_.size()); MACE_CHECK(idx < inputs_.size());
return inputs_[idx]; return inputs_[idx];
} }
......
...@@ -68,15 +68,8 @@ class Tensor { ...@@ -68,15 +68,8 @@ class Tensor {
inline TIndex dim_size() const { return shape_.size(); } inline TIndex dim_size() const { return shape_.size(); }
inline int dim32(int index) const { inline TIndex dim(TIndex index) const {
MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit"); MACE_CHECK(index < shape_.size(), "Exceeding ndim limit");
MACE_CHECK(index >= 0, "Cannot have negative dimension index");
MACE_CHECK(shape_[index], std::numeric_limits<int>::max());
return static_cast<int>(shape_[index]);
}
inline TIndex dim(int index) const {
MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit");
MACE_CHECK(index >= 0, "Cannot have negative dimension index"); MACE_CHECK(index >= 0, "Cannot have negative dimension index");
return shape_[index]; return shape_[index];
} }
...@@ -133,8 +126,8 @@ class Tensor { ...@@ -133,8 +126,8 @@ class Tensor {
} }
template <typename T> template <typename T>
inline void Copy(const T* src, size_t size) { inline void Copy(const T* src, TIndex size) {
MACE_CHECK(static_cast<TIndex>(size) == size_, "copy src and dst with different size."); MACE_CHECK(size == size_, "copy src and dst with different size.");
CopyBytes(static_cast<const void*>(src), sizeof(T) * size); CopyBytes(static_cast<const void*>(src), sizeof(T) * size);
} }
......
...@@ -11,33 +11,29 @@ ...@@ -11,33 +11,29 @@
namespace mace { namespace mace {
namespace kernels { namespace kernels {
template <DeviceType D, typename T>
struct BatchNormFunctorBase {
BatchNormFunctorBase(const float variance_epsilon)
:variance_epsilon_(variance_epsilon){}
template<DeviceType D> float variance_epsilon_;
struct BatchNormFunctor {
void operator()(const float* input,
const float* scale,
const float* offset,
const float* mean,
const float* var,
const int n,
const int channel,
const int sample_size,
const float variance_epsilon,
float* output) ;
}; };
template<>
struct BatchNormFunctor<DeviceType::CPU> { template<DeviceType D, typename T>
void operator()(const float* input, struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
const float* scale, BatchNormFunctor(const float variance_epsilon)
const float* offset, :BatchNormFunctorBase<D, T>(variance_epsilon){}
const float* mean,
const float* var, void operator()(const T* input,
const int n, const T* scale,
const int channel, const T* offset,
const int sample_size, const T* mean,
const float variance_epsilon, const T* var,
float* output) { const TIndex n,
const TIndex channel,
const TIndex sample_size,
T* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is // The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X + // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
...@@ -45,21 +41,22 @@ struct BatchNormFunctor<DeviceType::CPU> { ...@@ -45,21 +41,22 @@ struct BatchNormFunctor<DeviceType::CPU> {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} } // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val; // new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset; // Y = new_scale * X + new_offset;
float new_scale, new_offset; T new_scale, new_offset;
for (int c = 0; c < channel; ++c) { for (TIndex c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon); new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale; new_offset = offset[c] - mean[c] * new_scale;
for (int i = 0; i < n; ++i) { for (TIndex i = 0; i < n; ++i) {
int pos = i * channel * sample_size + c * sample_size; TIndex pos = i * channel * sample_size + c * sample_size;
const float* input_sample_ptr = input + pos; const T* input_sample_ptr = input + pos;
float* output_sample_ptr = output + pos; T* output_sample_ptr = output + pos;
for (int j = 0; j < sample_size; ++j) { for (TIndex j = 0; j < sample_size; ++j) {
output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset; output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset;
} }
} }
} }
} }
}; };
} // namepsace kernels } // namepsace kernels
......
...@@ -2,25 +2,27 @@ ...@@ -2,25 +2,27 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#if __ARM_NEON //#if __ARM_NEON
#include <arm_neon.h> #include <arm_neon.h>
#include "mace/kernels/batch_norm.h" #include "mace/kernels/batch_norm.h"
namespace mace { namespace mace {
namespace kernels { namespace kernels {
template<> template <typename T>
struct BatchNormFunctor<DeviceType::NEON> { struct BatchNormFunctor<DeviceType::NEON> : public BatchNormFunctorBase<DeviceType::NEON, T> {
void operator()(const float* input, BatchNormFunctor(const float variance_epsilon)
const float* scale, :BatchNormFunctorBase<DeviceType::NEON, T>(variance_epsilon){}
const float* offset,
const float* mean, void operator()(const T* input,
const float* var, const T* scale,
const T* offset,
const T* mean,
const T* var,
const int n, const int n,
const int channel, const int channel,
const int sample_size, const int sample_size,
const float variance_epsilon, T* output) {
float* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is // The calculation formula for inference is
...@@ -29,21 +31,21 @@ struct BatchNormFunctor<DeviceType::NEON> { ...@@ -29,21 +31,21 @@ struct BatchNormFunctor<DeviceType::NEON> {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} } // new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val; // new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset; // Y = new_scale * X + new_offset;
float new_scale, new_offset; T new_scale, new_offset;
int count = sample_size >> 2; int count = sample_size >> 2;
int remain_count = sample_size - count; int remain_count = sample_size - count;
for (int c = 0; c < channel; ++c) { for (TIndex c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon); new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale; new_offset = offset[c] - mean[c] * new_scale;
float32x4_t new_scale_f = vdupq_n_f32(new_scale); float32x4_t new_scale_f = vdupq_n_f32(new_scale);
float32x4_t new_offset_f = vdupq_n_f32(new_offset); float32x4_t new_offset_f = vdupq_n_f32(new_offset);
for (int i = 0; i < n; ++i) { for (TIndex i = 0; i < n; ++i) {
int pos = i * channel * sample_size + c * sample_size; TIndex pos = i * channel * sample_size + c * sample_size;
const float* input_sample_ptr = input + pos; const float* input_sample_ptr = input + pos;
float* output_sample_ptr = output + pos; float* output_sample_ptr = output + pos;
for(int j = 0; j < count; ++j) { for(TIndex j = 0; j < count; ++j) {
float32x4_t input_f = vld1q_f32(input_sample_ptr); float32x4_t input_f = vld1q_f32(input_sample_ptr);
float32x4_t output_f = new_offset_f; float32x4_t output_f = new_offset_f;
output_f = vfmaq_f32(output_f, input_f, new_scale_f); output_f = vfmaq_f32(output_f, input_f, new_scale_f);
...@@ -51,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON> { ...@@ -51,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON> {
input_sample_ptr += 4; input_sample_ptr += 4;
output_sample_ptr += 4; output_sample_ptr += 4;
} }
for(int j = 0; j < remain_count; ++j) { for(TIndex j = 0; j < remain_count; ++j) {
*output_sample_ptr = new_scale * *input_sample_ptr + new_offset; *output_sample_ptr = new_scale * *input_sample_ptr + new_offset;
++output_sample_ptr; ++output_sample_ptr;
++input_sample_ptr; ++input_sample_ptr;
...@@ -63,4 +65,4 @@ struct BatchNormFunctor<DeviceType::NEON> { ...@@ -63,4 +65,4 @@ struct BatchNormFunctor<DeviceType::NEON> {
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
#endif // __ARM_NEON //#endif // __ARM_NEON
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
// //
#include "mace/ops/batch_norm.h" #include "mace/ops/batch_norm.h"
#include "mace/proto/mace.pb.h"
namespace mace { namespace mace {
......
...@@ -14,7 +14,8 @@ template<DeviceType D, class T> ...@@ -14,7 +14,8 @@ template<DeviceType D, class T>
class BatchNormOp : public Operator<D, T> { class BatchNormOp : public Operator<D, T> {
public: public:
BatchNormOp(const OperatorDef &operator_def, Workspace *ws) BatchNormOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws) {} : Operator<D, T>(operator_def, ws),
functor_(OperatorBase::GetSingleArgument<float>("variance_epsilon", 1e-4)){}
bool Run() override { bool Run() override {
const Tensor* input = this->Input(0); const Tensor* input = this->Input(0);
...@@ -23,8 +24,6 @@ class BatchNormOp : public Operator<D, T> { ...@@ -23,8 +24,6 @@ class BatchNormOp : public Operator<D, T> {
const Tensor* mean = this->Input(3); const Tensor* mean = this->Input(3);
const Tensor* var = this->Input(4); const Tensor* var = this->Input(4);
const float variance_epsilon = this->template GetSingleArgument<float>("variance_epsilon", 1e-4);
MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size()); MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size());
MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ", scale->dim_size()); MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ", scale->dim_size());
MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ", offset->dim_size()); MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ", offset->dim_size());
...@@ -34,9 +33,9 @@ class BatchNormOp : public Operator<D, T> { ...@@ -34,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
Tensor* output = this->Output(0); Tensor* output = this->Output(0);
output->ResizeLike(input); output->ResizeLike(input);
const int n = input->dim32(0); const TIndex n = input->dim(0);
const int channel = input->dim32(1); const TIndex channel = input->dim(1);
const int sample_size = input->dim32(2) * input->dim32(3); const TIndex sample_size = input->dim(2) * input->dim(3);
const float* input_ptr = input->data<float>(); const float* input_ptr = input->data<float>();
const float* scale_ptr = scale->data<float>(); const float* scale_ptr = scale->data<float>();
...@@ -45,11 +44,13 @@ class BatchNormOp : public Operator<D, T> { ...@@ -45,11 +44,13 @@ class BatchNormOp : public Operator<D, T> {
const float* var_ptr = var->data<float>(); const float* var_ptr = var->data<float>();
float* output_ptr = output->mutable_data<float>(); float* output_ptr = output->mutable_data<float>();
kernels::BatchNormFunctor<D>()(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr,
n, channel, sample_size, n, channel, sample_size,
variance_epsilon, output_ptr); output_ptr);
return true; return true;
} }
private:
kernels::BatchNormFunctor<D, T> functor_;
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册