提交 303311af 编写于 作者: L liuqi

Change index type to TIndex and add a member variable to the batch_norm kernel functor.

上级 9aabdccc
......@@ -44,8 +44,8 @@ class OperatorBase {
*operator_def_, name, default_value);
}
// Returns the idx-th input tensor of this operator (non-owning pointer).
inline const Tensor *Input(TIndex idx) {
  // Check idx >= 0 first: casting a negative TIndex to size_t wraps to a
  // huge value, which would otherwise produce a confusing bounds failure
  // (or, with a plain signed/unsigned compare, a compiler warning).
  MACE_CHECK(idx >= 0, "Cannot have negative input index");
  MACE_CHECK(static_cast<size_t>(idx) < inputs_.size());
  return inputs_[idx];
}
......
......@@ -68,15 +68,8 @@ class Tensor {
// Number of dimensions (rank) of this tensor.
inline TIndex dim_size() const { return shape_.size(); }

// Returns the extent of dimension `index`.
inline TIndex dim(TIndex index) const {
  // Negative check must come first: the cast below would turn a negative
  // index into a huge size_t and report a misleading "Exceeding ndim limit".
  MACE_CHECK(index >= 0, "Cannot have negative dimension index");
  MACE_CHECK(static_cast<size_t>(index) < shape_.size(), "Exceeding ndim limit");
  return shape_[index];
}
......@@ -133,8 +126,8 @@ class Tensor {
}
template <typename T>
inline void Copy(const T* src, size_t size) {
MACE_CHECK(static_cast<TIndex>(size) == size_, "copy src and dst with different size.");
inline void Copy(const T* src, TIndex size) {
MACE_CHECK(size == size_, "copy src and dst with different size.");
CopyBytes(static_cast<const void*>(src), sizeof(T) * size);
}
......
......@@ -11,33 +11,29 @@
namespace mace {
namespace kernels {
template <DeviceType D, typename T>
struct BatchNormFunctorBase {
BatchNormFunctorBase(const float variance_epsilon)
:variance_epsilon_(variance_epsilon){}
template<DeviceType D>
struct BatchNormFunctor {
void operator()(const float* input,
const float* scale,
const float* offset,
const float* mean,
const float* var,
const int n,
const int channel,
const int sample_size,
const float variance_epsilon,
float* output) ;
float variance_epsilon_;
};
template<>
struct BatchNormFunctor<DeviceType::CPU> {
void operator()(const float* input,
const float* scale,
const float* offset,
const float* mean,
const float* var,
const int n,
const int channel,
const int sample_size,
const float variance_epsilon,
float* output) {
template<DeviceType D, typename T>
struct BatchNormFunctor : public BatchNormFunctorBase<D, T> {
BatchNormFunctor(const float variance_epsilon)
:BatchNormFunctorBase<D, T>(variance_epsilon){}
void operator()(const T* input,
const T* scale,
const T* offset,
const T* mean,
const T* var,
const TIndex n,
const TIndex channel,
const TIndex sample_size,
T* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
......@@ -45,21 +41,22 @@ struct BatchNormFunctor<DeviceType::CPU> {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
float new_scale, new_offset;
for (int c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon);
T new_scale, new_offset;
for (TIndex c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale;
for (int i = 0; i < n; ++i) {
int pos = i * channel * sample_size + c * sample_size;
const float* input_sample_ptr = input + pos;
float* output_sample_ptr = output + pos;
for (int j = 0; j < sample_size; ++j) {
for (TIndex i = 0; i < n; ++i) {
TIndex pos = i * channel * sample_size + c * sample_size;
const T* input_sample_ptr = input + pos;
T* output_sample_ptr = output + pos;
for (TIndex j = 0; j < sample_size; ++j) {
output_sample_ptr[j] = new_scale * input_sample_ptr[j] + new_offset;
}
}
}
}
};
} // namepsace kernels
......
......@@ -2,25 +2,27 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
// Compile this translation unit only on ARM targets: <arm_neon.h> and the
// intrinsics used below do not exist elsewhere. (The guard had been
// commented out, which breaks every non-ARM build.)
#if __ARM_NEON

#include <arm_neon.h>

#include "mace/kernels/batch_norm.h"
namespace mace {
namespace kernels {
template<>
struct BatchNormFunctor<DeviceType::NEON> {
void operator()(const float* input,
const float* scale,
const float* offset,
const float* mean,
const float* var,
template <typename T>
struct BatchNormFunctor<DeviceType::NEON> : public BatchNormFunctorBase<DeviceType::NEON, T> {
BatchNormFunctor(const float variance_epsilon)
:BatchNormFunctorBase<DeviceType::NEON, T>(variance_epsilon){}
void operator()(const T* input,
const T* scale,
const T* offset,
const T* mean,
const T* var,
const int n,
const int channel,
const int sample_size,
const float variance_epsilon,
float* output) {
T* output) {
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
......@@ -29,21 +31,21 @@ struct BatchNormFunctor<DeviceType::NEON> {
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
float new_scale, new_offset;
T new_scale, new_offset;
int count = sample_size >> 2;
int remain_count = sample_size - count;
for (int c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon);
for (TIndex c = 0; c < channel; ++c) {
new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon_);
new_offset = offset[c] - mean[c] * new_scale;
float32x4_t new_scale_f = vdupq_n_f32(new_scale);
float32x4_t new_offset_f = vdupq_n_f32(new_offset);
for (int i = 0; i < n; ++i) {
int pos = i * channel * sample_size + c * sample_size;
for (TIndex i = 0; i < n; ++i) {
TIndex pos = i * channel * sample_size + c * sample_size;
const float* input_sample_ptr = input + pos;
float* output_sample_ptr = output + pos;
for(int j = 0; j < count; ++j) {
for(TIndex j = 0; j < count; ++j) {
float32x4_t input_f = vld1q_f32(input_sample_ptr);
float32x4_t output_f = new_offset_f;
output_f = vfmaq_f32(output_f, input_f, new_scale_f);
......@@ -51,7 +53,7 @@ struct BatchNormFunctor<DeviceType::NEON> {
input_sample_ptr += 4;
output_sample_ptr += 4;
}
for(int j = 0; j < remain_count; ++j) {
for(TIndex j = 0; j < remain_count; ++j) {
*output_sample_ptr = new_scale * *input_sample_ptr + new_offset;
++output_sample_ptr;
++input_sample_ptr;
......@@ -63,4 +65,4 @@ struct BatchNormFunctor<DeviceType::NEON> {
} // namespace kernels
} // namespace mace
#endif  // __ARM_NEON
......@@ -3,7 +3,6 @@
//
#include "mace/ops/batch_norm.h"
#include "mace/proto/mace.pb.h"
namespace mace {
......
......@@ -14,7 +14,8 @@ template<DeviceType D, class T>
class BatchNormOp : public Operator<D, T> {
public:
BatchNormOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws) {}
: Operator<D, T>(operator_def, ws),
functor_(OperatorBase::GetSingleArgument<float>("variance_epsilon", 1e-4)){}
bool Run() override {
const Tensor* input = this->Input(0);
......@@ -23,8 +24,6 @@ class BatchNormOp : public Operator<D, T> {
const Tensor* mean = this->Input(3);
const Tensor* var = this->Input(4);
const float variance_epsilon = this->template GetSingleArgument<float>("variance_epsilon", 1e-4);
MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size());
MACE_CHECK(scale->dim_size() == 1, "scale must be 1-dimensional. ", scale->dim_size());
MACE_CHECK(offset->dim_size() == 1, "offset must be 1-dimensional. ", offset->dim_size());
......@@ -34,9 +33,9 @@ class BatchNormOp : public Operator<D, T> {
Tensor* output = this->Output(0);
output->ResizeLike(input);
const int n = input->dim32(0);
const int channel = input->dim32(1);
const int sample_size = input->dim32(2) * input->dim32(3);
const TIndex n = input->dim(0);
const TIndex channel = input->dim(1);
const TIndex sample_size = input->dim(2) * input->dim(3);
const float* input_ptr = input->data<float>();
const float* scale_ptr = scale->data<float>();
......@@ -45,11 +44,13 @@ class BatchNormOp : public Operator<D, T> {
const float* var_ptr = var->data<float>();
float* output_ptr = output->mutable_data<float>();
kernels::BatchNormFunctor<D>()(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr,
functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr,
n, channel, sample_size,
variance_epsilon, output_ptr);
output_ptr);
return true;
}
private:
kernels::BatchNormFunctor<D, T> functor_;
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册