diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index 9287feb5cf19c887e6cbd719b48f26577037fcc9..84312a03d2e59d10fd76eec93f9e4cff2199696a 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -13,16 +13,13 @@ namespace kernels { template struct BatchNormFunctor { - float variance_epsilon_; - - BatchNormFunctor(const float variance_epsilon) - : variance_epsilon_(variance_epsilon) {} void operator()(const T* input, const T* scale, const T* offset, const T* mean, const T* var, + const float variance_epsilon, const index_t n, const index_t channel, const index_t sample_size, @@ -37,7 +34,7 @@ struct BatchNormFunctor { // Y = new_scale * X + new_offset; T new_scale, new_offset; for (index_t c = 0; c < channel; ++c) { - new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_); + new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon); new_offset = offset[c] - mean[c] * new_scale; index_t pos = c * sample_size; @@ -60,6 +57,7 @@ void BatchNormFunctor::operator()( const float* offset, const float* mean, const float* var, + const float variance_epsilon, const index_t n, const index_t channel, const index_t sample_size, diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 9c99695bc0712f53b482ac2445500b6daef43eae..ca7b0f1a169fcb5e91f711be1ba7f24c1af3ce58 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -15,6 +15,7 @@ void BatchNormFunctor::operator()( const float* offset, const float* mean, const float* var, + const float variance_epsilon, const index_t n, const index_t channel, const index_t sample_size, @@ -31,7 +32,7 @@ void BatchNormFunctor::operator()( index_t count = sample_size >> 2; index_t remain_count = sample_size - (count << 2); for (index_t c = 0; c < channel; ++c) { - new_scale = scale[c] / std::sqrt(var[c] + this->variance_epsilon_); + new_scale = scale[c] / std::sqrt(var[c] + variance_epsilon); new_offset = offset[c] - mean[c] * new_scale; index_t pos = c * sample_size; diff --git a/mace/kernels/neon/max_pooling_neon_3x3.cc b/mace/kernels/neon/max_pooling_neon_3x3.cc index 5a8bf246c9d338b9e777df2caeabda81bf86c47b..0c7a74d0b0d1133d9367ceac158240e84aa49d83 100644 --- a/mace/kernels/neon/max_pooling_neon_3x3.cc +++ b/mace/kernels/neon/max_pooling_neon_3x3.cc @@ -3,7 +3,6 @@ // #include -#include #include #include "mace/core/common.h" diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index a9b1f9f52b3087f1daebee25b6daedac964e4922..e92d9ebb69de1e88512808bfb674842b8a6346c8 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -15,8 +15,7 @@ class BatchNormOp : public Operator { public: BatchNormOp(const OperatorDef& operator_def, Workspace* ws) : Operator(operator_def, ws), - functor_( - OperatorBase::GetSingleArgument("variance_epsilon", 1e-4)) {} + functor_() {} bool Run() override { const Tensor* input = this->Input(0); @@ -24,6 +23,7 @@ class BatchNormOp : public Operator { const Tensor* offset = this->Input(2); const Tensor* mean = this->Input(3); const Tensor* var = this->Input(4); + const Tensor* epsilon = this->Input(5); MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size()); @@ -35,6 +35,8 @@ class BatchNormOp : public Operator { mean->dim_size()); MACE_CHECK(var->dim_size() == 1, "var must be 1-dimensional. ", var->dim_size()); + MACE_CHECK(epsilon->dim_size() == 0, "epsilon must be 0-dimensional. ", + epsilon->dim_size()); Tensor* output = this->Output(0); output->ResizeLike(input); @@ -48,9 +50,10 @@ class BatchNormOp : public Operator { const T* offset_ptr = offset->data(); const T* mean_ptr = mean->data(); const T* var_ptr = var->data(); + const T* epsilon_ptr = epsilon->data(); T* output_ptr = output->mutable_data(); - functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, n, channel, + functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr, *epsilon_ptr, n, channel, sample_size, output_ptr); return true; } diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index ecd647d4394439a79b77197e5e8ea46718ae0efa..079ad6f1a15c82b98487ec3850b21ee29accb19e 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -19,6 +19,7 @@ static void BatchNorm( .Input("Offset") .Input("Mean") .Input("Var") + .Input("Epsilon") .Output("Output") .Finalize(net.operator_def()); @@ -28,6 +29,7 @@ static void BatchNorm( net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}, true); + net.AddInputFromArray("Epsilon", {}, {1e-3}); // Warm-up for (int i = 0; i < 5; ++i) { diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index f963de217ef2527ad890ffffedb6f1f68eb7a2d0..fd503ed567115d8c0508b642ccac4b07402b7cb0 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -18,6 +18,7 @@ TEST_F(BatchNormOpTest, SimpleCPU) { .Input("Offset") .Input("Mean") .Input("Var") + .Input("Epsilon") .Output("Output") .Finalize(net.operator_def()); @@ -28,6 +29,7 @@ TEST_F(BatchNormOpTest, SimpleCPU) { net.AddInputFromArray("Offset", {1}, {2.0}); net.AddInputFromArray("Mean", {1}, {10}); net.AddInputFromArray("Var", {1}, {11.67f}); + net.AddInputFromArray("Epsilon", {}, {1e-3}); // Run net.RunOp(); @@ -46,8 +48,8 @@ TEST_F(BatchNormOpTest, SimpleNeon) { // generate random input index_t batch = 1 + rand() % 10; index_t channels = 3 + rand() % 50; - index_t height = 10 + rand() % 50; - index_t width = 10 + rand() % 50; + index_t height = 103; + index_t width = 113; // Construct graph auto& net = test_net(); OpDefBuilder("BatchNorm", "BatchNormTest") @@ -56,6 +58,7 @@ TEST_F(BatchNormOpTest, SimpleNeon) { .Input("Offset") .Input("Mean") .Input("Var") + .Input("Epsilon") .Output("Output") .Finalize(net.operator_def()); @@ -65,6 +68,7 @@ TEST_F(BatchNormOpTest, SimpleNeon) { net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}, true); + net.AddInputFromArray("Epsilon", {}, {1e-3}); // run cpu net.RunOp();