diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index 36b2925742ce6214d3d4d41146221750f47a35b2..f2084be73274b0a3463ee1f2c900f2c0b3a271f6 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -13,12 +13,13 @@ namespace kernels { template struct BatchNormFunctor { + T epsilon_; + void operator()(const Tensor *input, const Tensor *scale, const Tensor *offset, const Tensor *mean, const Tensor *var, - const Tensor *epsilon, Tensor *output) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // The calculation formula for inference is @@ -38,7 +39,6 @@ struct BatchNormFunctor { Tensor::MappingGuard offset_mapper(offset); Tensor::MappingGuard mean_mapper(mean); Tensor::MappingGuard var_mapper(var); - Tensor::MappingGuard epsilon_mapper(epsilon); Tensor::MappingGuard output_mapper(output); const T *input_ptr = input->data(); @@ -46,7 +46,6 @@ struct BatchNormFunctor { const T *offset_ptr = offset->data(); const T *mean_ptr = mean->data(); const T *var_ptr = var->data(); - const T *epsilon_ptr = epsilon->data(); T *output_ptr = output->mutable_data(); vector new_scale(channels); @@ -54,7 +53,7 @@ struct BatchNormFunctor { #pragma omp parallel for for (index_t c = 0; c < channels; ++c) { - new_scale[c] = scale_ptr[c] / std::sqrt(var_ptr[c] + *epsilon_ptr); + new_scale[c] = scale_ptr[c] / std::sqrt(var_ptr[c] + epsilon_); new_offset[c] = offset_ptr[c] - mean_ptr[c] * new_scale[c]; } @@ -81,17 +80,17 @@ void BatchNormFunctor::operator()( const Tensor *offset, const Tensor *mean, const Tensor *var, - const Tensor *epsilon, Tensor *output); template struct BatchNormFunctor { + T epsilon_; + void operator()(const Tensor *input, const Tensor *scale, const Tensor *offset, const Tensor *mean, const Tensor *var, - const Tensor *epsilon, Tensor *output); }; diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 295cc59d48d44fe385ae7d86564674d5d8eecc78..7f67616442f84eff0a98a7bf7e022c224e9ceab9 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -15,7 +15,6 @@ void BatchNormFunctor::operator()( const Tensor *offset, const Tensor *mean, const Tensor *var, - const Tensor *epsilon, Tensor *output) { // Batch normalization in the paper https://arxiv.org/abs/1502.03167 . // The calculation formula for inference is @@ -34,14 +33,13 @@ void BatchNormFunctor::operator()( const float *offset_ptr = offset->data(); const float *mean_ptr = mean->data(); const float *var_ptr = var->data(); - const float *epsilon_ptr = epsilon->data(); float *output_ptr = output->mutable_data(); index_t count = sample_size >> 2; index_t remain_count = sample_size - (count << 2); #pragma omp parallel for for (index_t c = 0; c < channel; ++c) { - float new_scale = scale_ptr[c] / std::sqrt(var_ptr[c] + *epsilon_ptr); + float new_scale = scale_ptr[c] / std::sqrt(var_ptr[c] + epsilon_); float new_offset = offset_ptr[c] - mean_ptr[c] * new_scale; index_t pos = c * sample_size; @@ -69,4 +67,4 @@ void BatchNormFunctor::operator()( }; } // namespace kernels -} // namespace mace \ No newline at end of file +} // namespace mace diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index da0a755622339dd44f410991675ad2fa208b7cb8..09b160d7142f19e13c6e8478aa4f85fb6b88eaac 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -18,7 +18,6 @@ void BatchNormFunctor::operator()( const Tensor *offset, const Tensor *mean, const Tensor *var, - const Tensor *epsilon, Tensor *output) { const index_t batch = input->dim(0); @@ -48,7 +47,7 @@ void BatchNormFunctor::operator()( bm_kernel.setArg(idx++, *(static_cast(offset->buffer()))); bm_kernel.setArg(idx++, *(static_cast(mean->buffer()))); bm_kernel.setArg(idx++, *(static_cast(var->buffer()))); - bm_kernel.setArg(idx++, *(static_cast(epsilon->buffer()))); + bm_kernel.setArg(idx++, epsilon_); bm_kernel.setArg(idx++, *(static_cast(output->buffer()))); auto params_generator = [&kwg_size]()->std::vector> { diff --git a/mace/kernels/opencl/cl/batch_norm.cl b/mace/kernels/opencl/cl/batch_norm.cl index d0ad2e2aca77a2cc0fb7a51a8a4671060842b077..f8b2f8a271837be04516e942098d6ac7d17a0526 100644 --- a/mace/kernels/opencl/cl/batch_norm.cl +++ b/mace/kernels/opencl/cl/batch_norm.cl @@ -5,7 +5,7 @@ __kernel void batch_norm(__read_only image2d_t input, __read_only image2d_t offset, __read_only image2d_t mean, __read_only image2d_t var, - __global const DATA_TYPE *epsilon, + __private const DATA_TYPE epsilon, __write_only image2d_t output) { const int ch_blk = get_global_id(0); const int w = get_global_id(1); @@ -17,7 +17,7 @@ __kernel void batch_norm(__read_only image2d_t input, DATA_TYPE4 mean_value = READ_IMAGET(mean, SAMPLER, (int2)(ch_blk, 0)); DATA_TYPE4 var_value = READ_IMAGET(var, SAMPLER, (int2)(ch_blk, 0)); - DATA_TYPE4 new_scale = scale_value * rsqrt(var_value + (DATA_TYPE4)(*epsilon)); + DATA_TYPE4 new_scale = scale_value * rsqrt(var_value + (DATA_TYPE4)epsilon); DATA_TYPE4 new_offset = offset_value - mean_value * new_scale; const int pos = ch_blk * width + w; diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index 0c5909546f88a0e149a4b0628990c417b5b22630..195d9727e1842db37fc29a5865e6e2785a9838e9 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -14,7 +14,10 @@ template class BatchNormOp : public Operator { public: BatchNormOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws), functor_() {} + : Operator(operator_def, ws), functor_() { + functor_.epsilon_ = + OperatorBase::GetSingleArgument("epsilon", static_cast(-1)); + } bool Run() override { const Tensor *input = this->Input(INPUT); @@ -22,7 +25,6 @@ class BatchNormOp : public Operator { const Tensor *offset = this->Input(OFFSET); const Tensor *mean = this->Input(MEAN); const Tensor *var = this->Input(VAR); - const Tensor *epsilon = this->Input(EPSILON); MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional. ", input->dim_size()); @@ -34,13 +36,11 @@ class BatchNormOp : public Operator { mean->dim_size()); MACE_CHECK(var->dim_size() == 1, "var must be 1-dimensional. ", var->dim_size()); - MACE_CHECK(epsilon->dim_size() == 0, "epsilon must be 0-dimensional. ", - epsilon->dim_size()); Tensor *output = this->Output(OUTPUT); output->ResizeLike(input); - functor_(input, scale, offset, mean, var, epsilon, output); + functor_(input, scale, offset, mean, var, output); return true; } @@ -48,7 +48,7 @@ class BatchNormOp : public Operator { kernels::BatchNormFunctor functor_; protected: - OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR, EPSILON); + OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR); OP_OUTPUT_TAGS(OUTPUT); }; diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 4b34de14a0b298dee564bbd1aeab3f1434b2ac4f..3f54e745c3d527d1cc786793008b9c97a6362214 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -21,7 +21,6 @@ static void BatchNorm( net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}, true); - net.AddInputFromArray("Epsilon", {}, {1e-3}); if (D == DeviceType::OPENCL) { BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); @@ -35,7 +34,7 @@ static void BatchNorm( .Input("OffsetImage") .Input("MeanImage") .Input("VarImage") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("Output") .Finalize(net.NewOperatorDef()); } @@ -46,7 +45,7 @@ static void BatchNorm( .Input("Offset") .Input("Mean") .Input("Var") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("Output") .Finalize(net.NewOperatorDef()); } diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index 73e386caab16bbaff893fb56553a5ba3c4d5bae0..dc807ffbbf753fb07a55bab06066b8b8eed94e2d 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -20,7 +20,6 @@ void Simple() { net.AddInputFromArray("Offset", {1}, {2.0}); net.AddInputFromArray("Mean", {1}, {10}); net.AddInputFromArray("Var", {1}, {11.67f}); - net.AddInputFromArray("Epsilon", {}, {1e-3}); if (D == DeviceType::OPENCL) { BufferToImage(net, "Input", "InputImage", kernels::BufferType::IN_OUT); @@ -35,7 +34,7 @@ void Simple() { .Input("OffsetImage") .Input("MeanImage") .Input("VarImage") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("OutputImage") .Finalize(net.NewOperatorDef()); // Run @@ -50,7 +49,7 @@ void Simple() { .Input("Offset") .Input("Mean") .Input("Var") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("Output") .Finalize(net.NewOperatorDef()); // Run @@ -180,7 +179,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { .Input("Offset") .Input("Mean") .Input("Var") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("Output") .Finalize(net.NewOperatorDef()); @@ -190,7 +189,6 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}, true); - net.AddInputFromArray("Epsilon", {}, {1e-3}); // run cpu net.RunOp(); @@ -212,7 +210,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { .Input("OffsetImage") .Input("MeanImage") .Input("VarImage") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("OutputImage") .Finalize(net.NewOperatorDef()); @@ -246,7 +244,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { .Input("Offset") .Input("Mean") .Input("Var") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("Output") .Finalize(net.NewOperatorDef()); @@ -256,7 +254,6 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}, true); - net.AddInputFromArray("Epsilon", {}, {1e-3}); // run cpu net.RunOp(); @@ -279,7 +276,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { .Input("OffsetImage") .Input("MeanImage") .Input("VarImage") - .Input("Epsilon") + .AddFloatArg("epsilon", 1e-3) .Output("OutputImage") .Finalize(net.NewOperatorDef());