diff --git a/.gitignore b/.gitignore
index a140a563c14ed3ee0df64b29c3bf65f762011dfa..f8cd60a580410913d290167dd5e67f4bf92e4542 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 bazel-*
 .idea/
 cmake-build-debug/
+*.sh
diff --git a/mace/kernels/benchmark/batch_norm_benchmark.cc b/mace/kernels/benchmark/batch_norm_benchmark.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a5e5279639dad37af4ed341068eeb74da3fd30ea
--- /dev/null
+++ b/mace/kernels/benchmark/batch_norm_benchmark.cc
@@ -0,0 +1,76 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/core/testing/test_benchmark.h"
+#include "mace/kernels/batch_norm.h"
+
+namespace mace {
+template <DeviceType D, typename T>
+static void BatchNorm(int iters, int batch, int channels, int height, int width) {
+
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::normal_distribution<T> nd(0, 1);
+
+  TIndex input_size = batch * channels * height * width;
+  std::vector<T> input(input_size, 0.0);
+  std::vector<T> scale(channels, 0.0);
+  std::vector<T> offset(channels, 0.0);
+  std::vector<T> mean(channels, 0.0);
+  std::vector<T> var(channels, 0.0);
+
+  for (int i = 0; i < input_size; ++i) {
+    input[i] = nd(gen);
+  }
+  for (int i = 0; i < channels; ++i) {
+    scale[i] = nd(gen);
+    offset[i] = nd(gen);
+    mean[i] = nd(gen);
+    var[i] = std::abs(nd(gen));
+  }
+
+  // declare output
+  std::unique_ptr<T[]> output(new T[input_size]);
+  auto functor = kernels::BatchNormFunctor<D, T>(1e-5);
+
+  while(iters--) {
+    functor(input.data(),
+            scale.data(),
+            offset.data(),
+            mean.data(),
+            var.data(),
+            batch,
+            channels,
+            height * width,
+            output.get());
+  }
+}
+
+#define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE) \
+  static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
+      int iters) { \
+    const int64 tot = static_cast<int64>(iters) * N * C * H * W; \
+    mace::testing::ItemsProcessed(tot); \
+    mace::testing::BytesProcessed(tot * (sizeof(TYPE)));\
+    BatchNorm<DeviceType::DEVICE, TYPE>(iters, N, C, H, W); \
+  } \
+  BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
+
+#define BM_BATCH_NORM(N, C, H, W, TYPE) \
+  BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
+  BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);
+
+BM_BATCH_NORM(1, 1, 128, 128, float);
+BM_BATCH_NORM(1, 1, 512, 512, float);
+BM_BATCH_NORM(1, 1, 1024, 1024, float);
+BM_BATCH_NORM(16, 1, 256, 256, float);
+BM_BATCH_NORM(32, 1, 256, 256, float);
+BM_BATCH_NORM(64, 1, 256, 256, float);
+BM_BATCH_NORM(1, 3, 128, 128, float);
+BM_BATCH_NORM(1, 3, 512, 512, float);
+BM_BATCH_NORM(1, 3, 1024, 1024, float);
+BM_BATCH_NORM(16, 3, 256, 256, float);
+BM_BATCH_NORM(32, 3, 256, 256, float);
+BM_BATCH_NORM(64, 3, 256, 256, float);
+} // namespace mace
\ No newline at end of file
diff --git a/mace/kernels/test/batch_norm_neon_test.cc b/mace/kernels/test/batch_norm_neon_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..32a6c1f9ad0c72c1c4ef54164e2613c3333f03cd
--- /dev/null
+++ b/mace/kernels/test/batch_norm_neon_test.cc
@@ -0,0 +1,73 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include <random>
+#include "gtest/gtest.h"
+#include "mace/kernels/batch_norm.h"
+
+namespace mace {
+
+TEST(BatchNormNeonTest, Simple) {
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::normal_distribution<float> nd(0, 1);
+  srand(time(NULL));
+
+  // generate random input
+  TIndex batch = 1 + rand() % 128;
+  TIndex channels = 3;
+  TIndex height = 2 + rand() % 100;
+  TIndex width = 2 + rand() % 100;
+
+  TIndex input_size = batch * channels * height * width;
+  std::vector<float> input(input_size, 0.0);
+  std::vector<float> scale(channels, 0.0);
+  std::vector<float> offset(channels, 0.0);
+  std::vector<float> mean(channels, 0.0);
+  std::vector<float> var(channels, 0.0);
+
+  for (int i = 0; i < input_size; ++i) {
+    input[i] = nd(gen);
+  }
+  for (int i = 0; i < channels; ++i) {
+    scale[i] = nd(gen);
+    offset[i] = nd(gen);
+    mean[i] = nd(gen);
+    var[i] = std::abs(nd(gen));
+  }
+
+  // declare output
+  std::unique_ptr<float[]> output(new float[input_size]);
+  std::unique_ptr<float[]> output_neon(new float[input_size]);
+
+  kernels::BatchNormFunctor<DeviceType::CPU, float>(1e-5)(
+      input.data(),
+      scale.data(),
+      offset.data(),
+      mean.data(),
+      var.data(),
+      batch,
+      channels,
+      height * width,
+      output.get()
+  );
+  kernels::BatchNormFunctor<DeviceType::NEON, float>(1e-5)(
+      input.data(),
+      scale.data(),
+      offset.data(),
+      mean.data(),
+      var.data(),
+      batch,
+      channels,
+      height * width,
+      output_neon.get()
+  );
+
+  for (TIndex i = 0; i < input_size; ++i) {
+    EXPECT_FLOAT_EQ(output[i], output_neon[i]);
+  }
+
+}
+
+} // namespace mace
\ No newline at end of file
diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h
index 59c227c865b519b81c7e6d818a052336acd2e570..e58886b057df3a3f7dd89bb9989a3e1524cae8ba 100644
--- a/mace/ops/batch_norm.h
+++ b/mace/ops/batch_norm.h
@@ -37,12 +37,12 @@ class BatchNormOp : public Operator<D, T> {
     const index_t channel = input->dim(1);
     const index_t sample_size = input->dim(2) * input->dim(3);
 
-    const float* input_ptr = input->data<float>();
-    const float* scale_ptr = scale->data<float>();
-    const float* offset_ptr = offset->data<float>();
-    const float* mean_ptr = mean->data<float>();
-    const float* var_ptr = var->data<float>();
-    float* output_ptr = output->mutable_data<float>();
+    const T* input_ptr = input->data<T>();
+    const T* scale_ptr = scale->data<T>();
+    const T* offset_ptr = offset->data<T>();
+    const T* mean_ptr = mean->data<T>();
+    const T* var_ptr = var->data<T>();
+    T* output_ptr = output->mutable_data<T>();
 
     functor_(input_ptr, scale_ptr, offset_ptr, mean_ptr, var_ptr,
              n, channel, sample_size,
diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc
index 5b52d0590dbdb77b1ef8c5a35215b7c6a9582ef1..ef89fcee44b7cccda03c1e3919c0ca2132afb668 100644
--- a/mace/ops/batch_norm_test.cc
+++ b/mace/ops/batch_norm_test.cc
@@ -23,14 +23,10 @@ TEST_F(BatchNormOpTest, Simple) {
   // Add input data
   AddInputFromArray<float>("Input", {1, 1, 6, 2},
                            {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15});
-  AddInputFromArray<float>("Scale", {2},
-                           {4.0f, 4.0f});
-  AddInputFromArray<float>("Offset", {2},
-                           {2.0, 2.0});
-  AddInputFromArray<float>("Mean", {2},
-                           {10, 10});
-  AddInputFromArray<float>("Var", {2},
-                           {11.67f, 11.67f});
+  AddInputFromArray<float>("Scale", {1}, {4.0f});
+  AddInputFromArray<float>("Offset", {1}, {2.0});
+  AddInputFromArray<float>("Mean", {1}, {10});
+  AddInputFromArray<float>("Var", {1}, {11.67f});
 
   // Run
   RunOp();
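
Reviewer note: the NEON test above only checks that the CPU and NEON functors agree with each other, not that either matches an independent reference. The sketch below is not part of the diff; the BatchNormReference name and signature are illustrative. It spells out the computation both functors are presumably performing, standard inference-time batch normalization over NCHW data: y = scale[c] * (x - mean[c]) / sqrt(var[c] + eps) + offset[c].

    #include <cmath>

    // Naive per-channel reference for NCHW batch norm inference (assumed formula):
    //   y = scale[c] * (x - mean[c]) / sqrt(var[c] + eps) + offset[c]
    template <typename T>
    void BatchNormReference(const T* input, const T* scale, const T* offset,
                            const T* mean, const T* var,
                            int batch, int channels, int sample_size,
                            T eps, T* output) {
      for (int n = 0; n < batch; ++n) {
        for (int c = 0; c < channels; ++c) {
          // Fold the per-channel terms into a single multiply-add.
          const T inv_std = static_cast<T>(1) / std::sqrt(var[c] + eps);
          const T alpha = scale[c] * inv_std;
          const T beta = offset[c] - alpha * mean[c];
          const T* in = input + (n * channels + c) * sample_size;
          T* out = output + (n * channels + c) * sample_size;
          for (int i = 0; i < sample_size; ++i) {
            out[i] = alpha * in[i] + beta;
          }
        }
      }
    }

With such a reference, the test could assert EXPECT_NEAR(output_neon[i], reference[i], tol) instead of relying on exact CPU/NEON agreement, which EXPECT_FLOAT_EQ may make brittle once the NEON path reorders or fuses arithmetic.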