From 4dd645b8f0e38450d97cbf43bc4a61dc8ba57df8 Mon Sep 17 00:00:00 2001 From: liuqi Date: Tue, 31 Oct 2017 15:25:04 +0800 Subject: [PATCH] Vectorization batch norm opencl kernel. --- mace/kernels/opencl/cl/batch_norm.cl | 10 +++++----- mace/utils/BUILD | 1 + mace/utils/tuner.h | 10 +++++----- mace/utils/tuner_test.cc | 3 ++- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/mace/kernels/opencl/cl/batch_norm.cl b/mace/kernels/opencl/cl/batch_norm.cl index 3b7dcf08..3fc449ce 100644 --- a/mace/kernels/opencl/cl/batch_norm.cl +++ b/mace/kernels/opencl/cl/batch_norm.cl @@ -22,12 +22,12 @@ void kernel batch_norm(global const float *input, barrier(CLK_LOCAL_MEM_FENCE); - const int sample_offset = (batch * channels + channel) * pixels + pixel_offset*4; - const float *input_ptr = input + sample_offset; - float *output_ptr = output + sample_offset; + const int image_offset = (batch * channels + channel) * pixels + pixel_offset*4; + const float *input_ptr = input + image_offset; + float *output_ptr = output + image_offset; const int end = (batch * channels + channel + 1) * pixels; - if ((sample_offset+4) > end) { - for (int i = sample_offset; i < end; ++i) { + if ((image_offset+4) > end) { + for (int i = image_offset; i < end; ++i) { *output_ptr = new_scale[local_channel].x * *input_ptr + new_offset[local_channel].x; ++input_ptr; ++output_ptr; diff --git a/mace/utils/BUILD b/mace/utils/BUILD index 50f65f4e..cd5583df 100644 --- a/mace/utils/BUILD +++ b/mace/utils/BUILD @@ -39,6 +39,7 @@ cc_library( copts = ["-std=c++11"], deps = [ "//mace/core", + "//mace/core:opencl_runtime", ], ) diff --git a/mace/utils/tuner.h b/mace/utils/tuner.h index de96f87e..1d36f7f5 100644 --- a/mace/utils/tuner.h +++ b/mace/utils/tuner.h @@ -33,7 +33,7 @@ class Tuner { const std::function>()> &param_generator, const std::function &)> &func) { - if (IsTuning()) { + if (IsTuning() && param_generator != nullptr) { // tune std::vector opt_param = default_param; RetType res = 
Tune(param_generator, func, opt_param); @@ -68,7 +68,7 @@ class Tuner { } inline void WriteRunParameters() { - VLOG(0) << path_; + VLOG(1) << path_; if (path_ != nullptr) { std::ofstream ofs(path_, std::ios::binary | std::ios::out); if (ofs.is_open()) { @@ -78,14 +78,14 @@ class Tuner { int32_t key_size = kp.first.size(); ofs.write(reinterpret_cast(&key_size), sizeof(key_size)); ofs.write(kp.first.c_str(), key_size); - VLOG(0) << kp.first.c_str(); + VLOG(1) << kp.first.c_str(); auto &params = kp.second; int32_t params_size = params.size() * sizeof(param_type); ofs.write(reinterpret_cast(&params_size), sizeof(params_size)); for (auto &param : params) { ofs.write(reinterpret_cast(&param), sizeof(params_size)); - VLOG(0) << param; + VLOG(1) << param; } } ofs.close(); @@ -144,7 +144,7 @@ class Tuner { } template - inline RetType Tune(std::function>()> param_generator, + inline RetType Tune(const std::function>()> &param_generator, const std::function &)> &func, std::vector &opt_params) { RetType res; diff --git a/mace/utils/tuner_test.cc b/mace/utils/tuner_test.cc index bcb5c620..ea80dd4d 100644 --- a/mace/utils/tuner_test.cc +++ b/mace/utils/tuner_test.cc @@ -13,7 +13,8 @@ class TunerTest: public ::testing::Test { protected: virtual void SetUp() { remove( "/data/local/tmp/mace.config" ); - setenv("MACE_RUN_PARAMTER_PATH", "/data/local/tmp/mace.config", 1); + setenv("MACE_RUN_PARAMETER_PATH", "/data/local/tmp/mace.config", 1); + setenv("MACE_TUNING", "1", 1); } }; -- GitLab