diff --git a/mace/kernels/activation.h b/mace/kernels/activation.h
index 0a856fc945b3ce2eebc7eaa8e9d0f6fc4985629a..83acf4fb70311c710bcde1d7b08c1e6c4630879f 100644
--- a/mace/kernels/activation.h
+++ b/mace/kernels/activation.h
@@ -135,6 +135,7 @@ class ActivationFunctor {
   T relux_max_limit_;
   T prelu_alpha_;
   cl::Kernel kernel_;
+  std::string tuning_key_prefix_;
 };
 
 }  // namespace kernels
diff --git a/mace/kernels/opencl/activation_opencl.cc b/mace/kernels/opencl/activation_opencl.cc
index 50ad306368f503860cf4d6688bd10b74fd678d9b..934ce8da666b8e9910815b9f56de250a3511fd68 100644
--- a/mace/kernels/opencl/activation_opencl.cc
+++ b/mace/kernels/opencl/activation_opencl.cc
@@ -22,7 +22,6 @@ void ActivationFunctor::operator()(const Tensor *input,
   const index_t channels = input->dim(3);
 
   const index_t channel_blocks = RoundUpDiv4(channels);
-  std::string tuning_key_prefix;
 
   if (kernel_.get() == nullptr) {
     auto runtime = OpenCLRuntime::Global();
@@ -35,23 +34,23 @@ void ActivationFunctor::operator()(const Tensor *input,
     built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
     switch (activation_) {
       case RELU:
-        tuning_key_prefix = "relu_opencl_kernel_";
+        tuning_key_prefix_ = "relu_opencl_kernel_";
         built_options.emplace("-DUSE_RELU");
         break;
       case RELUX:
-        tuning_key_prefix = "relux_opencl_kernel_";
+        tuning_key_prefix_ = "relux_opencl_kernel_";
         built_options.emplace("-DUSE_RELUX");
         break;
       case PRELU:
-        tuning_key_prefix = "prelu_opencl_kernel_";
+        tuning_key_prefix_ = "prelu_opencl_kernel_";
         built_options.emplace("-DUSE_PRELU");
         break;
      case TANH:
-        tuning_key_prefix = "tanh_opencl_kernel_";
+        tuning_key_prefix_ = "tanh_opencl_kernel_";
         built_options.emplace("-DUSE_TANH");
         break;
       case SIGMOID:
-        tuning_key_prefix = "sigmoid_opencl_kernel_";
+        tuning_key_prefix_ = "sigmoid_opencl_kernel_";
         built_options.emplace("-DUSE_SIGMOID");
         break;
       default:
@@ -60,12 +59,10 @@ void ActivationFunctor::operator()(const Tensor *input,
     kernel_ = runtime->BuildKernel("activation", kernel_name, built_options);
 
     int idx = 0;
-    kernel_.setArg(
-        idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
+    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->buffer())));
     kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
     kernel_.setArg(idx++, static_cast<float>(prelu_alpha_));
-    kernel_.setArg(idx++,
-                   *(static_cast<cl::Image2D *>(output->buffer())));
+    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output->buffer())));
   }
 
   const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
@@ -73,7 +70,7 @@ void ActivationFunctor::operator()(const Tensor *input,
                            static_cast<uint32_t>(height * batch)};
   const std::vector<uint32_t> lws = {8, 16, 8, 1};
   std::string tuning_key =
-      Concat(tuning_key_prefix, output->dim(0), output->dim(1),
+      Concat(tuning_key_prefix_, output->dim(0), output->dim(1),
              output->dim(2), output->dim(3));
   TuningOrRun3DKernel(kernel_, tuning_key, gws, lws, future);
 }
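
The change itself is mechanical: tuning_key_prefix moves from a local in operator() to the member tuning_key_prefix_, because the switch that assigns it only runs inside the kernel_.get() == nullptr branch. On later calls the compiled kernel is cached, that branch is skipped, and a local prefix would be empty when the tuning key is concatenated. Below is a minimal standalone sketch of that build-once pattern; it is not MACE code, and the names (FakeActivationFunctor, built_) are hypothetical.

// Minimal sketch (assumed names, not MACE code): the expensive "build" branch
// runs only on the first call, so any state computed there and needed on every
// call -- such as the tuning key prefix -- must outlive that first call.
#include <iostream>
#include <string>
#include <utility>

class FakeActivationFunctor {
 public:
  explicit FakeActivationFunctor(std::string type) : type_(std::move(type)) {}

  void operator()(int batch, int height, int width, int channels) {
    if (!built_) {  // analogous to kernel_.get() == nullptr
      // Set once; kept as a member so cached calls still see it.
      tuning_key_prefix_ = type_ + "_opencl_kernel_";
      built_ = true;
    }
    // Every call, cached or not, can compose the full tuning key.
    const std::string tuning_key =
        tuning_key_prefix_ + std::to_string(batch) + "_" +
        std::to_string(height) + "_" + std::to_string(width) + "_" +
        std::to_string(channels);
    std::cout << tuning_key << "\n";
  }

 private:
  std::string type_;
  bool built_ = false;
  std::string tuning_key_prefix_;  // a local here would be empty on cached calls
};

int main() {
  FakeActivationFunctor relu("relu");
  relu(1, 224, 224, 32);  // first call: builds and records the prefix
  relu(1, 112, 112, 64);  // cached call: prefix still available
  return 0;
}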