diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 02fa29a0c64ab3faa2ced1f6a9ada0d9ff0fc44d..d5ba7010c8b58c809e98ea7b1ba9afb840a948e5 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -361,7 +361,7 @@ const GPU_TYPE OpenCLRuntime::GetGPUType() const { return gpu_type_; } -const std::string &OpenCLRuntime::GetOpenclVersion() { +const std::string &OpenCLRuntime::GetOpenclVersion() const { return opencl_version_; } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index 1b257e6bdbb06bbe7fccc1e9646ee674228f7e23..d3cc5cc7037ddea9a717ab537e8889aa6ce50bd3 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -56,7 +56,7 @@ class OpenCLRuntime { uint64_t GetKernelMaxWorkGroupSize(const cl::Kernel &kernel); uint64_t GetKernelWaveSize(const cl::Kernel &kernel); const GPU_TYPE GetGPUType() const; - const std::string &GetOpenclVersion(); + const std::string &GetOpenclVersion() const; cl::Kernel BuildKernel(const std::string &program_name, const std::string &kernel_name, const std::set &build_options); diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index ce0e596558ed76ee36dd84e40082aec718555948..2fb1252b1309b72b0218396e049e3ff68d89b874 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -71,7 +71,8 @@ void BiasAddFunctor::operator()(const Tensor *input, } error = runtime->command_queue().enqueueNDRangeKernel( - kernel_, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), + kernel_, cl::NullRange, + cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), cl::NDRange(lws[0], lws[1], lws[2]), nullptr, &event); } MACE_CHECK(error == CL_SUCCESS); diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 1ddf37bc901e6456e8bc5ace3742fd07f5ee788c..ce5a77182ff74522b168300aae9e1b3b2914f6d4 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -122,7 +122,7 @@ static void ConcatN(cl::Kernel *kernel, kernel->setArg(idx++, gws[2]); chan_blk_offset += input_channel_blk; - const uint32_t kwg_size = + const uint32_t kwg_size = static_cast(runtime->GetKernelMaxWorkGroupSize(*kernel)); const std::vector lws = {8, kwg_size / 64, 8, 1}; std::stringstream ss; diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index 208f402595d638dde161d90159c526615b473f54..e49226d0d9d18c92898beb06192c5f5cee77b7bc 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -111,7 +111,8 @@ void FCWXKernel(cl::Kernel *kernel, cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( - *kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), + *kernel, cl::NullRange, + cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), cl::NDRange((*lws)[0], (*lws)[1], (*lws)[2]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index 641abd664c190026314cc4419d8ee240ee031118..b386cfc6e057fe3d82e0fb306f1291a1947bf898 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -249,7 +249,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, << "Tuning parameters of 3D kernel must be 4D"; cl_int error = CL_SUCCESS; std::vector roundup_gws(3); - if(!is_qualcomm_opencl200) { + if (!is_qualcomm_opencl200) { for (size_t i = 0; i < 3; ++i) { roundup_gws[i] = RoundUp(gws[i], params[i]); } @@ -284,7 +284,8 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, cl::NDRange(params[0], params[1], params[2]), nullptr, &event); } else { error = runtime->command_queue().enqueueNDRangeKernel( - kernel, cl::NullRange, cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), + kernel, cl::NullRange, + cl::NDRange(roundup_gws[0], roundup_gws[1], roundup_gws[2]), cl::NDRange(params[0], params[1], params[2]), nullptr, &event); } MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; @@ -387,7 +388,8 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, } else { uint32_t roundup_gws1 = RoundUp(gws1, params[1]); error = runtime->command_queue().enqueueNDRangeKernel( - kernel, cl::NDRange(0, i * block_size), cl::NDRange(roundup_gws[0], roundup_gws1), + kernel, cl::NDRange(0, i * block_size), + cl::NDRange(roundup_gws[0], roundup_gws1), cl::NDRange(params[0], params[1]), nullptr, &event); } MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; @@ -420,12 +422,14 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, (i == num_blocks - 1) ? (gws[1] - (i * block_size)) : block_size; if (is_qualcomm_opencl200) { error = runtime->command_queue().enqueueNDRangeKernel( - kernel, cl::NDRange(0, i * block_size), cl::NDRange(gws[0], gws1), - cl::NDRange(params[0], params[1]), nullptr, &event); + kernel, cl::NDRange(0, i * block_size), + cl::NDRange(gws[0], gws1), cl::NDRange(params[0], params[1]), + nullptr, &event); } else { uint32_t roundup_gws1 = RoundUp(gws1, params[1]); error = runtime->command_queue().enqueueNDRangeKernel( - kernel, cl::NDRange(0, i * block_size), cl::NDRange(roundup_gws[0], roundup_gws1), + kernel, cl::NDRange(0, i * block_size), + cl::NDRange(roundup_gws[0], roundup_gws1), cl::NDRange(params[0], params[1]), nullptr, &event); } MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; diff --git a/mace/kernels/reorganize.h b/mace/kernels/reorganize.h index 68c772090d5db75c5cf609da23ea82f2ccc844eb..a64d55b97400188dd99ff4cccbec2b8e92287dc7 100644 --- a/mace/kernels/reorganize.h +++ b/mace/kernels/reorganize.h @@ -74,7 +74,6 @@ struct ReOrganizeFunctor { } } } - } };