提交 1794dae4 编写于 作者: L liuqi

Add more strategies for choosing the default local work size (lws) of the convolution OpenCL kernels.

上级 cf5cae14
......@@ -23,6 +23,8 @@ namespace kernels {
namespace {
// Cache-size term, following the formula
// (inputs + weights + outputs) * array_size * sizeof(float).
constexpr uint32_t kernel_cache_size = (4 + 4 + 4) * 4 * 4;
// Lower bound that LocalWS compares gws[0] against when picking lws[0].
// TODO(liuqi): Fix the specific value.
constexpr uint32_t lws_limit = 128;
std::vector<uint32_t> LocalWS(const uint32_t *gws,
const uint32_t kwg_size) {
std::vector<uint32_t> lws(4, 0);
......@@ -33,6 +35,8 @@ std::vector<uint32_t> LocalWS(const uint32_t *gws,
lws[1] = std::min<uint32_t>(gws[1], kwg_size);
if (lws[1] >= base) {
lws[0] = std::min<uint32_t>(gws[0], base);
} else if ((1 < lws[1] && lws[1] < base) && gws[0] >= lws_limit) {
lws[0] = std::min<uint32_t>(gws[0], base);
} else {
lws[0] = gws[0] / 8;
if (lws[0] < base) {
......
......@@ -21,7 +21,6 @@
namespace mace {
namespace kernels {
namespace {
// Cache-size term, following the formula
// (inputs + weights + outputs) * array_size * sizeof(float).
constexpr uint32_t kernel_cache_size = (5 + 4 + 5) * 4 * 4;
......@@ -157,7 +156,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel,
*prev_input_shape = input->shape();
}
const std::vector<uint32_t> lws = LocalWS(gws, *kwg_size);
std::vector<uint32_t> lws = LocalWS(gws, *kwg_size);
std::string tuning_key =
Concat("conv2d_3x3_opencl_kernel", output->dim(0),
output->dim(1), output->dim(2), output->dim(3));
......
......@@ -215,7 +215,6 @@ std::vector<uint32_t> Default2DLocalWS(const uint32_t *gws,
lws[0] = std::min<uint32_t>(base, kwg_size);
lws[1] = kwg_size / lws[1];
return lws;
}
std::vector<uint32_t> Default3DLocalWS(const uint32_t *gws,
......
......@@ -118,7 +118,6 @@ std::vector<uint32_t> Default2DLocalWS(const uint32_t *gws,
const uint32_t kwg_size);
// Declaration of Default3DLocalWS. Takes a pointer to const uint32_t values
// (`gws`) and a uint32_t `kwg_size`, and returns a std::vector<uint32_t>.
// NOTE(review): presumably `gws` holds the global work sizes, `kwg_size` is
// the kernel work-group size limit, and the result is the default local work
// sizes — confirm against the definition in the corresponding source file.
std::vector<uint32_t> Default3DLocalWS(const uint32_t *gws,
const uint32_t kwg_size);
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_OPENCL_HELPER_H_
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册