提交 487231a8 编写于 作者: U Unknown

Fix depth-to-space OpenCL bugs

上级 7e1bb423
...@@ -65,7 +65,7 @@ struct EltwiseFunctor : EltwiseFunctorBase { ...@@ -65,7 +65,7 @@ struct EltwiseFunctor : EltwiseFunctorBase {
#pragma omp parallel for #pragma omp parallel for
for (index_t i = 0; i < size; ++i) { for (index_t i = 0; i < size; ++i) {
output_ptr[i] = output_ptr[i] =
coeff_[0] * input0_ptr[i] + coeff_[1] * input1_ptr[i]; coeff_[0] * input0_ptr[i] + coeff_[1] * input1_ptr[i];
} }
} }
break; break;
......
...@@ -15,16 +15,6 @@ __kernel void depth_to_space( ...@@ -15,16 +15,6 @@ __kernel void depth_to_space(
const int out_w = get_global_id(1); const int out_w = get_global_id(1);
const int out_h = get_global_id(2); const int out_h = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
if (out_d >= global_size_dim0 || out_w >= global_size_dim1
|| out_h >= global_size_dim2) {
return;
}
const int output_width = global_size_dim1;
#else
const int output_width = get_global_size(1);
#endif
if (out_d >= output_depth_blocks || out_h >= output_height || out_w >= output_width) if (out_d >= output_depth_blocks || out_h >= output_height || out_w >= output_width)
return; return;
...@@ -61,15 +51,8 @@ __kernel void space_to_depth( ...@@ -61,15 +51,8 @@ __kernel void space_to_depth(
const int w = get_global_id(1); const int w = get_global_id(1);
const int h = get_global_id(2); const int h = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP if (h >= input_height || w >= input_width || d >= input_depth_blocks)
if (d >= global_size_dim0 || w >= global_size_dim1
|| h >= global_size_dim2) {
return; return;
}
const int input_width = global_size_dim1;
#else
const int input_width = get_global_size(1);
#endif
const int in_pos = mad24(d, input_width, w); const int in_pos = mad24(d, input_width, w);
...@@ -84,8 +67,6 @@ __kernel void space_to_depth( ...@@ -84,8 +67,6 @@ __kernel void space_to_depth(
return; return;
const int out_pos = mad24(out_d, output_width, out_w); const int out_pos = mad24(out_d, output_width, out_w);
DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, h)); DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, h));
WRITE_IMAGET(output, (int2)(out_pos, out_h), in_data); WRITE_IMAGET(output, (int2)(out_pos, out_h), in_data);
} }
...@@ -70,13 +70,13 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -70,13 +70,13 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
std::stringstream ss; std::stringstream ss;
if (!IsVecEqual(input_shape_, input->shape())) { if (!IsVecEqual(input_shape_, input->shape())) {
if (d2s_) { if (d2s_) {
gws[0] = static_cast<uint32_t>(depth_blocks); gws[0] = static_cast<uint32_t>(output_depth_blocks);
gws[1] = static_cast<uint32_t>(output_width); gws[1] = static_cast<uint32_t>(output_width);
gws[2] = static_cast<uint32_t>(output_height * batch); gws[2] = static_cast<uint32_t>(output_height * batch);
ss << "depth_to_space_opencl_kernel_" << output->dim(0) << "_" ss << "depth_to_space_opencl_kernel_" << output->dim(0) << "_"
<< output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3);
} else { } else {
gws[0] = static_cast<uint32_t>(depth_blocks); gws[0] = static_cast<uint32_t>(input_depth_blocks);
gws[1] = static_cast<uint32_t>(input_width); gws[1] = static_cast<uint32_t>(input_width);
gws[2] = static_cast<uint32_t>(input_height * batch); gws[2] = static_cast<uint32_t>(input_height * batch);
ss << "space_to_depth_opencl_kernel_" << input->dim(0) << "_" ss << "space_to_depth_opencl_kernel_" << input->dim(0) << "_"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册