提交 487231a8 编写于 作者: U Unknown

Fix depth-to-space OpenCL bugs

上级 7e1bb423
......@@ -65,7 +65,7 @@ struct EltwiseFunctor : EltwiseFunctorBase {
#pragma omp parallel for
for (index_t i = 0; i < size; ++i) {
output_ptr[i] =
coeff_[0] * input0_ptr[i] + coeff_[1] * input1_ptr[i];
coeff_[0] * input0_ptr[i] + coeff_[1] * input1_ptr[i];
}
}
break;
......
......@@ -15,16 +15,6 @@ __kernel void depth_to_space(
const int out_w = get_global_id(1);
const int out_h = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
if (out_d >= global_size_dim0 || out_w >= global_size_dim1
|| out_h >= global_size_dim2) {
return;
}
const int output_width = global_size_dim1;
#else
const int output_width = get_global_size(1);
#endif
if (out_d >= output_depth_blocks || out_h >= output_height || out_w >= output_width)
return;
......@@ -61,15 +51,8 @@ __kernel void space_to_depth(
const int w = get_global_id(1);
const int h = get_global_id(2);
#ifndef NON_UNIFORM_WORK_GROUP
if (d >= global_size_dim0 || w >= global_size_dim1
|| h >= global_size_dim2) {
if (h >= input_height || w >= input_width || d >= input_depth_blocks)
return;
}
const int input_width = global_size_dim1;
#else
const int input_width = get_global_size(1);
#endif
const int in_pos = mad24(d, input_width, w);
......@@ -84,8 +67,6 @@ __kernel void space_to_depth(
return;
const int out_pos = mad24(out_d, output_width, out_w);
DATA_TYPE4 in_data = READ_IMAGET(input, SAMPLER, (int2)(in_pos, h));
WRITE_IMAGET(output, (int2)(out_pos, out_h), in_data);
}
......@@ -70,13 +70,13 @@ void DepthToSpaceOpFunctor<DeviceType::OPENCL, T>::operator()(
std::stringstream ss;
if (!IsVecEqual(input_shape_, input->shape())) {
if (d2s_) {
gws[0] = static_cast<uint32_t>(depth_blocks);
gws[0] = static_cast<uint32_t>(output_depth_blocks);
gws[1] = static_cast<uint32_t>(output_width);
gws[2] = static_cast<uint32_t>(output_height * batch);
ss << "depth_to_space_opencl_kernel_" << output->dim(0) << "_"
<< output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3);
} else {
gws[0] = static_cast<uint32_t>(depth_blocks);
gws[0] = static_cast<uint32_t>(input_depth_blocks);
gws[1] = static_cast<uint32_t>(input_width);
gws[2] = static_cast<uint32_t>(input_height * batch);
ss << "space_to_depth_opencl_kernel_" << input->dim(0) << "_"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册