diff --git a/mace/kernels/opencl/cl/slice.cl b/mace/kernels/opencl/cl/slice.cl index d1d4a55427e6fd0a969a406a0dfc1603ea305e82..d8d45bcbcfa4fd6416ab6ea417841e379082af50 100644 --- a/mace/kernels/opencl/cl/slice.cl +++ b/mace/kernels/opencl/cl/slice.cl @@ -8,6 +8,8 @@ __kernel void slice(__read_only image2d_t input, const int width = get_global_size(1); const int hb_idx = get_global_id(2); DATA_TYPE4 data = READ_IMAGET(input, SAMPLER, - (int2)(mad24(chan_blk_idx + chan_blk_offset, width, width_idx), hb_idx)); - WRITE_IMAGET(output, (int2)(mad24(chan_blk_idx, width, width_idx), hb_idx), data); + (int2)(mad24(chan_blk_idx + chan_blk_offset, + width, width_idx), hb_idx)); + WRITE_IMAGET(output, + (int2)(mad24(chan_blk_idx, width, width_idx), hb_idx), data); } diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index 15201f13253c9f2636f54466c4ae91c65cc6f7b3..63efc555dbf8a743e3fc6881a06e0202480bbd16 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -11,13 +11,15 @@ namespace mace { namespace kernels { template -void SliceFunctor::operator()(const Tensor *input, - const std::vector &output_list, - StatsFuture *future) { +void SliceFunctor::operator()( + const Tensor *input, + const std::vector &output_list, + StatsFuture *future) { const index_t input_channels = input->dim(3); const size_t outputs_count = output_list.size(); const index_t output_channels = input_channels / outputs_count; - MACE_CHECK(output_channels % 4 == 0) << "output channels of slice op must be divisible by 4"; + MACE_CHECK(output_channels % 4 == 0) + << "output channels of slice op must be divisible by 4"; std::vector output_shape({input->dim(0), input->dim(1), input->dim(2), output_channels}); @@ -33,7 +35,8 @@ void SliceFunctor::operator()(const Tensor *input, std::string kernel_name = MACE_OBFUSCATE_SYMBOL("slice"); built_options.emplace("-Dslice=" + kernel_name); built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToCLCMDDt(DataTypeToEnum::value)); kernel_ = runtime->BuildKernel("slice", kernel_name, built_options); } const index_t channel_blk = RoundUpDiv4(output_channels); @@ -53,9 +56,9 @@ void SliceFunctor::operator()(const Tensor *input, << outputs_count; for (int i = 0; i < outputs_count; ++i) { uint32_t idx = 0; - kernel_.setArg(idx++, *(static_cast(input->opencl_image()))); + kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, static_cast(channel_blk * i)); - kernel_.setArg(idx++, *(static_cast(output_list[i]->opencl_image()))); + kernel_.setArg(idx++, *(output_list[i]->opencl_image())); TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); } diff --git a/mace/kernels/slice.h b/mace/kernels/slice.h index 619e28ab2d7f03aae196e1328b4fc195a4bc505c..b08ea7ef4fcd1e235375952085e9965c7f897334 100644 --- a/mace/kernels/slice.h +++ b/mace/kernels/slice.h @@ -41,7 +41,8 @@ struct SliceFunctor { int output_idx = outer_idx * output_channels; for (size_t i = 0; i < outputs_count; ++i) { if (DataTypeCanUseMemcpy(DataTypeToEnum::v())) { - memcpy(output_ptrs[i]+output_idx, input_ptr+input_idx, output_channels * sizeof(T)); + memcpy(output_ptrs[i]+output_idx, input_ptr+input_idx, + output_channels * sizeof(T)); } else { for (index_t k = 0; k < output_channels; ++k) { *(output_ptrs[i] + output_idx + k) = *(input_ptr + input_idx + k);