diff --git a/mace/kernels/opencl/cl/slice.cl b/mace/kernels/opencl/cl/slice.cl
index d1d4a55427e6fd0a969a406a0dfc1603ea305e82..d8d45bcbcfa4fd6416ab6ea417841e379082af50 100644
--- a/mace/kernels/opencl/cl/slice.cl
+++ b/mace/kernels/opencl/cl/slice.cl
@@ -8,6 +8,8 @@ __kernel void slice(__read_only image2d_t input,
   const int width = get_global_size(1);
   const int hb_idx = get_global_id(2);
   DATA_TYPE4 data = READ_IMAGET(input, SAMPLER,
-                                (int2)(mad24(chan_blk_idx + chan_blk_offset, width, width_idx), hb_idx));
-  WRITE_IMAGET(output, (int2)(mad24(chan_blk_idx, width, width_idx), hb_idx), data);
+                                (int2)(mad24(chan_blk_idx + chan_blk_offset,
+                                             width, width_idx), hb_idx));
+  WRITE_IMAGET(output,
+               (int2)(mad24(chan_blk_idx, width, width_idx), hb_idx), data);
 }
diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc
index 15201f13253c9f2636f54466c4ae91c65cc6f7b3..63efc555dbf8a743e3fc6881a06e0202480bbd16 100644
--- a/mace/kernels/opencl/slice.cc
+++ b/mace/kernels/opencl/slice.cc
@@ -11,13 +11,15 @@ namespace mace {
 namespace kernels {
 
 template<typename T>
-void SliceFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
-                                                     const std::vector<Tensor *> &output_list,
-                                                     StatsFuture *future) {
+void SliceFunctor<DeviceType::OPENCL, T>::operator()(
+    const Tensor *input,
+    const std::vector<Tensor *> &output_list,
+    StatsFuture *future) {
   const index_t input_channels = input->dim(3);
   const size_t outputs_count = output_list.size();
   const index_t output_channels = input_channels / outputs_count;
-  MACE_CHECK(output_channels % 4 == 0) << "output channels of slice op must be divisible by 4";
+  MACE_CHECK(output_channels % 4 == 0)
+    << "output channels of slice op must be divisible by 4";
   std::vector<index_t> output_shape({input->dim(0), input->dim(1),
                                      input->dim(2), output_channels});
 
@@ -33,7 +35,8 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
     std::string kernel_name = MACE_OBFUSCATE_SYMBOL("slice");
     built_options.emplace("-Dslice=" + kernel_name);
     built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum<T>::value));
-    built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum<T>::value));
+    built_options.emplace("-DCMD_DATA_TYPE="
+                           + DtToCLCMDDt(DataTypeToEnum<T>::value));
     kernel_ = runtime->BuildKernel("slice", kernel_name, built_options);
   }
   const index_t channel_blk = RoundUpDiv4(output_channels);
@@ -53,9 +56,9 @@ void SliceFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input,
      << outputs_count;
   for (int i = 0; i < outputs_count; ++i) {
     uint32_t idx = 0;
-    kernel_.setArg(idx++, *(static_cast<const cl::Image2D *>(input->opencl_image())));
+    kernel_.setArg(idx++, *(input->opencl_image()));
     kernel_.setArg(idx++, static_cast<int32_t>(channel_blk * i));
-    kernel_.setArg(idx++, *(static_cast<cl::Image2D *>(output_list[i]->opencl_image())));
+    kernel_.setArg(idx++, *(output_list[i]->opencl_image()));
 
     TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future);
   }
diff --git a/mace/kernels/slice.h b/mace/kernels/slice.h
index 619e28ab2d7f03aae196e1328b4fc195a4bc505c..b08ea7ef4fcd1e235375952085e9965c7f897334 100644
--- a/mace/kernels/slice.h
+++ b/mace/kernels/slice.h
@@ -41,7 +41,8 @@ struct SliceFunctor {
       int output_idx = outer_idx * output_channels;
       for (size_t i = 0; i < outputs_count; ++i) {
         if (DataTypeCanUseMemcpy(DataTypeToEnum<T>::v())) {
-          memcpy(output_ptrs[i]+output_idx, input_ptr+input_idx, output_channels * sizeof(T));
+          memcpy(output_ptrs[i]+output_idx, input_ptr+input_idx,
+                 output_channels * sizeof(T));
         } else {
           for (index_t k = 0; k < output_channels; ++k) {
             *(output_ptrs[i] + output_idx + k) = *(input_ptr + input_idx + k);