From 16703420eaa64af52a18efd29d59a7be17399edb Mon Sep 17 00:00:00 2001 From: wuchenghui Date: Tue, 20 Mar 2018 10:35:52 +0800 Subject: [PATCH] fix cpplint for mace/kernels --- mace/kernels/activation.h | 4 + mace/kernels/addn.h | 3 +- mace/kernels/batch_norm.h | 3 +- mace/kernels/bias_add.h | 4 +- mace/kernels/buffer_to_image.h | 10 ++- mace/kernels/channel_shuffle.h | 6 +- mace/kernels/concat.h | 10 ++- mace/kernels/conv_2d.h | 18 ++-- mace/kernels/conv_pool_2d_util.cc | 8 +- mace/kernels/depthwise_conv2d.h | 6 +- mace/kernels/eltwise.h | 3 + mace/kernels/fully_connected.h | 2 + mace/kernels/matmul.h | 3 - mace/kernels/neon/batch_norm_neon.cc | 2 +- mace/kernels/neon/conv_2d_neon_1x1.cc | 8 +- mace/kernels/opencl/addn.cc | 6 +- mace/kernels/opencl/bias_add_opencl.cc | 1 - mace/kernels/opencl/buffer_to_image.cc | 2 +- mace/kernels/opencl/channel_shuffle.cc | 4 +- mace/kernels/opencl/concat.cc | 5 +- mace/kernels/opencl/conv_2d_opencl.cc | 3 +- mace/kernels/opencl/conv_2d_opencl_1x1.cc | 1 - mace/kernels/opencl/depthwise_conv_opencl.cc | 25 +++--- mace/kernels/opencl/eltwise_opencl.cc | 1 - mace/kernels/opencl/fully_connected_opencl.cc | 46 +++++----- mace/kernels/opencl/helper.cc | 86 ++++++++++--------- mace/kernels/opencl/helper.h | 11 +-- mace/kernels/opencl/matmul.cc | 4 +- mace/kernels/opencl/pooling_opencl.cc | 13 +-- mace/kernels/opencl/resize_bilinear_opencl.cc | 4 +- mace/kernels/opencl/slice.cc | 2 +- mace/kernels/opencl/softmax_opencl.cc | 1 - mace/kernels/opencl/space_to_batch_opencl.cc | 4 +- mace/kernels/opencl/winograd_transform.cc | 6 +- mace/kernels/pooling.h | 9 +- mace/kernels/reshape.h | 4 +- mace/kernels/resize_bilinear.h | 5 +- mace/kernels/slice.h | 7 +- mace/kernels/softmax.h | 8 +- mace/kernels/space_to_batch.h | 8 +- mace/kernels/winograd_transform.h | 2 + 41 files changed, 194 insertions(+), 164 deletions(-) diff --git a/mace/kernels/activation.h b/mace/kernels/activation.h index 1e3601a4..55368c3c 100644 --- a/mace/kernels/activation.h +++ b/mace/kernels/activation.h @@ -5,6 +5,10 @@ #ifndef MACE_KERNELS_ACTIVATION_H_ #define MACE_KERNELS_ACTIVATION_H_ +#include +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index 3a5a45df..70d9583b 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -8,6 +8,7 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include #include #include "mace/core/future.h" @@ -17,9 +18,7 @@ namespace mace { namespace kernels { -namespace { constexpr int kCostPerGroup = 1024; -} // namespace template struct AddNFunctor { diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index 57f0f4d6..28b8d776 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -8,6 +8,7 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -159,7 +160,7 @@ struct BatchNormFunctor : BatchNormFunctorBase { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_BATCH_NORM_H_ diff --git a/mace/kernels/bias_add.h b/mace/kernels/bias_add.h index d8e411ef..d5372850 100644 --- a/mace/kernels/bias_add.h +++ b/mace/kernels/bias_add.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_BIAS_ADD_H_ #define MACE_KERNELS_BIAS_ADD_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -65,7 +67,7 @@ struct BiasAddFunctor { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_BIAS_ADD_H_ diff --git a/mace/kernels/buffer_to_image.h b/mace/kernels/buffer_to_image.h index 3292e993..2956762d 100644 --- a/mace/kernels/buffer_to_image.h +++ b/mace/kernels/buffer_to_image.h @@ -13,13 +13,14 @@ namespace mace { namespace kernels { struct BufferToImageFunctorBase { - BufferToImageFunctorBase(bool i2b) : i2b_(i2b) {} + explicit BufferToImageFunctorBase(bool i2b) : i2b_(i2b) {} bool i2b_; }; template struct BufferToImageFunctor : BufferToImageFunctorBase { - BufferToImageFunctor(bool i2b = false) : BufferToImageFunctorBase(i2b) {} + explicit BufferToImageFunctor(bool i2b = false) + : BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, Tensor *output, @@ -30,14 +31,15 @@ struct BufferToImageFunctor : BufferToImageFunctorBase { template struct BufferToImageFunctor : BufferToImageFunctorBase { - BufferToImageFunctor(bool i2b = false) : BufferToImageFunctorBase(i2b) {} + explicit BufferToImageFunctor(bool i2b = false) + : BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, Tensor *output, StatsFuture *future); }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_BUFFER_TO_IMAGE_H_ diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index da2ce094..f1e25833 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_CHANNEL_SHUFFLE_H_ #define MACE_KERNELS_CHANNEL_SHUFFLE_H_ +#include + #include "mace/core/future.h" #include "mace/core/tensor.h" @@ -13,7 +15,7 @@ namespace kernels { template struct ChannelShuffleFunctor { - ChannelShuffleFunctor(const int groups) : groups_(groups) {} + explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {} void operator()(const Tensor *input, Tensor *output, @@ -49,7 +51,7 @@ struct ChannelShuffleFunctor { template struct ChannelShuffleFunctor { - ChannelShuffleFunctor(const int groups) : groups_(groups) {} + explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {} void operator()(const Tensor *input, Tensor *output, StatsFuture *future); diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index 68705946..de34ed69 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_CONCAT_H_ #define MACE_KERNELS_CONCAT_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -15,14 +17,14 @@ namespace mace { namespace kernels { struct ConcatFunctorBase { - ConcatFunctorBase(const int32_t axis) : axis_(axis) {} + explicit ConcatFunctorBase(const int32_t axis) : axis_(axis) {} int32_t axis_; }; template struct ConcatFunctor : ConcatFunctorBase { - ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} + explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} void operator()(const std::vector &input_list, Tensor *output, @@ -77,7 +79,7 @@ struct ConcatFunctor : ConcatFunctorBase { template struct ConcatFunctor : ConcatFunctorBase { - ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} + explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} void operator()(const std::vector &input_list, Tensor *output, @@ -86,7 +88,7 @@ struct ConcatFunctor : ConcatFunctorBase { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_CONCAT_H_ diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index b107d332..47516291 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -8,6 +8,8 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -18,7 +20,6 @@ namespace mace { namespace kernels { -namespace { template mutable_data(); constexpr int inc_tile_size = 4; -// TODO Auto tuning these parameters +// TODO(heliangliang) Auto tuning these parameters #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) const int c_tile_size = 4; const int h_tile_size = 2; diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc index b1a83782..9bbbdcf1 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/kernels/conv_pool_2d_util.cc @@ -4,6 +4,8 @@ #include "mace/kernels/conv_pool_2d_util.h" +#include + namespace mace { namespace kernels { @@ -56,7 +58,7 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW } // Note: TensorFlow may padded one more on the right/bottom side - // TODO may be it's better to also truncate the left/top to + // TODO(liuqi): may be it's better to also truncate the left/top to // utilize the more centered features. We need to benchmark // based on the model accuracy. @@ -120,7 +122,7 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape, // NHWC } // Note: TensorFlow may padded one more on the right/bottom side - // TODO may be it's better to also truncate the left/top to + // TODO(liuqi): may be it's better to also truncate the left/top to // utilize the more centered features. We need to benchmark // based on the model accuracy. @@ -219,7 +221,7 @@ void CalPaddingSize(const index_t *input_shape, // NCHW } // Note: TensorFlow may padded one more on the right/bottom side - // TODO may be it's better to also truncate the left/top to + // TODO(liuqi): may be it's better to also truncate the left/top to // utilize the more centered features. We need to benchmark // based on the model accuracy. padding_size[0] = std::max( diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index dc6b7370..166ea18a 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -8,6 +8,8 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -17,8 +19,6 @@ namespace mace { namespace kernels { -namespace { - template void DepthwiseConv2dKernel(const T *input_ptr, const T *filter_ptr, @@ -233,8 +233,6 @@ void DepthwiseConv2dNoOOBCheckKernel(const T *input_ptr, } } -} // namespace - struct DepthwiseConv2dFunctorBase { DepthwiseConv2dFunctorBase(const int *strides, const Padding padding_type, diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index 1aa883d5..0f9e9b40 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -4,6 +4,9 @@ #ifndef MACE_KERNELS_ELTWISE_H_ #define MACE_KERNELS_ELTWISE_H_ +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h index 5c527d45..4ab38529 100644 --- a/mace/kernels/fully_connected.h +++ b/mace/kernels/fully_connected.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_FULLY_CONNECTED_H_ #define MACE_KERNELS_FULLY_CONNECTED_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" diff --git a/mace/kernels/matmul.h b/mace/kernels/matmul.h index 88452bfe..62590400 100644 --- a/mace/kernels/matmul.h +++ b/mace/kernels/matmul.h @@ -21,7 +21,6 @@ namespace mace { namespace kernels { -namespace { template(a_ptr_batch_base, \ @@ -118,7 +116,6 @@ switch (k_count) { \ LOG(FATAL) << "Unsupported k tile: " << k_count; \ } - #define MACE_CASE_W_MATMUL(HC) \ switch (w_count) { \ case 1: \ diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 19094ef7..930a0c5d 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -78,7 +78,7 @@ void BatchNormFunctor::operator()( } } } -}; +} } // namespace kernels } // namespace mace diff --git a/mace/kernels/neon/conv_2d_neon_1x1.cc b/mace/kernels/neon/conv_2d_neon_1x1.cc index c098587c..14c20cc3 100644 --- a/mace/kernels/neon/conv_2d_neon_1x1.cc +++ b/mace/kernels/neon/conv_2d_neon_1x1.cc @@ -296,7 +296,7 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW } } } -}; +} void Conv2dNeonPixelK1x1S1( const float *input, // NCHW @@ -321,7 +321,7 @@ void Conv2dNeonPixelK1x1S1( const index_t total_pixels = height * width; // Process 4 * 2 = 8 pixels for each innermost loop - // TODO Does 64 bit v.s. 32 bit index matters? need benchmark + // TODO(heliangliang): Does 64 bit v.s. 32 bit index matters? need benchmark const index_t total_loops = total_pixels >> 3; const index_t loop_remaining = total_pixels & 7; @@ -329,7 +329,7 @@ void Conv2dNeonPixelK1x1S1( for (index_t n = 0; n < batch; ++n) { for (index_t c = 0; c < channels; ++c) { const float *filter_ptr = filter + c * input_channels; - // TODO Will GCC opt these out? + // TODO(heliangliang): Will GCC opt these out? float *channel_output_start = output + n * channels * height * width + c * height * width; const float *input_ptr = @@ -469,7 +469,7 @@ void Conv2dNeonPixelK1x1S1( } } } -}; +} } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 9f9571d0..e7869bb2 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -45,7 +45,6 @@ void AddNFunctor::operator()( built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); built_options.emplace(MakeString("-DINPUT_NUM=", input_tensors.size())); kernel_ = runtime->BuildKernel("addn", kernel_name, built_options); - } std::vector output_shape = input_tensors[0]->shape(); @@ -56,7 +55,8 @@ void AddNFunctor::operator()( if (!IsVecEqual(input_shape_, input_tensors[0]->shape())) { std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output_tensor->ResizeImage(output_shape, output_image_shape); uint32_t idx = 0; @@ -75,7 +75,7 @@ void AddNFunctor::operator()( ss << "addn_opencl_kernel_" << output_shape[0] << "_" << output_shape[1] << "_" << output_shape[2] << "_" << output_shape[3]; TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); -}; +} template struct AddNFunctor; diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index d2490000..3d4c4ec5 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -32,7 +32,6 @@ void BiasAddFunctor::operator()(const Tensor *input, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index b0fa30a5..126fda77 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -14,7 +14,7 @@ void BufferToImageFunctor::operator()( Tensor *buffer, const BufferType type, Tensor *image, StatsFuture *future) { std::vector image_shape; if (!i2b_) { - CalImage2DShape(buffer->shape(), type, image_shape); + CalImage2DShape(buffer->shape(), type, &image_shape); if (type == WINOGRAD_FILTER) { std::vector new_shape = CalWinogradShape(buffer->shape(), type); image->ResizeImage(new_shape, image_shape); diff --git a/mace/kernels/opencl/channel_shuffle.cc b/mace/kernels/opencl/channel_shuffle.cc index a88b3b05..78d855e2 100644 --- a/mace/kernels/opencl/channel_shuffle.cc +++ b/mace/kernels/opencl/channel_shuffle.cc @@ -39,7 +39,8 @@ void ChannelShuffleFunctor::operator()( auto dt = DataTypeToEnum::value; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); - kernel_ = runtime->BuildKernel("channel_shuffle", kernel_name, built_options); + kernel_ = runtime->BuildKernel("channel_shuffle", kernel_name, + built_options); } if (!IsVecEqual(input_shape_, input->shape())) { uint32_t idx = 0; @@ -61,7 +62,6 @@ void ChannelShuffleFunctor::operator()( << output->dim(2) << "_" << output->dim(3); TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); - } template diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index e99ab060..da8671db 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -41,7 +41,6 @@ static void Concat2(cl::Kernel *kernel, built_options.emplace("-DDIVISIBLE_FOUR"); } *kernel = runtime->BuildKernel("concat", kernel_name, built_options); - } if (!IsVecEqual(*prev_input_shape, input0->shape())) { uint32_t idx = 0; @@ -140,7 +139,7 @@ void ConcatFunctor::operator()( inputs_count == 2 || divisible_four, "Dimensions of inputs should be divisible by 4 when inputs_count > 2."); std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); output->ResizeImage(output_shape, image_shape); switch (inputs_count) { @@ -155,7 +154,7 @@ void ConcatFunctor::operator()( MACE_NOT_IMPLEMENTED; } } -}; +} template struct ConcatFunctor; template struct ConcatFunctor; diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc index 46683fd1..468d80f0 100644 --- a/mace/kernels/opencl/conv_2d_opencl.cc +++ b/mace/kernels/opencl/conv_2d_opencl.cc @@ -92,7 +92,8 @@ void Conv2dFunctor::operator()(const Tensor *input, } std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); if (kernel_h == kernel_w && kernel_h <= 5 && diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index 4109a979..62f8b09a 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -68,7 +68,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, auto runtime = OpenCLRuntime::Global(); *kernel = runtime->BuildKernel("conv_2d_1x1", kernel_name, built_options); - } if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc index 37b587dc..ecb109d1 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl.cc @@ -91,18 +91,18 @@ void DepthwiseConv2d(cl::Kernel *kernel, } kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); - kernel->setArg(idx++, static_cast(input_height)); - kernel->setArg(idx++, static_cast(input_width)); - kernel->setArg(idx++, static_cast(input_channel_blocks)); - kernel->setArg(idx++, static_cast(height)); - kernel->setArg(idx++, static_cast(width)); - kernel->setArg(idx++, static_cast(filter_height)); - kernel->setArg(idx++, static_cast(filter_width)); - kernel->setArg(idx++, static_cast(paddings[0] / 2)); - kernel->setArg(idx++, static_cast(paddings[1] / 2)); + kernel->setArg(idx++, static_cast(input_height)); + kernel->setArg(idx++, static_cast(input_width)); + kernel->setArg(idx++, static_cast(input_channel_blocks)); + kernel->setArg(idx++, static_cast(height)); + kernel->setArg(idx++, static_cast(width)); + kernel->setArg(idx++, static_cast(filter_height)); + kernel->setArg(idx++, static_cast(filter_width)); + kernel->setArg(idx++, static_cast(paddings[0] / 2)); + kernel->setArg(idx++, static_cast(paddings[1] / 2)); if (stride != 1 || dilations[0] != 1 || dilations[1] != 1) { - kernel->setArg(idx++, static_cast(dilations[0])); - kernel->setArg(idx++, static_cast(dilations[1])); + kernel->setArg(idx++, static_cast(dilations[0])); + kernel->setArg(idx++, static_cast(dilations[1])); } *prev_input_shape = input->shape(); } @@ -159,7 +159,8 @@ void DepthwiseConv2dFunctor::operator()( } std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc index dde05b29..548d907d 100644 --- a/mace/kernels/opencl/eltwise_opencl.cc +++ b/mace/kernels/opencl/eltwise_opencl.cc @@ -35,7 +35,6 @@ void EltwiseFunctor::operator()(const Tensor *input0, built_options.emplace(MakeString("-DELTWISE_TYPE=", type_)); if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); kernel_ = runtime->BuildKernel("eltwise", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input0->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index d5db5190..772a6d8d 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -16,12 +16,14 @@ void FCWXKernel(cl::Kernel *kernel, std::vector *prev_input_shape, Tensor *output, const ActivationType activation, - std::vector &gws, - std::vector &lws, + std::vector *gws, + std::vector *lws, const float relux_max_limit, StatsFuture *future) { MACE_CHECK(input->dim(3) % 4 == 0) << "FC width kernel only support input with 4x channel."; + MACE_CHECK_NOTNULL(gws); + MACE_CHECK_NOTNULL(lws); auto runtime = OpenCLRuntime::Global(); if (kernel->get() == nullptr) { @@ -62,12 +64,11 @@ void FCWXKernel(cl::Kernel *kernel, const index_t output_blocks = RoundUpDiv4(output_size); const uint32_t wave_size = runtime->GetKernelWaveSize(*kernel); - gws = {4, (wave_size / 4), static_cast(batch * output_blocks)}; + *gws = {4, (wave_size / 4), static_cast(batch * output_blocks)}; const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(*kernel); - const uint32_t inter_local_blks = kwg_size / (gws[0] * gws[1]); - lws = {gws[0], gws[1], inter_local_blks}; - + const uint32_t inter_local_blks = kwg_size / ((*gws)[0] * (*gws)[1]); + *lws = {(*gws)[0], (*gws)[1], inter_local_blks}; } if (!IsVecEqual(*prev_input_shape, input->shape())) { const index_t batch = output->dim(0); @@ -80,21 +81,22 @@ void FCWXKernel(cl::Kernel *kernel, kernel->setArg(idx++, *(bias->opencl_image())); } kernel->setArg(idx++, *(output->opencl_image())); - kernel->setArg(idx++, (lws[0] * lws[1] * lws[2] * sizeof(float)), nullptr); + kernel->setArg(idx++, ((*lws)[0] * (*lws)[1] * (*lws)[2] * sizeof(float)), + nullptr); kernel->setArg(idx++, static_cast(input->dim(1))); kernel->setArg(idx++, static_cast(input->dim(2))); kernel->setArg(idx++, static_cast(RoundUpDiv4(input->dim(3)))); kernel->setArg(idx++, static_cast(output_blocks)); kernel->setArg(idx++, relux_max_limit); - gws[2] = static_cast(batch * output_blocks); + (*gws)[2] = static_cast(batch * output_blocks); *prev_input_shape = input->shape(); } cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( - *kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), - cl::NDRange(lws[0], lws[1], lws[2]), nullptr, &event); + *kernel, cl::NullRange, cl::NDRange((*gws)[0], (*gws)[1], (*gws)[2]), + cl::NDRange((*lws)[0], (*lws)[1], (*lws)[2]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; if (future != nullptr) { @@ -105,7 +107,6 @@ void FCWXKernel(cl::Kernel *kernel, } }; } - } template @@ -116,10 +117,12 @@ void FCWTXKernel(cl::Kernel *kernel, std::vector *prev_input_shape, Tensor *output, const ActivationType activation, - std::vector &gws, - std::vector &lws, + std::vector *gws, + std::vector *lws, const float relux_max_limit, StatsFuture *future) { + MACE_CHECK_NOTNULL(gws); + MACE_CHECK_NOTNULL(lws); if (kernel->get() == nullptr) { auto runtime = OpenCLRuntime::Global(); std::set built_options; @@ -152,7 +155,7 @@ void FCWTXKernel(cl::Kernel *kernel, *kernel = runtime->BuildKernel("fully_connected", kernel_name, built_options); - lws = {16, 64, 1}; + *lws = {16, 64, 1}; } if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; @@ -171,18 +174,16 @@ void FCWTXKernel(cl::Kernel *kernel, const index_t batch = output->dim(0); const index_t output_blocks = RoundUpDiv4(output->dim(3)); - gws = { + *gws = { static_cast(batch), static_cast(output_blocks), }; - *prev_input_shape = input->shape(); } std::stringstream ss; ss << "fc_opencl_kernel_" << output->dim(0) << "_" << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); - TuningOrRun2DKernel(*kernel, ss.str(), gws.data(), lws, future); - + TuningOrRun2DKernel(*kernel, ss.str(), gws->data(), *lws, future); } template @@ -194,17 +195,18 @@ void FullyConnectedFunctor::operator()( StatsFuture *future) { std::vector output_shape = {input->dim(0), 1, 1, weight->dim(0)}; std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); if (weight_type_ == BufferType::WEIGHT_HEIGHT) { FCWTXKernel(&kernel_, input, weight, bias, &input_shape_, output, - activation_, gws_, lws_, relux_max_limit_, future); + activation_, &gws_, &lws_, relux_max_limit_, future); } else { FCWXKernel(&kernel_, input, weight, bias, &input_shape_, output, - activation_, gws_, lws_, relux_max_limit_, future); + activation_, &gws_, &lws_, relux_max_limit_, future); } -}; +} template struct FullyConnectedFunctor; diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index 3f419662..e3cadbc6 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -3,6 +3,11 @@ // #include "mace/kernels/opencl/helper.h" + +#include +#include +#include + #include "mace/utils/tuner.h" #include "mace/utils/utils.h" @@ -11,91 +16,92 @@ namespace kernels { // [(C + 3) / 4 * W, N * H] void CalInOutputImageShape(const std::vector &shape, /* NHWC */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[3]) * shape[2]; - image_shape[1] = shape[0] * shape[1]; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[3]) * shape[2]; + (*image_shape)[1] = shape[0] * shape[1]; } // [RoundUp<4>(Ic) * H * W, (Oc + 3) / 4] void CalConv2dFilterImageShape(const std::vector &shape, /* HWOI */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = shape[0] * shape[1] * RoundUp(shape[3], 4); - image_shape[1] = RoundUpDiv4(shape[2]); + image_shape->resize(2); + (*image_shape)[0] = shape[0] * shape[1] * RoundUp(shape[3], 4); + (*image_shape)[1] = RoundUpDiv4(shape[2]); } // [H * W * M, (Ic + 3) / 4] void CalDepthwiseConv2dFilterImageShape( const std::vector &shape, /* HWIM */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = shape[0] * shape[1] * shape[3]; - image_shape[1] = RoundUpDiv4(shape[2]); + image_shape->resize(2); + (*image_shape)[0] = shape[0] * shape[1] * shape[3]; + (*image_shape)[1] = RoundUpDiv4(shape[2]); } // [(size + 3) / 4, 1] void CalArgImageShape(const std::vector &shape, - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 1); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[0]); - image_shape[1] = 1; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[0]); + (*image_shape)[1] = 1; } // Only support 3x3 now // [ (Ic + 3) / 4, 16 * Oc] void CalWinogradFilterImageShape( const std::vector &shape, /* Oc, Ic, H, W*/ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[1]); - image_shape[1] = (shape[0] << 4); + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[1]); + (*image_shape)[1] = (shape[0] << 4); } // [W * C, N * RoundUp<4>(H)] void CalInOutHeightImageShape(const std::vector &shape, /* NHWC */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = shape[2] * shape[3]; - image_shape[1] = shape[0] * RoundUpDiv4(shape[1]); + image_shape->resize(2); + (*image_shape)[0] = shape[2] * shape[3]; + (*image_shape)[1] = shape[0] * RoundUpDiv4(shape[1]); } // [RoundUp<4>(W) * C, N * H] void CalInOutWidthImageShape(const std::vector &shape, /* NHWC */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[2]) * shape[3]; - image_shape[1] = shape[0] * shape[1]; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[2]) * shape[3]; + (*image_shape)[1] = shape[0] * shape[1]; } // [W, (H + 3) / 4] void CalWeightHeightImageShape(const std::vector &shape, /* HW */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 2); - image_shape.resize(2); - image_shape[0] = shape[1]; - image_shape[1] = RoundUpDiv4(shape[0]); + image_shape->resize(2); + (*image_shape)[0] = shape[1]; + (*image_shape)[1] = RoundUpDiv4(shape[0]); } // [(W + 3) / 4, H] void CalWeightWidthImageShape(const std::vector &shape, /* HW */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 2); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[1]); - image_shape[1] = shape[0]; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[1]); + (*image_shape)[1] = shape[0]; } void CalImage2DShape(const std::vector &shape, /* NHWC */ const BufferType type, - std::vector &image_shape) { + std::vector *image_shape) { + MACE_CHECK_NOTNULL(image_shape); switch (type) { case CONV2D_FILTER: CalConv2dFilterImageShape(shape, image_shape); @@ -188,7 +194,7 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) { } } -void TuningOrRun3DKernel(cl::Kernel &kernel, +void TuningOrRun3DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, @@ -202,7 +208,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, local_ws[2] = std::min(gws[2], kwg_size / (local_ws[0] * local_ws[1])); return { - // TODO tuning these magic numbers + // TODO(heliangliang): tuning these magic numbers {local_ws[0], local_ws[1], local_ws[2], 1}, {kwg_size / 16, 4, 4, 1}, {kwg_size / 32, 4, 8, 1}, @@ -291,7 +297,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, } } -void TuningOrRun2DKernel(cl::Kernel &kernel, +void TuningOrRun2DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, diff --git a/mace/kernels/opencl/helper.h b/mace/kernels/opencl/helper.h index 56bf295e..89712c9b 100644 --- a/mace/kernels/opencl/helper.h +++ b/mace/kernels/opencl/helper.h @@ -5,6 +5,9 @@ #ifndef MACE_KERNELS_OPENCL_HELPER_H_ #define MACE_KERNELS_OPENCL_HELPER_H_ +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" @@ -30,7 +33,7 @@ enum BufferType { void CalImage2DShape(const std::vector &shape, /* NHWC */ const BufferType type, - std::vector &image_shape); + std::vector *image_shape); std::vector CalWinogradShape(const std::vector &shape, const BufferType type); @@ -43,13 +46,13 @@ std::string DtToCLDt(const DataType dt); std::string DtToUpstreamCLDt(const DataType dt); -void TuningOrRun3DKernel(cl::Kernel &kernel, +void TuningOrRun3DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, StatsFuture *future); -void TuningOrRun2DKernel(cl::Kernel &kernel, +void TuningOrRun2DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, @@ -78,7 +81,6 @@ bool IsVecEqual(const std::vector &input0, (std::equal(input0.begin(), input0.end(), input1.begin()))); } -namespace { template void AppendToStream(std::stringstream *ss, const std::string &delimiter, T v) { (*ss) << v; @@ -92,7 +94,6 @@ void AppendToStream(std::stringstream *ss, (*ss) << first << delimiter; AppendToStream(ss, delimiter, args...); } -} // namespace template std::string Concat(Args... args) { diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index 4b61edb2..c5bd2b0b 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -17,7 +17,7 @@ void MatMulFunctor::operator()(const Tensor *A, StatsFuture *future) { std::vector c_shape = {A->dim(0), A->dim(1), B->dim(2), 1}; std::vector c_image_shape; - CalImage2DShape(c_shape, BufferType::IN_OUT_HEIGHT, c_image_shape); + CalImage2DShape(c_shape, BufferType::IN_OUT_HEIGHT, &c_image_shape); C->ResizeImage(c_shape, c_image_shape); const index_t batch = C->dim(0); @@ -56,7 +56,7 @@ void MatMulFunctor::operator()(const Tensor *A, ss << "matmul_opencl_kernel_" << C->dim(0) << "_" << C->dim(1) << "_" << C->dim(2) << "_" << C->dim(3); TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); -}; +} template struct MatMulFunctor; diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index d8a6d675..5b52a093 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -36,12 +36,11 @@ void PoolingFunctor::operator()(const Tensor *input, built_options.emplace("-DPOOL_AVG"); } kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input->shape())) { std::vector output_shape(4); - std::vector filter_shape = {kernels_[0], kernels_[1], input->dim(3), - input->dim(3)}; + std::vector filter_shape = {kernels_[0], kernels_[1], + input->dim(3), input->dim(3)}; std::vector paddings(2); if (paddings_.empty()) { @@ -50,12 +49,14 @@ void PoolingFunctor::operator()(const Tensor *input, padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input->shape().data(), filter_shape.data(), paddings_.data(), - dilations_, strides_, RoundType::CEIL, output_shape.data()); + CalcOutputSize(input->shape().data(), filter_shape.data(), + paddings_.data(), dilations_, strides_, RoundType::CEIL, + output_shape.data()); } std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); uint32_t idx = 0; diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index a3bb2ee1..37370916 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -34,7 +34,6 @@ void ResizeBilinearFunctor::operator()( built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input->shape())) { MACE_CHECK(out_height > 0 && out_width > 0); @@ -42,7 +41,7 @@ void ResizeBilinearFunctor::operator()( std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, - output_image_shape); + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); float height_scale = @@ -60,7 +59,6 @@ void ResizeBilinearFunctor::operator()( kernel_.setArg(idx++, static_cast(out_height)); input_shape_ = input->shape(); - } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index 63efc555..6bc9ae3b 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -24,7 +24,7 @@ void SliceFunctor::operator()( input->dim(2), output_channels}); std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); for (size_t i= 0; i < outputs_count; ++i) { output_list[i]->ResizeImage(output_shape, image_shape); } diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc index 4aabe901..077db9dd 100644 --- a/mace/kernels/opencl/softmax_opencl.cc +++ b/mace/kernels/opencl/softmax_opencl.cc @@ -33,7 +33,6 @@ void SoftmaxFunctor::operator()(const Tensor *logits, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, logits->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc index 91f5564d..fe911fbd 100644 --- a/mace/kernels/opencl/space_to_batch_opencl.cc +++ b/mace/kernels/opencl/space_to_batch_opencl.cc @@ -22,7 +22,8 @@ void SpaceToBatchFunctor::operator()( StatsFuture *future) { const char *kernel_name = nullptr; std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); if (b2s_) { space_tensor->ResizeImage(output_shape, output_image_shape); kernel_name = "batch_to_space"; @@ -42,7 +43,6 @@ void SpaceToBatchFunctor::operator()( DtToCLCMDDt(DataTypeToEnum::value)); kernel_ = runtime->BuildKernel("space_to_batch", kernel_name, built_options); - } if (!IsVecEqual(space_shape_, space_tensor->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index c07ccc99..3b866408 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -27,7 +27,6 @@ void WinogradTransformFunctor::operator()( auto runtime = OpenCLRuntime::Global(); kernel_ = runtime->BuildKernel("winograd_transform", obfuscated_kernel_name, built_options); - } std::vector output_shape(4); std::vector filter_shape = {3, 3, input_tensor->dim(3), 1}; @@ -49,7 +48,7 @@ void WinogradTransformFunctor::operator()( if (!IsVecEqual(input_shape_, input_tensor->shape())) { output_shape = {16, input_tensor->dim(3), out_width, 1}; std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, &image_shape); output_tensor->ResizeImage(output_shape, image_shape); uint32_t idx = 0; @@ -83,7 +82,6 @@ void WinogradInverseTransformFunctor::operator()( const Tensor *bias, Tensor *output_tensor, StatsFuture *future) { - if (kernel_.get() == nullptr) { std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_inverse_transform_2x2"); @@ -125,7 +123,7 @@ void WinogradInverseTransformFunctor::operator()( std::vector output_shape = {batch_, height_, width_, input_tensor->dim(1)}; std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); output_tensor->ResizeImage(output_shape, image_shape); const uint32_t round_h = (height_ + 1) / 2; diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index bc9892e5..15cc691e 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -2,10 +2,13 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_KERNELS_POOLING_H -#define MACE_KERNELS_POOLING_H +#ifndef MACE_KERNELS_POOLING_H_ +#define MACE_KERNELS_POOLING_H_ +#include #include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -188,4 +191,4 @@ struct PoolingFunctor : PoolingFunctorBase { } // namespace kernels } // namespace mace -#endif // MACE_KERNELS_POOLING_H +#endif // MACE_KERNELS_POOLING_H_ diff --git a/mace/kernels/reshape.h b/mace/kernels/reshape.h index 544ba360..14e56078 100644 --- a/mace/kernels/reshape.h +++ b/mace/kernels/reshape.h @@ -4,6 +4,8 @@ #ifndef MACE_KERNELS_RESHAPE_H_ #define MACE_KERNELS_RESHAPE_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -20,7 +22,7 @@ struct ReshapeFunctor { Tensor *output, StatsFuture *future) { output->Resize(out_shape); - // TODO copy on write to avoid this copy. + // TODO(liuqi): copy on write to avoid this copy. output->CopyBytes(input->raw_data(), input->size() * sizeof(T)); } }; diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index 52c1da10..65e51212 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -4,6 +4,9 @@ #ifndef MACE_KERNELS_RESIZE_BILINEAR_H_ #define MACE_KERNELS_RESIZE_BILINEAR_H_ +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -11,7 +14,6 @@ namespace mace { namespace kernels { -namespace { struct CachedInterpolation { index_t lower; // Lower source index used in the interpolation index_t upper; // Upper source index used in the interpolation @@ -101,7 +103,6 @@ void ResizeImage(const T *images, } } } -} struct ResizeBilinearFunctorBase { ResizeBilinearFunctorBase(const std::vector &size, diff --git a/mace/kernels/slice.h b/mace/kernels/slice.h index b08ea7ef..59d9d667 100644 --- a/mace/kernels/slice.h +++ b/mace/kernels/slice.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_SLICE_H_ #define MACE_KERNELS_SLICE_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -16,7 +18,6 @@ namespace kernels { template struct SliceFunctor { - void operator()(const Tensor *input, const std::vector &output_list, StatsFuture *future) { @@ -56,15 +57,13 @@ struct SliceFunctor { template struct SliceFunctor { - void operator()(const Tensor *input, const std::vector &output_list, StatsFuture *future); cl::Kernel kernel_; - }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_SLICE_H_ diff --git a/mace/kernels/softmax.h b/mace/kernels/softmax.h index d5bc5717..a1c4ea2f 100644 --- a/mace/kernels/softmax.h +++ b/mace/kernels/softmax.h @@ -5,6 +5,10 @@ #ifndef MACE_KERNELS_SOFTMAX_H_ #define MACE_KERNELS_SOFTMAX_H_ +#include +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -38,7 +42,7 @@ struct SoftmaxFunctor { for (index_t c = 1; c < num_classes; ++c) { max_value = std::max(max_value, logits_ptr[pos + c]); } - // TODO: check overflow? + // TODO(liuqi): check overflow? T sum = 0; for (index_t c = 0; c < num_classes; ++c) { exp_data[c] = ::exp((logits_ptr[pos + c] - max_value)); @@ -60,7 +64,7 @@ struct SoftmaxFunctor { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_SOFTMAX_H_ diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index ef7467b5..757f7848 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -2,8 +2,10 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_KERNELS_CONV_2D_H_ -#define MACE_KERNELS_CONV_2D_H_ +#ifndef MACE_KERNELS_SPACE_TO_BATCH_H_ +#define MACE_KERNELS_SPACE_TO_BATCH_H_ + +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -60,4 +62,4 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { } // namespace kernels } // namespace mace -#endif // MACE_KERNELS_CONV_2D_H_ +#endif // MACE_KERNELS_SPACE_TO_BATCH_H_ diff --git a/mace/kernels/winograd_transform.h b/mace/kernels/winograd_transform.h index f3b7f7d6..6f483dac 100644 --- a/mace/kernels/winograd_transform.h +++ b/mace/kernels/winograd_transform.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_WINOGRAD_TRANSFORM_H_ #define MACE_KERNELS_WINOGRAD_TRANSFORM_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" -- GitLab