diff --git a/mace/kernels/activation.h b/mace/kernels/activation.h index 1e3601a4a5f2f38dbf0bfa6d5acc8dbd21c2fa4d..55368c3ca83c8aa7dd9e8d76efb47bde568ec4ce 100644 --- a/mace/kernels/activation.h +++ b/mace/kernels/activation.h @@ -5,6 +5,10 @@ #ifndef MACE_KERNELS_ACTIVATION_H_ #define MACE_KERNELS_ACTIVATION_H_ +#include +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index 3a5a45df4dd3476e4ab7a2f58bae658b461e206a..70d9583ba798babd3a27737c9ed7487913441bf6 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -8,6 +8,7 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include #include #include "mace/core/future.h" @@ -17,9 +18,7 @@ namespace mace { namespace kernels { -namespace { constexpr int kCostPerGroup = 1024; -} // namespace template struct AddNFunctor { diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index 57f0f4d66a0107ef6e907e4c5579bed0feef2be3..28b8d776c967e48a4af835ee55913c437aa3d3ea 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -8,6 +8,7 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -159,7 +160,7 @@ struct BatchNormFunctor : BatchNormFunctorBase { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_BATCH_NORM_H_ diff --git a/mace/kernels/bias_add.h b/mace/kernels/bias_add.h index d8e411efa8a70ebd2ca850f6ac91fa1bd2198fe6..d5372850bcf604b0f1e01e630c0c30b59e95abc0 100644 --- a/mace/kernels/bias_add.h +++ b/mace/kernels/bias_add.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_BIAS_ADD_H_ #define MACE_KERNELS_BIAS_ADD_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -65,7 +67,7 @@ struct BiasAddFunctor { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_BIAS_ADD_H_ diff --git a/mace/kernels/buffer_to_image.h b/mace/kernels/buffer_to_image.h index 3292e993ab107dad1cb0ce5a66632d21370c7302..2956762d5d70fb089e8e2bee34f114693fb1cc12 100644 --- a/mace/kernels/buffer_to_image.h +++ b/mace/kernels/buffer_to_image.h @@ -13,13 +13,14 @@ namespace mace { namespace kernels { struct BufferToImageFunctorBase { - BufferToImageFunctorBase(bool i2b) : i2b_(i2b) {} + explicit BufferToImageFunctorBase(bool i2b) : i2b_(i2b) {} bool i2b_; }; template struct BufferToImageFunctor : BufferToImageFunctorBase { - BufferToImageFunctor(bool i2b = false) : BufferToImageFunctorBase(i2b) {} + explicit BufferToImageFunctor(bool i2b = false) + : BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, Tensor *output, @@ -30,14 +31,15 @@ struct BufferToImageFunctor : BufferToImageFunctorBase { template struct BufferToImageFunctor : BufferToImageFunctorBase { - BufferToImageFunctor(bool i2b = false) : BufferToImageFunctorBase(i2b) {} + explicit BufferToImageFunctor(bool i2b = false) + : BufferToImageFunctorBase(i2b) {} void operator()(Tensor *input, const BufferType type, Tensor *output, StatsFuture *future); }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_BUFFER_TO_IMAGE_H_ diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index da2ce094a141984c49ad21a208bdaafb8a97311e..f1e258337a2d9a871bbb3ac4aec70faf1a18edf9 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_CHANNEL_SHUFFLE_H_ #define MACE_KERNELS_CHANNEL_SHUFFLE_H_ +#include + #include "mace/core/future.h" #include "mace/core/tensor.h" @@ -13,7 +15,7 @@ namespace kernels { template struct ChannelShuffleFunctor { - ChannelShuffleFunctor(const int groups) : groups_(groups) {} + explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {} void operator()(const Tensor *input, Tensor *output, @@ -49,7 +51,7 @@ struct ChannelShuffleFunctor { template struct ChannelShuffleFunctor { - ChannelShuffleFunctor(const int groups) : groups_(groups) {} + explicit ChannelShuffleFunctor(const int groups) : groups_(groups) {} void operator()(const Tensor *input, Tensor *output, StatsFuture *future); diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index 6870594641baaab5aae866c033107a7b6df0507c..de34ed69fa5803f61e9f6785b9d4b7185be2cccc 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_CONCAT_H_ #define MACE_KERNELS_CONCAT_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -15,14 +17,14 @@ namespace mace { namespace kernels { struct ConcatFunctorBase { - ConcatFunctorBase(const int32_t axis) : axis_(axis) {} + explicit ConcatFunctorBase(const int32_t axis) : axis_(axis) {} int32_t axis_; }; template struct ConcatFunctor : ConcatFunctorBase { - ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} + explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} void operator()(const std::vector &input_list, Tensor *output, @@ -77,7 +79,7 @@ struct ConcatFunctor : ConcatFunctorBase { template struct ConcatFunctor : ConcatFunctorBase { - ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} + explicit ConcatFunctor(const int32_t axis) : ConcatFunctorBase(axis) {} void operator()(const std::vector &input_list, Tensor *output, @@ -86,7 +88,7 @@ struct ConcatFunctor : ConcatFunctorBase { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_CONCAT_H_ diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index b107d33229c0b77be24e0702db9cf0585801b06f..47516291d14ec21ba2202e2089bee03d6387c433 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -8,6 +8,8 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -18,7 +20,6 @@ namespace mace { namespace kernels { -namespace { template mutable_data(); constexpr int inc_tile_size = 4; -// TODO Auto tuning these parameters +// TODO(heliangliang) Auto tuning these parameters #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) const int c_tile_size = 4; const int h_tile_size = 2; diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc index b1a83782a8ab5dcd96c834fdcff937ecea55d844..9bbbdcf1d96852744de1e073e67e9b4a15dc2c1f 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/kernels/conv_pool_2d_util.cc @@ -4,6 +4,8 @@ #include "mace/kernels/conv_pool_2d_util.h" +#include + namespace mace { namespace kernels { @@ -56,7 +58,7 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW } // Note: TensorFlow may padded one more on the right/bottom side - // TODO may be it's better to also truncate the left/top to + // TODO(liuqi): may be it's better to also truncate the left/top to // utilize the more centered features. We need to benchmark // based on the model accuracy. @@ -120,7 +122,7 @@ void CalcNHWCPaddingAndOutputSize(const index_t *input_shape, // NHWC } // Note: TensorFlow may padded one more on the right/bottom side - // TODO may be it's better to also truncate the left/top to + // TODO(liuqi): may be it's better to also truncate the left/top to // utilize the more centered features. We need to benchmark // based on the model accuracy. @@ -219,7 +221,7 @@ void CalPaddingSize(const index_t *input_shape, // NCHW } // Note: TensorFlow may padded one more on the right/bottom side - // TODO may be it's better to also truncate the left/top to + // TODO(liuqi): may be it's better to also truncate the left/top to // utilize the more centered features. We need to benchmark // based on the model accuracy. padding_size[0] = std::max( diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index dc6b737077ab16b093d3993c5f414430fa17d186..166ea18a644ead1d53af2a7c3b83c73c617554d6 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -8,6 +8,8 @@ #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) #include #endif +#include +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -17,8 +19,6 @@ namespace mace { namespace kernels { -namespace { - template void DepthwiseConv2dKernel(const T *input_ptr, const T *filter_ptr, @@ -233,8 +233,6 @@ void DepthwiseConv2dNoOOBCheckKernel(const T *input_ptr, } } -} // namespace - struct DepthwiseConv2dFunctorBase { DepthwiseConv2dFunctorBase(const int *strides, const Padding padding_type, diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index 1aa883d568ff493fa092e23637b2b6accf1d8a38..0f9e9b40061890a62e36104746bcaf0120bfab0f 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -4,6 +4,9 @@ #ifndef MACE_KERNELS_ELTWISE_H_ #define MACE_KERNELS_ELTWISE_H_ +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h index 5c527d4593e02bead0e55998674690a4c5864e50..4ab385291da1854808f73cd0bdd926c7cc17c616 100644 --- a/mace/kernels/fully_connected.h +++ b/mace/kernels/fully_connected.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_FULLY_CONNECTED_H_ #define MACE_KERNELS_FULLY_CONNECTED_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" diff --git a/mace/kernels/matmul.h b/mace/kernels/matmul.h index 88452bfe83dde8c0d05e1ff61a55410863c1b31a..62590400bf038773c9f16fae68f4c42de4ee9130 100644 --- a/mace/kernels/matmul.h +++ b/mace/kernels/matmul.h @@ -21,7 +21,6 @@ namespace mace { namespace kernels { -namespace { template(a_ptr_batch_base, \ @@ -118,7 +116,6 @@ switch (k_count) { \ LOG(FATAL) << "Unsupported k tile: " << k_count; \ } - #define MACE_CASE_W_MATMUL(HC) \ switch (w_count) { \ case 1: \ diff --git a/mace/kernels/neon/batch_norm_neon.cc b/mace/kernels/neon/batch_norm_neon.cc index 19094ef7a15496d3ff65dffcd57d5927b20338e1..930a0c5d5692a120c71bfd962c569443aa90d805 100644 --- a/mace/kernels/neon/batch_norm_neon.cc +++ b/mace/kernels/neon/batch_norm_neon.cc @@ -78,7 +78,7 @@ void BatchNormFunctor::operator()( } } } -}; +} } // namespace kernels } // namespace mace diff --git a/mace/kernels/neon/conv_2d_neon_1x1.cc b/mace/kernels/neon/conv_2d_neon_1x1.cc index c098587c94610d2f38f44cbbebd7fc01da91bfc7..14c20cc387c03b41f2b190e038a693f9236514ab 100644 --- a/mace/kernels/neon/conv_2d_neon_1x1.cc +++ b/mace/kernels/neon/conv_2d_neon_1x1.cc @@ -296,7 +296,7 @@ void Conv2dNeonK1x1S1(const float *input, // NCHW } } } -}; +} void Conv2dNeonPixelK1x1S1( const float *input, // NCHW @@ -321,7 +321,7 @@ void Conv2dNeonPixelK1x1S1( const index_t total_pixels = height * width; // Process 4 * 2 = 8 pixels for each innermost loop - // TODO Does 64 bit v.s. 32 bit index matters? need benchmark + // TODO(heliangliang): Does 64 bit v.s. 32 bit index matters? need benchmark const index_t total_loops = total_pixels >> 3; const index_t loop_remaining = total_pixels & 7; @@ -329,7 +329,7 @@ void Conv2dNeonPixelK1x1S1( for (index_t n = 0; n < batch; ++n) { for (index_t c = 0; c < channels; ++c) { const float *filter_ptr = filter + c * input_channels; - // TODO Will GCC opt these out? + // TODO(heliangliang): Will GCC opt these out? float *channel_output_start = output + n * channels * height * width + c * height * width; const float *input_ptr = @@ -469,7 +469,7 @@ void Conv2dNeonPixelK1x1S1( } } } -}; +} } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 9f9571d0637026330e1d2f5ad2dea31f116eeefc..e7869bb2fba3959c0fc810cbeb81f44f8f6ab00b 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -45,7 +45,6 @@ void AddNFunctor::operator()( built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); built_options.emplace(MakeString("-DINPUT_NUM=", input_tensors.size())); kernel_ = runtime->BuildKernel("addn", kernel_name, built_options); - } std::vector output_shape = input_tensors[0]->shape(); @@ -56,7 +55,8 @@ void AddNFunctor::operator()( if (!IsVecEqual(input_shape_, input_tensors[0]->shape())) { std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output_tensor->ResizeImage(output_shape, output_image_shape); uint32_t idx = 0; @@ -75,7 +75,7 @@ void AddNFunctor::operator()( ss << "addn_opencl_kernel_" << output_shape[0] << "_" << output_shape[1] << "_" << output_shape[2] << "_" << output_shape[3]; TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); -}; +} template struct AddNFunctor; diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index d2490000b71a034a8cbe19f9ada9e5f5e1ed08fa..3d4c4ec5c7a64406ead61439a52d155689236240 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -32,7 +32,6 @@ void BiasAddFunctor::operator()(const Tensor *input, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index b0fa30a5cf146fd0da2ccd0ea9bc9ea419349f32..126fda7773f3613161d186a445f94b467ddf120c 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -14,7 +14,7 @@ void BufferToImageFunctor::operator()( Tensor *buffer, const BufferType type, Tensor *image, StatsFuture *future) { std::vector image_shape; if (!i2b_) { - CalImage2DShape(buffer->shape(), type, image_shape); + CalImage2DShape(buffer->shape(), type, &image_shape); if (type == WINOGRAD_FILTER) { std::vector new_shape = CalWinogradShape(buffer->shape(), type); image->ResizeImage(new_shape, image_shape); diff --git a/mace/kernels/opencl/channel_shuffle.cc b/mace/kernels/opencl/channel_shuffle.cc index a88b3b059cfacd31249f09dffd72f8ddee230c00..78d855e2088c292cc15468c00a6730870a69f740 100644 --- a/mace/kernels/opencl/channel_shuffle.cc +++ b/mace/kernels/opencl/channel_shuffle.cc @@ -39,7 +39,8 @@ void ChannelShuffleFunctor::operator()( auto dt = DataTypeToEnum::value; built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); - kernel_ = runtime->BuildKernel("channel_shuffle", kernel_name, built_options); + kernel_ = runtime->BuildKernel("channel_shuffle", kernel_name, + built_options); } if (!IsVecEqual(input_shape_, input->shape())) { uint32_t idx = 0; @@ -61,7 +62,6 @@ void ChannelShuffleFunctor::operator()( << output->dim(2) << "_" << output->dim(3); TuningOrRun3DKernel(kernel_, ss.str(), gws, lws, future); - } template diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index e99ab0605d02714b6851cb3cb8cf96f865ae5e1c..da8671db72ec89ebdc93ae43f64049ea0bcd41ee 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -41,7 +41,6 @@ static void Concat2(cl::Kernel *kernel, built_options.emplace("-DDIVISIBLE_FOUR"); } *kernel = runtime->BuildKernel("concat", kernel_name, built_options); - } if (!IsVecEqual(*prev_input_shape, input0->shape())) { uint32_t idx = 0; @@ -140,7 +139,7 @@ void ConcatFunctor::operator()( inputs_count == 2 || divisible_four, "Dimensions of inputs should be divisible by 4 when inputs_count > 2."); std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); output->ResizeImage(output_shape, image_shape); switch (inputs_count) { @@ -155,7 +154,7 @@ void ConcatFunctor::operator()( MACE_NOT_IMPLEMENTED; } } -}; +} template struct ConcatFunctor; template struct ConcatFunctor; diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc index 46683fd1709eda83be6826d4e2519d28bf4956b1..468d80f09c60bd9584225d2c263766cef6c790e5 100644 --- a/mace/kernels/opencl/conv_2d_opencl.cc +++ b/mace/kernels/opencl/conv_2d_opencl.cc @@ -92,7 +92,8 @@ void Conv2dFunctor::operator()(const Tensor *input, } std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); if (kernel_h == kernel_w && kernel_h <= 5 && diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index 4109a97932163919e436a1847549c44ef8d60e31..62f8b09acc3458784cb3506f31dbbbdad51ef7ae 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -68,7 +68,6 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, auto runtime = OpenCLRuntime::Global(); *kernel = runtime->BuildKernel("conv_2d_1x1", kernel_name, built_options); - } if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc index 37b587dcef2caddae7ae5d73254a8c87dbf9f5a1..ecb109d1fbc456f8e9cefebcc6d29c35604770c1 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl.cc @@ -91,18 +91,18 @@ void DepthwiseConv2d(cl::Kernel *kernel, } kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); - kernel->setArg(idx++, static_cast(input_height)); - kernel->setArg(idx++, static_cast(input_width)); - kernel->setArg(idx++, static_cast(input_channel_blocks)); - kernel->setArg(idx++, static_cast(height)); - kernel->setArg(idx++, static_cast(width)); - kernel->setArg(idx++, static_cast(filter_height)); - kernel->setArg(idx++, static_cast(filter_width)); - kernel->setArg(idx++, static_cast(paddings[0] / 2)); - kernel->setArg(idx++, static_cast(paddings[1] / 2)); + kernel->setArg(idx++, static_cast(input_height)); + kernel->setArg(idx++, static_cast(input_width)); + kernel->setArg(idx++, static_cast(input_channel_blocks)); + kernel->setArg(idx++, static_cast(height)); + kernel->setArg(idx++, static_cast(width)); + kernel->setArg(idx++, static_cast(filter_height)); + kernel->setArg(idx++, static_cast(filter_width)); + kernel->setArg(idx++, static_cast(paddings[0] / 2)); + kernel->setArg(idx++, static_cast(paddings[1] / 2)); if (stride != 1 || dilations[0] != 1 || dilations[1] != 1) { - kernel->setArg(idx++, static_cast(dilations[0])); - kernel->setArg(idx++, static_cast(dilations[1])); + kernel->setArg(idx++, static_cast(dilations[0])); + kernel->setArg(idx++, static_cast(dilations[1])); } *prev_input_shape = input->shape(); } @@ -159,7 +159,8 @@ void DepthwiseConv2dFunctor::operator()( } std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc index dde05b29e2b2a6c8264ced78dea7d8fb3a37ef65..548d907de08ba8d25c884a5098f4da8b82db70ee 100644 --- a/mace/kernels/opencl/eltwise_opencl.cc +++ b/mace/kernels/opencl/eltwise_opencl.cc @@ -35,7 +35,6 @@ void EltwiseFunctor::operator()(const Tensor *input0, built_options.emplace(MakeString("-DELTWISE_TYPE=", type_)); if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); kernel_ = runtime->BuildKernel("eltwise", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input0->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index d5db519025514be82b5101de3c25c74c444c9b59..772a6d8d0c17774de35dca46e96fd9a15c94c38c 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -16,12 +16,14 @@ void FCWXKernel(cl::Kernel *kernel, std::vector *prev_input_shape, Tensor *output, const ActivationType activation, - std::vector &gws, - std::vector &lws, + std::vector *gws, + std::vector *lws, const float relux_max_limit, StatsFuture *future) { MACE_CHECK(input->dim(3) % 4 == 0) << "FC width kernel only support input with 4x channel."; + MACE_CHECK_NOTNULL(gws); + MACE_CHECK_NOTNULL(lws); auto runtime = OpenCLRuntime::Global(); if (kernel->get() == nullptr) { @@ -62,12 +64,11 @@ void FCWXKernel(cl::Kernel *kernel, const index_t output_blocks = RoundUpDiv4(output_size); const uint32_t wave_size = runtime->GetKernelWaveSize(*kernel); - gws = {4, (wave_size / 4), static_cast(batch * output_blocks)}; + *gws = {4, (wave_size / 4), static_cast(batch * output_blocks)}; const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(*kernel); - const uint32_t inter_local_blks = kwg_size / (gws[0] * gws[1]); - lws = {gws[0], gws[1], inter_local_blks}; - + const uint32_t inter_local_blks = kwg_size / ((*gws)[0] * (*gws)[1]); + *lws = {(*gws)[0], (*gws)[1], inter_local_blks}; } if (!IsVecEqual(*prev_input_shape, input->shape())) { const index_t batch = output->dim(0); @@ -80,21 +81,22 @@ void FCWXKernel(cl::Kernel *kernel, kernel->setArg(idx++, *(bias->opencl_image())); } kernel->setArg(idx++, *(output->opencl_image())); - kernel->setArg(idx++, (lws[0] * lws[1] * lws[2] * sizeof(float)), nullptr); + kernel->setArg(idx++, ((*lws)[0] * (*lws)[1] * (*lws)[2] * sizeof(float)), + nullptr); kernel->setArg(idx++, static_cast(input->dim(1))); kernel->setArg(idx++, static_cast(input->dim(2))); kernel->setArg(idx++, static_cast(RoundUpDiv4(input->dim(3)))); kernel->setArg(idx++, static_cast(output_blocks)); kernel->setArg(idx++, relux_max_limit); - gws[2] = static_cast(batch * output_blocks); + (*gws)[2] = static_cast(batch * output_blocks); *prev_input_shape = input->shape(); } cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( - *kernel, cl::NullRange, cl::NDRange(gws[0], gws[1], gws[2]), - cl::NDRange(lws[0], lws[1], lws[2]), nullptr, &event); + *kernel, cl::NullRange, cl::NDRange((*gws)[0], (*gws)[1], (*gws)[2]), + cl::NDRange((*lws)[0], (*lws)[1], (*lws)[2]), nullptr, &event); MACE_CHECK(error == CL_SUCCESS) << "Error code: " << error; if (future != nullptr) { @@ -105,7 +107,6 @@ void FCWXKernel(cl::Kernel *kernel, } }; } - } template @@ -116,10 +117,12 @@ void FCWTXKernel(cl::Kernel *kernel, std::vector *prev_input_shape, Tensor *output, const ActivationType activation, - std::vector &gws, - std::vector &lws, + std::vector *gws, + std::vector *lws, const float relux_max_limit, StatsFuture *future) { + MACE_CHECK_NOTNULL(gws); + MACE_CHECK_NOTNULL(lws); if (kernel->get() == nullptr) { auto runtime = OpenCLRuntime::Global(); std::set built_options; @@ -152,7 +155,7 @@ void FCWTXKernel(cl::Kernel *kernel, *kernel = runtime->BuildKernel("fully_connected", kernel_name, built_options); - lws = {16, 64, 1}; + *lws = {16, 64, 1}; } if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; @@ -171,18 +174,16 @@ void FCWTXKernel(cl::Kernel *kernel, const index_t batch = output->dim(0); const index_t output_blocks = RoundUpDiv4(output->dim(3)); - gws = { + *gws = { static_cast(batch), static_cast(output_blocks), }; - *prev_input_shape = input->shape(); } std::stringstream ss; ss << "fc_opencl_kernel_" << output->dim(0) << "_" << output->dim(1) << "_" << output->dim(2) << "_" << output->dim(3); - TuningOrRun2DKernel(*kernel, ss.str(), gws.data(), lws, future); - + TuningOrRun2DKernel(*kernel, ss.str(), gws->data(), *lws, future); } template @@ -194,17 +195,18 @@ void FullyConnectedFunctor::operator()( StatsFuture *future) { std::vector output_shape = {input->dim(0), 1, 1, weight->dim(0)}; std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); if (weight_type_ == BufferType::WEIGHT_HEIGHT) { FCWTXKernel(&kernel_, input, weight, bias, &input_shape_, output, - activation_, gws_, lws_, relux_max_limit_, future); + activation_, &gws_, &lws_, relux_max_limit_, future); } else { FCWXKernel(&kernel_, input, weight, bias, &input_shape_, output, - activation_, gws_, lws_, relux_max_limit_, future); + activation_, &gws_, &lws_, relux_max_limit_, future); } -}; +} template struct FullyConnectedFunctor; diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index 3f41966299f2b8ec4b61e65d1191eaef1d94b533..e3cadbc6f5d1cd73b7f5b6a2de02c370a19ce0c1 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -3,6 +3,11 @@ // #include "mace/kernels/opencl/helper.h" + +#include +#include +#include + #include "mace/utils/tuner.h" #include "mace/utils/utils.h" @@ -11,91 +16,92 @@ namespace kernels { // [(C + 3) / 4 * W, N * H] void CalInOutputImageShape(const std::vector &shape, /* NHWC */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[3]) * shape[2]; - image_shape[1] = shape[0] * shape[1]; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[3]) * shape[2]; + (*image_shape)[1] = shape[0] * shape[1]; } // [RoundUp<4>(Ic) * H * W, (Oc + 3) / 4] void CalConv2dFilterImageShape(const std::vector &shape, /* HWOI */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = shape[0] * shape[1] * RoundUp(shape[3], 4); - image_shape[1] = RoundUpDiv4(shape[2]); + image_shape->resize(2); + (*image_shape)[0] = shape[0] * shape[1] * RoundUp(shape[3], 4); + (*image_shape)[1] = RoundUpDiv4(shape[2]); } // [H * W * M, (Ic + 3) / 4] void CalDepthwiseConv2dFilterImageShape( const std::vector &shape, /* HWIM */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = shape[0] * shape[1] * shape[3]; - image_shape[1] = RoundUpDiv4(shape[2]); + image_shape->resize(2); + (*image_shape)[0] = shape[0] * shape[1] * shape[3]; + (*image_shape)[1] = RoundUpDiv4(shape[2]); } // [(size + 3) / 4, 1] void CalArgImageShape(const std::vector &shape, - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 1); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[0]); - image_shape[1] = 1; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[0]); + (*image_shape)[1] = 1; } // Only support 3x3 now // [ (Ic + 3) / 4, 16 * Oc] void CalWinogradFilterImageShape( const std::vector &shape, /* Oc, Ic, H, W*/ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[1]); - image_shape[1] = (shape[0] << 4); + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[1]); + (*image_shape)[1] = (shape[0] << 4); } // [W * C, N * RoundUp<4>(H)] void CalInOutHeightImageShape(const std::vector &shape, /* NHWC */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = shape[2] * shape[3]; - image_shape[1] = shape[0] * RoundUpDiv4(shape[1]); + image_shape->resize(2); + (*image_shape)[0] = shape[2] * shape[3]; + (*image_shape)[1] = shape[0] * RoundUpDiv4(shape[1]); } // [RoundUp<4>(W) * C, N * H] void CalInOutWidthImageShape(const std::vector &shape, /* NHWC */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 4); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[2]) * shape[3]; - image_shape[1] = shape[0] * shape[1]; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[2]) * shape[3]; + (*image_shape)[1] = shape[0] * shape[1]; } // [W, (H + 3) / 4] void CalWeightHeightImageShape(const std::vector &shape, /* HW */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 2); - image_shape.resize(2); - image_shape[0] = shape[1]; - image_shape[1] = RoundUpDiv4(shape[0]); + image_shape->resize(2); + (*image_shape)[0] = shape[1]; + (*image_shape)[1] = RoundUpDiv4(shape[0]); } // [(W + 3) / 4, H] void CalWeightWidthImageShape(const std::vector &shape, /* HW */ - std::vector &image_shape) { + std::vector *image_shape) { MACE_CHECK(shape.size() == 2); - image_shape.resize(2); - image_shape[0] = RoundUpDiv4(shape[1]); - image_shape[1] = shape[0]; + image_shape->resize(2); + (*image_shape)[0] = RoundUpDiv4(shape[1]); + (*image_shape)[1] = shape[0]; } void CalImage2DShape(const std::vector &shape, /* NHWC */ const BufferType type, - std::vector &image_shape) { + std::vector *image_shape) { + MACE_CHECK_NOTNULL(image_shape); switch (type) { case CONV2D_FILTER: CalConv2dFilterImageShape(shape, image_shape); @@ -188,7 +194,7 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) { } } -void TuningOrRun3DKernel(cl::Kernel &kernel, +void TuningOrRun3DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, @@ -202,7 +208,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, local_ws[2] = std::min(gws[2], kwg_size / (local_ws[0] * local_ws[1])); return { - // TODO tuning these magic numbers + // TODO(heliangliang): tuning these magic numbers {local_ws[0], local_ws[1], local_ws[2], 1}, {kwg_size / 16, 4, 4, 1}, {kwg_size / 32, 4, 8, 1}, @@ -291,7 +297,7 @@ void TuningOrRun3DKernel(cl::Kernel &kernel, } } -void TuningOrRun2DKernel(cl::Kernel &kernel, +void TuningOrRun2DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, diff --git a/mace/kernels/opencl/helper.h b/mace/kernels/opencl/helper.h index 56bf295ee2dec5451f9d142ccd0e63441b37e545..89712c9b96aa043f5019cde6eae23aa07109f6f7 100644 --- a/mace/kernels/opencl/helper.h +++ b/mace/kernels/opencl/helper.h @@ -5,6 +5,9 @@ #ifndef MACE_KERNELS_OPENCL_HELPER_H_ #define MACE_KERNELS_OPENCL_HELPER_H_ +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" @@ -30,7 +33,7 @@ enum BufferType { void CalImage2DShape(const std::vector &shape, /* NHWC */ const BufferType type, - std::vector &image_shape); + std::vector *image_shape); std::vector CalWinogradShape(const std::vector &shape, const BufferType type); @@ -43,13 +46,13 @@ std::string DtToCLDt(const DataType dt); std::string DtToUpstreamCLDt(const DataType dt); -void TuningOrRun3DKernel(cl::Kernel &kernel, +void TuningOrRun3DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, StatsFuture *future); -void TuningOrRun2DKernel(cl::Kernel &kernel, +void TuningOrRun2DKernel(const cl::Kernel &kernel, const std::string tuning_key, const uint32_t *gws, const std::vector &lws, @@ -78,7 +81,6 @@ bool IsVecEqual(const std::vector &input0, (std::equal(input0.begin(), input0.end(), input1.begin()))); } -namespace { template void AppendToStream(std::stringstream *ss, const std::string &delimiter, T v) { (*ss) << v; @@ -92,7 +94,6 @@ void AppendToStream(std::stringstream *ss, (*ss) << first << delimiter; AppendToStream(ss, delimiter, args...); } -} // namespace template std::string Concat(Args... args) { diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index 4b61edb271df814b4bdcea251d28b2ca03cf3be4..c5bd2b0ba3f789f28992a49e10ffa7b4a357a8c5 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -17,7 +17,7 @@ void MatMulFunctor::operator()(const Tensor *A, StatsFuture *future) { std::vector c_shape = {A->dim(0), A->dim(1), B->dim(2), 1}; std::vector c_image_shape; - CalImage2DShape(c_shape, BufferType::IN_OUT_HEIGHT, c_image_shape); + CalImage2DShape(c_shape, BufferType::IN_OUT_HEIGHT, &c_image_shape); C->ResizeImage(c_shape, c_image_shape); const index_t batch = C->dim(0); @@ -56,7 +56,7 @@ void MatMulFunctor::operator()(const Tensor *A, ss << "matmul_opencl_kernel_" << C->dim(0) << "_" << C->dim(1) << "_" << C->dim(2) << "_" << C->dim(3); TuningOrRun2DKernel(kernel_, ss.str(), gws, lws, future); -}; +} template struct MatMulFunctor; diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index d8a6d675a8da5749d3a2cf02360e3ec619a809ff..5b52a0934facd4b4f14affb9bafb819d258fa444 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -36,12 +36,11 @@ void PoolingFunctor::operator()(const Tensor *input, built_options.emplace("-DPOOL_AVG"); } kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input->shape())) { std::vector output_shape(4); - std::vector filter_shape = {kernels_[0], kernels_[1], input->dim(3), - input->dim(3)}; + std::vector filter_shape = {kernels_[0], kernels_[1], + input->dim(3), input->dim(3)}; std::vector paddings(2); if (paddings_.empty()) { @@ -50,12 +49,14 @@ void PoolingFunctor::operator()(const Tensor *input, padding_type_, output_shape.data(), paddings.data()); } else { paddings = paddings_; - CalcOutputSize(input->shape().data(), filter_shape.data(), paddings_.data(), - dilations_, strides_, RoundType::CEIL, output_shape.data()); + CalcOutputSize(input->shape().data(), filter_shape.data(), + paddings_.data(), dilations_, strides_, RoundType::CEIL, + output_shape.data()); } std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); uint32_t idx = 0; diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index a3bb2ee1951f433ae41a4c2dc41367fe77d1e497..373709168f190a6122d29bbaee457a2b356b4833 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -34,7 +34,6 @@ void ResizeBilinearFunctor::operator()( built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("resize_bilinear", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, input->shape())) { MACE_CHECK(out_height > 0 && out_width > 0); @@ -42,7 +41,7 @@ void ResizeBilinearFunctor::operator()( std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, - output_image_shape); + &output_image_shape); output->ResizeImage(output_shape, output_image_shape); float height_scale = @@ -60,7 +59,6 @@ void ResizeBilinearFunctor::operator()( kernel_.setArg(idx++, static_cast(out_height)); input_shape_ = input->shape(); - } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index 63efc555dbf8a743e3fc6881a06e0202480bbd16..6bc9ae3bf57d8c4f3df9ea41cad9bf5f283ce01a 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -24,7 +24,7 @@ void SliceFunctor::operator()( input->dim(2), output_channels}); std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); for (size_t i= 0; i < outputs_count; ++i) { output_list[i]->ResizeImage(output_shape, image_shape); } diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc index 4aabe9017f06073ddffe7e04871b62b76da15dc6..077db9ddc1ecf2d72f71511349945ea53fe0eb73 100644 --- a/mace/kernels/opencl/softmax_opencl.cc +++ b/mace/kernels/opencl/softmax_opencl.cc @@ -33,7 +33,6 @@ void SoftmaxFunctor::operator()(const Tensor *logits, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options); - } if (!IsVecEqual(input_shape_, logits->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc index 91f5564d520de9e11ad832231060f37ea3f64191..fe911fbddb49687c74edf1e29f0276c86a249ccc 100644 --- a/mace/kernels/opencl/space_to_batch_opencl.cc +++ b/mace/kernels/opencl/space_to_batch_opencl.cc @@ -22,7 +22,8 @@ void SpaceToBatchFunctor::operator()( StatsFuture *future) { const char *kernel_name = nullptr; std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, + &output_image_shape); if (b2s_) { space_tensor->ResizeImage(output_shape, output_image_shape); kernel_name = "batch_to_space"; @@ -42,7 +43,6 @@ void SpaceToBatchFunctor::operator()( DtToCLCMDDt(DataTypeToEnum::value)); kernel_ = runtime->BuildKernel("space_to_batch", kernel_name, built_options); - } if (!IsVecEqual(space_shape_, space_tensor->shape())) { uint32_t idx = 0; diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index c07ccc9944786e8cbcd8dde4aa6ada7794542019..3b86640866a307ba97d7b0f064a1df099c021be4 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -27,7 +27,6 @@ void WinogradTransformFunctor::operator()( auto runtime = OpenCLRuntime::Global(); kernel_ = runtime->BuildKernel("winograd_transform", obfuscated_kernel_name, built_options); - } std::vector output_shape(4); std::vector filter_shape = {3, 3, input_tensor->dim(3), 1}; @@ -49,7 +48,7 @@ void WinogradTransformFunctor::operator()( if (!IsVecEqual(input_shape_, input_tensor->shape())) { output_shape = {16, input_tensor->dim(3), out_width, 1}; std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, &image_shape); output_tensor->ResizeImage(output_shape, image_shape); uint32_t idx = 0; @@ -83,7 +82,6 @@ void WinogradInverseTransformFunctor::operator()( const Tensor *bias, Tensor *output_tensor, StatsFuture *future) { - if (kernel_.get() == nullptr) { std::string obfuscated_kernel_name = MACE_OBFUSCATE_SYMBOL("winograd_inverse_transform_2x2"); @@ -125,7 +123,7 @@ void WinogradInverseTransformFunctor::operator()( std::vector output_shape = {batch_, height_, width_, input_tensor->dim(1)}; std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); output_tensor->ResizeImage(output_shape, image_shape); const uint32_t round_h = (height_ + 1) / 2; diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index bc9892e5864d420f9505de9462df5a17eedb4241..15cc691e71927300bec48224a7666f1468eb74c1 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -2,10 +2,13 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_KERNELS_POOLING_H -#define MACE_KERNELS_POOLING_H +#ifndef MACE_KERNELS_POOLING_H_ +#define MACE_KERNELS_POOLING_H_ +#include #include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -188,4 +191,4 @@ struct PoolingFunctor : PoolingFunctorBase { } // namespace kernels } // namespace mace -#endif // MACE_KERNELS_POOLING_H +#endif // MACE_KERNELS_POOLING_H_ diff --git a/mace/kernels/reshape.h b/mace/kernels/reshape.h index 544ba360a4e1c751dc802381fb99ae977f749a26..14e560789db709464400136116ba02d373207c65 100644 --- a/mace/kernels/reshape.h +++ b/mace/kernels/reshape.h @@ -4,6 +4,8 @@ #ifndef MACE_KERNELS_RESHAPE_H_ #define MACE_KERNELS_RESHAPE_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -20,7 +22,7 @@ struct ReshapeFunctor { Tensor *output, StatsFuture *future) { output->Resize(out_shape); - // TODO copy on write to avoid this copy. + // TODO(liuqi): copy on write to avoid this copy. output->CopyBytes(input->raw_data(), input->size() * sizeof(T)); } }; diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index 52c1da102926870d6e65dfa52ee68c7ff5a43f76..65e5121211d4d836d6d17809a843e0778defaecb 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -4,6 +4,9 @@ #ifndef MACE_KERNELS_RESIZE_BILINEAR_H_ #define MACE_KERNELS_RESIZE_BILINEAR_H_ +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -11,7 +14,6 @@ namespace mace { namespace kernels { -namespace { struct CachedInterpolation { index_t lower; // Lower source index used in the interpolation index_t upper; // Upper source index used in the interpolation @@ -101,7 +103,6 @@ void ResizeImage(const T *images, } } } -} struct ResizeBilinearFunctorBase { ResizeBilinearFunctorBase(const std::vector &size, diff --git a/mace/kernels/slice.h b/mace/kernels/slice.h index b08ea7ef4fcd1e235375952085e9965c7f897334..59d9d667b0a63da1e1d3ee471aecec9efd9be1e9 100644 --- a/mace/kernels/slice.h +++ b/mace/kernels/slice.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_SLICE_H_ #define MACE_KERNELS_SLICE_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -16,7 +18,6 @@ namespace kernels { template struct SliceFunctor { - void operator()(const Tensor *input, const std::vector &output_list, StatsFuture *future) { @@ -56,15 +57,13 @@ struct SliceFunctor { template struct SliceFunctor { - void operator()(const Tensor *input, const std::vector &output_list, StatsFuture *future); cl::Kernel kernel_; - }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_SLICE_H_ diff --git a/mace/kernels/softmax.h b/mace/kernels/softmax.h index d5bc5717d8cfdbfc391de634f08d8fd427e5ca9d..a1c4ea2f6e5b9200f17d54906316a83cbefaa49a 100644 --- a/mace/kernels/softmax.h +++ b/mace/kernels/softmax.h @@ -5,6 +5,10 @@ #ifndef MACE_KERNELS_SOFTMAX_H_ #define MACE_KERNELS_SOFTMAX_H_ +#include +#include +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h" @@ -38,7 +42,7 @@ struct SoftmaxFunctor { for (index_t c = 1; c < num_classes; ++c) { max_value = std::max(max_value, logits_ptr[pos + c]); } - // TODO: check overflow? + // TODO(liuqi): check overflow? T sum = 0; for (index_t c = 0; c < num_classes; ++c) { exp_data[c] = ::exp((logits_ptr[pos + c] - max_value)); @@ -60,7 +64,7 @@ struct SoftmaxFunctor { std::vector input_shape_; }; -} // namepsace kernels +} // namespace kernels } // namespace mace #endif // MACE_KERNELS_SOFTMAX_H_ diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index ef7467b57acd0fc1d3563148ec53dd1ea4869a9f..757f784820f90fee842fc385606db4755cb52293 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -2,8 +2,10 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_KERNELS_CONV_2D_H_ -#define MACE_KERNELS_CONV_2D_H_ +#ifndef MACE_KERNELS_SPACE_TO_BATCH_H_ +#define MACE_KERNELS_SPACE_TO_BATCH_H_ + +#include #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" @@ -60,4 +62,4 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { } // namespace kernels } // namespace mace -#endif // MACE_KERNELS_CONV_2D_H_ +#endif // MACE_KERNELS_SPACE_TO_BATCH_H_ diff --git a/mace/kernels/winograd_transform.h b/mace/kernels/winograd_transform.h index f3b7f7d640328860f5ffdc5dc6b065e78e324896..6f483dacb06f920c54b14930dba3fd05ff845e44 100644 --- a/mace/kernels/winograd_transform.h +++ b/mace/kernels/winograd_transform.h @@ -5,6 +5,8 @@ #ifndef MACE_KERNELS_WINOGRAD_TRANSFORM_H_ #define MACE_KERNELS_WINOGRAD_TRANSFORM_H_ +#include + #include "mace/core/future.h" #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/tensor.h"