diff --git a/mace/core/operator.cc b/mace/core/operator.cc index ae6ca107e1f13e72958f401e88cdde5af6005d98..ad3c8e5820d469802cbe0c9cea3da2c12661c227 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -62,6 +62,8 @@ std::unique_ptr OperatorRegistry::CreateOperator( } } +namespace ops { + extern void Register_Activation(OperatorRegistry *op_registry); extern void Register_AddN(OperatorRegistry *op_registry); extern void Register_BatchNorm(OperatorRegistry *op_registry); @@ -88,32 +90,34 @@ extern void Register_Eltwise(OperatorRegistry *op_registry); extern void Register_FullyConnected(OperatorRegistry *op_registry); extern void Register_Slice(OperatorRegistry *op_registry); +} // namespace ops + OperatorRegistry::OperatorRegistry() { - Register_Activation(this); - Register_AddN(this); - Register_BatchNorm(this); - Register_BatchToSpaceND(this); - Register_BiasAdd(this); - Register_BufferToImage(this); - Register_ChannelShuffle(this); - Register_Concat(this); - Register_Conv2D(this); - Register_DepthwiseConv2d(this); - Register_FoldedBatchNorm(this); - Register_FusedConv2D(this); - Register_GlobalAvgPooling(this); - Register_ImageToBuffer(this); - Register_Pooling(this); - Register_ResizeBilinear(this); - Register_Softmax(this); - Register_SpaceToBatchND(this); - Register_MatMul(this); - Register_WinogradTransform(this); - Register_WinogradInverseTransform(this); - Register_Reshape(this); - Register_Eltwise(this); - Register_FullyConnected(this); - Register_Slice(this); + ops::Register_Activation(this); + ops::Register_AddN(this); + ops::Register_BatchNorm(this); + ops::Register_BatchToSpaceND(this); + ops::Register_BiasAdd(this); + ops::Register_BufferToImage(this); + ops::Register_ChannelShuffle(this); + ops::Register_Concat(this); + ops::Register_Conv2D(this); + ops::Register_DepthwiseConv2d(this); + ops::Register_FoldedBatchNorm(this); + ops::Register_FusedConv2D(this); + ops::Register_GlobalAvgPooling(this); + ops::Register_ImageToBuffer(this); + ops::Register_Pooling(this); + ops::Register_ResizeBilinear(this); + ops::Register_Softmax(this); + ops::Register_SpaceToBatchND(this); + ops::Register_MatMul(this); + ops::Register_WinogradTransform(this); + ops::Register_WinogradInverseTransform(this); + ops::Register_Reshape(this); + ops::Register_Eltwise(this); + ops::Register_FullyConnected(this); + ops::Register_Slice(this); } } // namespace mace diff --git a/mace/kernels/activation.h b/mace/kernels/activation.h index dd750a389b348223392b49fe06a39c8dda9005b9..1e3601a4a5f2f38dbf0bfa6d5acc8dbd21c2fa4d 100644 --- a/mace/kernels/activation.h +++ b/mace/kernels/activation.h @@ -152,6 +152,7 @@ class ActivationFunctor { T relux_max_limit_; cl::Kernel kernel_; std::string tuning_key_prefix_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index 6e9ba2d4111a1cb9e67a7adb4cc3131d250e352c..3a5a45df4dd3476e4ab7a2f58bae658b461e206a 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -91,6 +91,7 @@ struct AddNFunctor { StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/batch_norm.h b/mace/kernels/batch_norm.h index 1e6a12bff0f3f0878a5c017f81c21aa4a79c40a0..57f0f4d66a0107ef6e907e4c5579bed0feef2be3 100644 --- a/mace/kernels/batch_norm.h +++ b/mace/kernels/batch_norm.h @@ -156,6 +156,7 @@ struct BatchNormFunctor : BatchNormFunctorBase { Tensor *output, StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namepsace kernels diff --git a/mace/kernels/bias_add.h b/mace/kernels/bias_add.h index 28adcf8deb2034a0fb7b9812cb6975265e09f3fa..d8e411efa8a70ebd2ca850f6ac91fa1bd2198fe6 100644 --- a/mace/kernels/bias_add.h +++ b/mace/kernels/bias_add.h @@ -62,6 +62,7 @@ struct BiasAddFunctor { Tensor *output, StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namepsace kernels diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index e627121d176a7f36be3f33c9cdc4085048d55f2a..da2ce094a141984c49ad21a208bdaafb8a97311e 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -55,6 +55,7 @@ struct ChannelShuffleFunctor { cl::Kernel kernel_; const int groups_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index 021b0f6176f7a8c39bc8525215802dee5ea08f24..6870594641baaab5aae866c033107a7b6df0507c 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -83,6 +83,7 @@ struct ConcatFunctor : ConcatFunctorBase { Tensor *output, StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namepsace kernels diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index a4a24eedae4b7c92f2ff1b1841c78b6f99d566bc..b107d33229c0b77be24e0702db9cf0585801b06f 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -401,6 +401,7 @@ struct Conv2dFunctor : Conv2dFunctorBase { StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/depthwise_conv2d.h b/mace/kernels/depthwise_conv2d.h index c0a1719f86f70559a6ab6bd97072b31e21e2ead5..dc6b737077ab16b093d3993c5f414430fa17d186 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -439,6 +439,7 @@ struct DepthwiseConv2dFunctor StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index 263dfb808a4810d4d1a1ca49c4728d813cda1552..1aa883d568ff493fa092e23637b2b6accf1d8a38 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -94,6 +94,7 @@ struct EltwiseFunctor : EltwiseFunctorBase { StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h index b8a740215f3c3d23a85cc4d55184ab0b65e4c13e..5c527d4593e02bead0e55998674690a4c5864e50 100644 --- a/mace/kernels/fully_connected.h +++ b/mace/kernels/fully_connected.h @@ -90,6 +90,7 @@ struct FullyConnectedFunctor : FullyConnectedBase { cl::Kernel kernel_; std::vector gws_; std::vector lws_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/opencl/activation_opencl.cc b/mace/kernels/opencl/activation_opencl.cc index 180e38cafad10fc07eb9d51ac7cbde501ef94982..9792cae56889275053362ed6e7d230ff744fd4ac 100644 --- a/mace/kernels/opencl/activation_opencl.cc +++ b/mace/kernels/opencl/activation_opencl.cc @@ -58,6 +58,9 @@ void ActivationFunctor::operator()(const Tensor *input, LOG(FATAL) << "Unknown activation type: " << activation_; } kernel_ = runtime->BuildKernel("activation", kernel_name, built_options); + } + + if (!IsVecEqual(input_shape_, input->shape())) { int idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); if (activation_ == PRELU) { @@ -66,6 +69,8 @@ void ActivationFunctor::operator()(const Tensor *input, } kernel_.setArg(idx++, static_cast(relux_max_limit_)); kernel_.setArg(idx++, *(output->opencl_image())); + + input_shape_ = input->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index a6863a59a9abfa2fa56ac5555f820d6101ca13fb..9f9571d0637026330e1d2f5ad2dea31f116eeefc 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -32,15 +32,6 @@ void AddNFunctor::operator()( MACE_CHECK(channels == input_tensors[i]->dim(3)); } - std::vector output_shape = input_tensors[0]->shape(); - std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); - output_tensor->ResizeImage(output_shape, output_image_shape); - - const index_t channel_blocks = RoundUpDiv4(channels); - const index_t width_pixels = channel_blocks * width; - const index_t batch_height_pixels = batch * height; - if (kernel_.get() == nullptr) { if (input_tensors.size() > 4) { MACE_NOT_IMPLEMENTED; @@ -55,11 +46,26 @@ void AddNFunctor::operator()( built_options.emplace(MakeString("-DINPUT_NUM=", input_tensors.size())); kernel_ = runtime->BuildKernel("addn", kernel_name, built_options); + } + + std::vector output_shape = input_tensors[0]->shape(); + + const index_t channel_blocks = RoundUpDiv4(channels); + const index_t width_pixels = channel_blocks * width; + const index_t batch_height_pixels = batch * height; + + if (!IsVecEqual(input_shape_, input_tensors[0]->shape())) { + std::vector output_image_shape; + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + output_tensor->ResizeImage(output_shape, output_image_shape); + uint32_t idx = 0; for (auto input : input_tensors) { kernel_.setArg(idx++, *(input->opencl_image())); } kernel_.setArg(idx++, *(output_tensor->opencl_image())); + + input_shape_ = input_tensors[0]->shape(); } const uint32_t gws[2] = {static_cast(width_pixels), diff --git a/mace/kernels/opencl/batch_norm_opencl.cc b/mace/kernels/opencl/batch_norm_opencl.cc index 8f14f34bedb97c7d9a228eebc0128448db1d4023..d9dfb8254d0bea67c0eb78c673579e5f57301fd5 100644 --- a/mace/kernels/opencl/batch_norm_opencl.cc +++ b/mace/kernels/opencl/batch_norm_opencl.cc @@ -61,7 +61,8 @@ void BatchNormFunctor::operator()(const Tensor *input, } kernel_ = runtime->BuildKernel("batch_norm", kernel_name, built_options); - + } + if (!IsVecEqual(input_shape_, input->shape())) { uint32_t idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, *(scale->opencl_image())); @@ -73,6 +74,8 @@ void BatchNormFunctor::operator()(const Tensor *input, } kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, relux_max_limit_); + + input_shape_ = input->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/bias_add_opencl.cc b/mace/kernels/opencl/bias_add_opencl.cc index 613b633bc8ca9a366d88a74dfcfc669b7f1b1ce1..d2490000b71a034a8cbe19f9ada9e5f5e1ed08fa 100644 --- a/mace/kernels/opencl/bias_add_opencl.cc +++ b/mace/kernels/opencl/bias_add_opencl.cc @@ -33,10 +33,13 @@ void BiasAddFunctor::operator()(const Tensor *input, built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("bias_add", kernel_name, built_options); + } + if (!IsVecEqual(input_shape_, input->shape())) { uint32_t idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, *(bias->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image())); + input_shape_ = input->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/channel_shuffle.cc b/mace/kernels/opencl/channel_shuffle.cc index 3325ff24f1eff56a1dea3aec9323f490d40aac3c..a88b3b059cfacd31249f09dffd72f8ddee230c00 100644 --- a/mace/kernels/opencl/channel_shuffle.cc +++ b/mace/kernels/opencl/channel_shuffle.cc @@ -13,9 +13,10 @@ namespace mace { namespace kernels { template -void ChannelShuffleFunctor::operator()(const Tensor *input, - Tensor *output, - StatsFuture *future) { +void ChannelShuffleFunctor::operator()( + const Tensor *input, + Tensor *output, + StatsFuture *future) { output->ResizeLike(input); const index_t batch = input->dim(0); @@ -39,12 +40,15 @@ void ChannelShuffleFunctor::operator()(const Tensor *inpu built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("channel_shuffle", kernel_name, built_options); - + } + if (!IsVecEqual(input_shape_, input->shape())) { uint32_t idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, groups_); kernel_.setArg(idx++, static_cast(channels_per_group)); kernel_.setArg(idx++, *(output->opencl_image())); + + input_shape_ = input->shape(); } const uint32_t gws[3] = {static_cast(group_channel_blocks), static_cast(width), diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 119ec7cd61a99f915ff7ec29443839ea2923d3a4..e99ab0605d02714b6851cb3cb8cf96f865ae5e1c 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -15,6 +15,7 @@ static void Concat2(cl::Kernel *kernel, const Tensor *input0, const Tensor *input1, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future) { const index_t batch = output->dim(0); @@ -41,6 +42,8 @@ static void Concat2(cl::Kernel *kernel, } *kernel = runtime->BuildKernel("concat", kernel_name, built_options); + } + if (!IsVecEqual(*prev_input_shape, input0->shape())) { uint32_t idx = 0; kernel->setArg(idx++, *(static_cast(input0->opencl_image()))); @@ -49,6 +52,7 @@ static void Concat2(cl::Kernel *kernel, kernel->setArg(idx++, static_cast(input0->dim(3))); kernel->setArg(idx++, *(static_cast(output->opencl_image()))); + *prev_input_shape = input0->shape(); } const uint32_t gws[3] = { @@ -142,7 +146,7 @@ void ConcatFunctor::operator()( switch (inputs_count) { case 2: Concat2(&kernel_, input_list[0], input_list[1], DataTypeToEnum::value, - output, future); + &input_shape_, output, future); break; default: if (divisible_four) { diff --git a/mace/kernels/opencl/conv_2d_opencl.cc b/mace/kernels/opencl/conv_2d_opencl.cc index 3ed87e7c6b85eb5d7f83ecdd3d2aca90b10a9ed9..46683fd1709eda83be6826d4e2519d28bf4956b1 100644 --- a/mace/kernels/opencl/conv_2d_opencl.cc +++ b/mace/kernels/opencl/conv_2d_opencl.cc @@ -18,6 +18,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, const ActivationType activation, const float relux_max_limit, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future); @@ -31,6 +32,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, const ActivationType activation, const float relux_max_limit, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future); @@ -44,6 +46,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel, const ActivationType activation, const float relux_max_limit, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future); @@ -57,8 +60,8 @@ void Conv2dFunctor::operator()(const Tensor *input, cl::Kernel * kernel, const Tensor *input, const Tensor *filter, const Tensor *bias, const int stride, const int *padding, const int *dilations, const ActivationType activation, - const float relux_max_limit, const DataType dt, Tensor *output, - StatsFuture *future); + const float relux_max_limit, const DataType dt, + std::vector *input_shape, Tensor *output, StatsFuture *future); // Selection matrix: kernel_size x stride_size static const Conv2dOpenclFunction selector[5] = { Conv2dOpenclK1x1, nullptr, Conv2dOpenclK3x3, nullptr, nullptr}; @@ -97,11 +100,11 @@ void Conv2dFunctor::operator()(const Tensor *input, auto conv2d_func = selector[kernel_h - 1]; conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, activation_, relux_max_limit_, - DataTypeToEnum::value, output, future); + DataTypeToEnum::value, &input_shape_, output, future); } else { Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, activation_, relux_max_limit_, - DataTypeToEnum::value, output, future); + DataTypeToEnum::value, &input_shape_, output, future); } } diff --git a/mace/kernels/opencl/conv_2d_opencl_1x1.cc b/mace/kernels/opencl/conv_2d_opencl_1x1.cc index 41eaad5633917ece4126506aea31f2bad08afd98..4109a97932163919e436a1847549c44ef8d60e31 100644 --- a/mace/kernels/opencl/conv_2d_opencl_1x1.cc +++ b/mace/kernels/opencl/conv_2d_opencl_1x1.cc @@ -20,6 +20,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, const ActivationType activation, const float relux_max_limit, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future) { const index_t batch = output->dim(0); @@ -68,6 +69,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, auto runtime = OpenCLRuntime::Global(); *kernel = runtime->BuildKernel("conv_2d_1x1", kernel_name, built_options); + } + if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; kernel->setArg(idx++, *(input->opencl_image())); kernel->setArg(idx++, *(filter->opencl_image())); @@ -83,6 +86,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, kernel->setArg(idx++, static_cast(height)); kernel->setArg(idx++, static_cast(width)); kernel->setArg(idx++, stride); + + *prev_input_shape = input->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/conv_2d_opencl_3x3.cc b/mace/kernels/opencl/conv_2d_opencl_3x3.cc index df2672c95a1aa0ac9ab421df1a2e91de039e8f5f..ba047cdfad9e6280020d98d92170ea3c8820aa9d 100644 --- a/mace/kernels/opencl/conv_2d_opencl_3x3.cc +++ b/mace/kernels/opencl/conv_2d_opencl_3x3.cc @@ -22,6 +22,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, const ActivationType activation, const float relux_max_limit, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future) { const index_t batch = output->dim(0); @@ -62,7 +63,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, auto runtime = OpenCLRuntime::Global(); *kernel = runtime->BuildKernel("conv_2d_3x3", kernel_name, built_options); - + } + if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; kernel->setArg(idx++, *(input->opencl_image())); kernel->setArg(idx++, *(filter->opencl_image())); @@ -81,6 +83,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, kernel->setArg(idx++, padding[1] / 2); kernel->setArg(idx++, dilations[0]); kernel->setArg(idx++, dilations[1]); + + *prev_input_shape = input->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/conv_2d_opencl_general.cc b/mace/kernels/opencl/conv_2d_opencl_general.cc index c317aa8c635ca817a30b01213bf5aa7f8355c44e..fd48605f2cfee1827a559af03a799120b9561e52 100644 --- a/mace/kernels/opencl/conv_2d_opencl_general.cc +++ b/mace/kernels/opencl/conv_2d_opencl_general.cc @@ -22,6 +22,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel, const ActivationType activation, const float relux_max_limit, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future) { const index_t batch = output->dim(0); @@ -62,7 +63,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel, auto runtime = OpenCLRuntime::Global(); *kernel = runtime->BuildKernel("conv_2d", kernel_name, built_options); - + } + if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; kernel->setArg(idx++, *(input->opencl_image())); kernel->setArg(idx++, *(filter->opencl_image())); @@ -83,6 +85,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel, kernel->setArg(idx++, padding[1] / 2); kernel->setArg(idx++, dilations[0]); kernel->setArg(idx++, dilations[1]); + + *prev_input_shape = input->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/depthwise_conv_opencl.cc b/mace/kernels/opencl/depthwise_conv_opencl.cc index 1b99188b3acb34fb5c87c5f30a58de6c5b400f15..37b587dcef2caddae7ae5d73254a8c87dbf9f5a1 100644 --- a/mace/kernels/opencl/depthwise_conv_opencl.cc +++ b/mace/kernels/opencl/depthwise_conv_opencl.cc @@ -21,6 +21,7 @@ void DepthwiseConv2d(cl::Kernel *kernel, const ActivationType activation, const float relux_max_limit, const DataType dt, + std::vector *prev_input_shape, Tensor *output, StatsFuture *future) { const index_t batch = output->dim(0); @@ -35,17 +36,6 @@ void DepthwiseConv2d(cl::Kernel *kernel, const index_t input_channel_blocks = RoundUpDiv4(input_channels); const index_t width_blocks = RoundUpDiv4(width); if (kernel->get() == nullptr) { - const index_t input_batch = input->dim(0); - const index_t input_height = input->dim(1); - const index_t input_width = input->dim(2); - - const index_t filter_height = filter->dim(0); - const index_t filter_width = filter->dim(1); - MACE_CHECK(multiplier == 1, "Multiplier > 1 not supported"); - MACE_CHECK(multiplier * input_channels == channels); - MACE_CHECK(filter->dim(2) == input_channels, filter->dim(2), "!=", - input_channels); - auto runtime = OpenCLRuntime::Global(); std::set built_options; std::string kernel_name = MACE_OBFUSCATE_SYMBOL("depthwise_conv2d"); @@ -80,6 +70,18 @@ void DepthwiseConv2d(cl::Kernel *kernel, *kernel = runtime->BuildKernel("depthwise_conv2d", kernel_name, built_options); + } + if (!IsVecEqual(*prev_input_shape, input->shape())) { + const index_t input_batch = input->dim(0); + const index_t input_height = input->dim(1); + const index_t input_width = input->dim(2); + + const index_t filter_height = filter->dim(0); + const index_t filter_width = filter->dim(1); + MACE_CHECK(multiplier == 1, "Multiplier > 1 not supported"); + MACE_CHECK(multiplier * input_channels == channels); + MACE_CHECK(filter->dim(2) == input_channels, filter->dim(2), "!=", + input_channels); uint32_t idx = 0; kernel->setArg(idx++, *(input->opencl_image())); @@ -102,6 +104,7 @@ void DepthwiseConv2d(cl::Kernel *kernel, kernel->setArg(idx++, static_cast(dilations[0])); kernel->setArg(idx++, static_cast(dilations[1])); } + *prev_input_shape = input->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), @@ -120,9 +123,7 @@ void DepthwiseConv2dFunctor::operator()( const Tensor *bias, Tensor *output, StatsFuture *future) { - typedef void (*Conv2dOpenclFunction)(const Tensor *input, - const Tensor *filter, const Tensor *bias, - Tensor *output, StatsFuture *future); + index_t kernel_h = filter->dim(2); index_t kernel_w = filter->dim(3); if (strides_[0] != strides_[1]) { @@ -163,7 +164,7 @@ void DepthwiseConv2dFunctor::operator()( DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, activation_, relux_max_limit_, - DataTypeToEnum::value, output, future); + DataTypeToEnum::value, &input_shape_, output, future); } template struct DepthwiseConv2dFunctor; diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc index 82312c75338d7ba8f79da37194a783beb959da45..dde05b29e2b2a6c8264ced78dea7d8fb3a37ef65 100644 --- a/mace/kernels/opencl/eltwise_opencl.cc +++ b/mace/kernels/opencl/eltwise_opencl.cc @@ -36,6 +36,8 @@ void EltwiseFunctor::operator()(const Tensor *input0, if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); kernel_ = runtime->BuildKernel("eltwise", kernel_name, built_options); + } + if (!IsVecEqual(input_shape_, input0->shape())) { uint32_t idx = 0; kernel_.setArg(idx++, *(input0->opencl_image())); kernel_.setArg(idx++, *(input1->opencl_image())); @@ -44,6 +46,7 @@ void EltwiseFunctor::operator()(const Tensor *input0, kernel_.setArg(idx++, coeff_[1]); } kernel_.setArg(idx++, *(output->opencl_image())); + input_shape_ = input0->shape(); } const uint32_t gws[2] = {static_cast(width_pixels), diff --git a/mace/kernels/opencl/fully_connected_opencl.cc b/mace/kernels/opencl/fully_connected_opencl.cc index abcbfe526349ad64b758cc2897e80cabca4c6a61..d5db519025514be82b5101de3c25c74c444c9b59 100644 --- a/mace/kernels/opencl/fully_connected_opencl.cc +++ b/mace/kernels/opencl/fully_connected_opencl.cc @@ -13,6 +13,7 @@ void FCWXKernel(cl::Kernel *kernel, const Tensor *input, const Tensor *weight, const Tensor *bias, + std::vector *prev_input_shape, Tensor *output, const ActivationType activation, std::vector &gws, @@ -67,6 +68,11 @@ void FCWXKernel(cl::Kernel *kernel, const uint32_t inter_local_blks = kwg_size / (gws[0] * gws[1]); lws = {gws[0], gws[1], inter_local_blks}; + } + if (!IsVecEqual(*prev_input_shape, input->shape())) { + const index_t batch = output->dim(0); + const index_t output_blocks = RoundUpDiv4(output->dim(3)); + uint32_t idx = 0; kernel->setArg(idx++, *(input->opencl_image())); kernel->setArg(idx++, *(weight->opencl_image())); @@ -80,6 +86,10 @@ void FCWXKernel(cl::Kernel *kernel, kernel->setArg(idx++, static_cast(RoundUpDiv4(input->dim(3)))); kernel->setArg(idx++, static_cast(output_blocks)); kernel->setArg(idx++, relux_max_limit); + + gws[2] = static_cast(batch * output_blocks); + + *prev_input_shape = input->shape(); } cl::Event event; cl_int error = runtime->command_queue().enqueueNDRangeKernel( @@ -103,6 +113,7 @@ void FCWTXKernel(cl::Kernel *kernel, const Tensor *input, const Tensor *weight, const Tensor *bias, + std::vector *prev_input_shape, Tensor *output, const ActivationType activation, std::vector &gws, @@ -141,6 +152,9 @@ void FCWTXKernel(cl::Kernel *kernel, *kernel = runtime->BuildKernel("fully_connected", kernel_name, built_options); + lws = {16, 64, 1}; + } + if (!IsVecEqual(*prev_input_shape, input->shape())) { uint32_t idx = 0; kernel->setArg(idx++, *(input->opencl_image())); kernel->setArg(idx++, *(weight->opencl_image())); @@ -155,14 +169,13 @@ void FCWTXKernel(cl::Kernel *kernel, kernel->setArg(idx++, relux_max_limit); const index_t batch = output->dim(0); - const index_t output_size = output->dim(3); - - const index_t output_blocks = RoundUpDiv4(output_size); + const index_t output_blocks = RoundUpDiv4(output->dim(3)); gws = { static_cast(batch), static_cast(output_blocks), }; - lws = {16, 64, 1}; + + *prev_input_shape = input->shape(); } std::stringstream ss; @@ -185,11 +198,11 @@ void FullyConnectedFunctor::operator()( output->ResizeImage(output_shape, output_image_shape); if (weight_type_ == BufferType::WEIGHT_HEIGHT) { - FCWTXKernel(&kernel_, input, weight, bias, output, + FCWTXKernel(&kernel_, input, weight, bias, &input_shape_, output, activation_, gws_, lws_, relux_max_limit_, future); } else { - FCWXKernel(&kernel_, input, weight, bias, output, - activation_, gws_, lws_, relux_max_limit_, future); + FCWXKernel(&kernel_, input, weight, bias, &input_shape_, output, + activation_, gws_, lws_, relux_max_limit_, future); } }; diff --git a/mace/kernels/opencl/helper.h b/mace/kernels/opencl/helper.h index 6513415a02a00574dbfc1b22c1c909e94e6bfd49..56bf295ee2dec5451f9d142ccd0e63441b37e545 100644 --- a/mace/kernels/opencl/helper.h +++ b/mace/kernels/opencl/helper.h @@ -71,6 +71,13 @@ inline bool LimitKernelTime() { return flag != nullptr && strlen(flag) == 1 && flag[0] == '1'; } +template +bool IsVecEqual(const std::vector &input0, + const std::vector &input1) { + return ((input0.size() == input1.size()) && + (std::equal(input0.begin(), input0.end(), input1.begin()))); +} + namespace { template void AppendToStream(std::stringstream *ss, const std::string &delimiter, T v) { diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index d453c29308133aa81f5b19422b020d06dbba49fc..4b61edb271df814b4bdcea251d28b2ca03cf3be4 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -36,17 +36,16 @@ void MatMulFunctor::operator()(const Tensor *A, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("matmul", kernel_name, built_options); - - uint32_t idx = 0; - kernel_.setArg(idx++, *(A->opencl_image())); - kernel_.setArg(idx++, *(B->opencl_image())); - kernel_.setArg(idx++, *(C->opencl_image())); - kernel_.setArg(idx++, static_cast(height)); - kernel_.setArg(idx++, static_cast(width)); - kernel_.setArg(idx++, static_cast(A->dim(2))); - kernel_.setArg(idx++, static_cast(height_blocks)); - kernel_.setArg(idx++, static_cast(RoundUpDiv4(A->dim(2)))); } + uint32_t idx = 0; + kernel_.setArg(idx++, *(A->opencl_image())); + kernel_.setArg(idx++, *(B->opencl_image())); + kernel_.setArg(idx++, *(C->opencl_image())); + kernel_.setArg(idx++, static_cast(height)); + kernel_.setArg(idx++, static_cast(width)); + kernel_.setArg(idx++, static_cast(A->dim(2))); + kernel_.setArg(idx++, static_cast(height_blocks)); + kernel_.setArg(idx++, static_cast(RoundUpDiv4(A->dim(2)))); const uint32_t gws[2] = { static_cast(width_blocks), diff --git a/mace/kernels/opencl/pooling_opencl.cc b/mace/kernels/opencl/pooling_opencl.cc index d9256776d1f094a505de40a92bf79a1553cd1272..d8a6d675a8da5749d3a2cf02360e3ec619a809ff 100644 --- a/mace/kernels/opencl/pooling_opencl.cc +++ b/mace/kernels/opencl/pooling_opencl.cc @@ -17,31 +17,6 @@ void PoolingFunctor::operator()(const Tensor *input, StatsFuture *future) { MACE_CHECK(dilations_[0] == 1 && dilations_[1] == 1) << "Pooling opencl kernel not support dilation yet"; - std::vector output_shape(4); - std::vector filter_shape = {kernels_[0], kernels_[1], input->dim(3), - input->dim(3)}; - - std::vector paddings(2); - if (paddings_.empty()) { - kernels::CalcNHWCPaddingAndOutputSize( - input->shape().data(), filter_shape.data(), dilations_, strides_, - padding_type_, output_shape.data(), paddings.data()); - } else { - paddings = paddings_; - CalcOutputSize(input->shape().data(), filter_shape.data(), paddings_.data(), - dilations_, strides_, RoundType::CEIL, output_shape.data()); - } - - std::vector output_image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); - output->ResizeImage(output_shape, output_image_shape); - - index_t batch = output->dim(0); - index_t out_height = output->dim(1); - index_t out_width = output->dim(2); - index_t channels = output->dim(3); - - index_t channel_blocks = (channels + 3) / 4; if (kernel_.get() == nullptr) { const DataType dt = DataTypeToEnum::value; @@ -62,18 +37,49 @@ void PoolingFunctor::operator()(const Tensor *input, } kernel_ = runtime->BuildKernel("pooling", kernel_name, built_options); + } + if (!IsVecEqual(input_shape_, input->shape())) { + std::vector output_shape(4); + std::vector filter_shape = {kernels_[0], kernels_[1], input->dim(3), + input->dim(3)}; + + std::vector paddings(2); + if (paddings_.empty()) { + kernels::CalcNHWCPaddingAndOutputSize( + input->shape().data(), filter_shape.data(), dilations_, strides_, + padding_type_, output_shape.data(), paddings.data()); + } else { + paddings = paddings_; + CalcOutputSize(input->shape().data(), filter_shape.data(), paddings_.data(), + dilations_, strides_, RoundType::CEIL, output_shape.data()); + } + + std::vector output_image_shape; + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, output_image_shape); + output->ResizeImage(output_shape, output_image_shape); + uint32_t idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, static_cast(input->dim(1))); kernel_.setArg(idx++, static_cast(input->dim(2))); - kernel_.setArg(idx++, static_cast(out_height)); + kernel_.setArg(idx++, static_cast(output->dim(1))); kernel_.setArg(idx++, paddings[0] / 2); kernel_.setArg(idx++, paddings[1] / 2); kernel_.setArg(idx++, strides_[0]); kernel_.setArg(idx++, kernels_[0]); kernel_.setArg(idx++, *(output->opencl_image())); + + input_shape_ = input->shape(); } + index_t batch = output->dim(0); + index_t out_height = output->dim(1); + index_t out_width = output->dim(2); + index_t channels = output->dim(3); + + index_t channel_blocks = (channels + 3) / 4; + + const uint32_t gws[3] = { static_cast(channel_blocks), static_cast(out_width), static_cast(batch * out_height), diff --git a/mace/kernels/opencl/resize_bilinear_opencl.cc b/mace/kernels/opencl/resize_bilinear_opencl.cc index 470a335deb264610638cdbfda11f8bffeb974062..a3bb2ee1951f433ae41a4c2dc41367fe77d1e497 100644 --- a/mace/kernels/opencl/resize_bilinear_opencl.cc +++ b/mace/kernels/opencl/resize_bilinear_opencl.cc @@ -25,6 +25,18 @@ void ResizeBilinearFunctor::operator()( const index_t out_width = out_width_; if (kernel_.get() == nullptr) { + auto runtime = OpenCLRuntime::Global(); + std::set built_options; + std::string kernel_name = MACE_OBFUSCATE_SYMBOL("resize_bilinear_nocache"); + built_options.emplace("-Dresize_bilinear_nocache=" + kernel_name); + auto dt = DataTypeToEnum::value; + built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); + built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); + kernel_ = + runtime->BuildKernel("resize_bilinear", kernel_name, built_options); + + } + if (!IsVecEqual(input_shape_, input->shape())) { MACE_CHECK(out_height > 0 && out_width > 0); std::vector output_shape{batch, out_height, out_width, channels}; @@ -38,16 +50,6 @@ void ResizeBilinearFunctor::operator()( float width_scale = CalculateResizeScale(in_width, out_width, align_corners_); - auto runtime = OpenCLRuntime::Global(); - std::set built_options; - std::string kernel_name = MACE_OBFUSCATE_SYMBOL("resize_bilinear_nocache"); - built_options.emplace("-Dresize_bilinear_nocache=" + kernel_name); - auto dt = DataTypeToEnum::value; - built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); - built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); - kernel_ = - runtime->BuildKernel("resize_bilinear", kernel_name, built_options); - uint32_t idx = 0; kernel_.setArg(idx++, *(input->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image())); @@ -56,6 +58,9 @@ void ResizeBilinearFunctor::operator()( kernel_.setArg(idx++, static_cast(in_height)); kernel_.setArg(idx++, static_cast(in_width)); kernel_.setArg(idx++, static_cast(out_height)); + + input_shape_ = input->shape(); + } const uint32_t gws[3] = {static_cast(channel_blocks), diff --git a/mace/kernels/opencl/softmax_opencl.cc b/mace/kernels/opencl/softmax_opencl.cc index 25e1c9e4aa98a354524df81fa92c0c4c21bd5710..4aabe9017f06073ddffe7e04871b62b76da15dc6 100644 --- a/mace/kernels/opencl/softmax_opencl.cc +++ b/mace/kernels/opencl/softmax_opencl.cc @@ -34,11 +34,14 @@ void SoftmaxFunctor::operator()(const Tensor *logits, built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); kernel_ = runtime->BuildKernel("softmax", kernel_name, built_options); + } + if (!IsVecEqual(input_shape_, logits->shape())) { uint32_t idx = 0; kernel_.setArg(idx++, *(logits->opencl_image())); kernel_.setArg(idx++, static_cast(channels)); kernel_.setArg(idx++, remain_channels); kernel_.setArg(idx++, *(output->opencl_image())); + input_shape_ = logits->shape(); } const uint32_t gws[3] = {static_cast(channel_blocks), static_cast(width), diff --git a/mace/kernels/opencl/space_to_batch_opencl.cc b/mace/kernels/opencl/space_to_batch_opencl.cc index 0cecb0a7809d8cc44535af095c53b03959dda28c..91f5564d520de9e11ad832231060f37ea3f64191 100644 --- a/mace/kernels/opencl/space_to_batch_opencl.cc +++ b/mace/kernels/opencl/space_to_batch_opencl.cc @@ -43,6 +43,8 @@ void SpaceToBatchFunctor::operator()( kernel_ = runtime->BuildKernel("space_to_batch", kernel_name, built_options); + } + if (!IsVecEqual(space_shape_, space_tensor->shape())) { uint32_t idx = 0; if (b2s_) { kernel_.setArg(idx++, *(batch_tensor->opencl_image())); @@ -59,6 +61,8 @@ void SpaceToBatchFunctor::operator()( kernel_.setArg(idx++, static_cast(space_tensor->dim(2))); kernel_.setArg(idx++, static_cast(batch_tensor->dim(1))); kernel_.setArg(idx++, static_cast(batch_tensor->dim(2))); + + space_shape_ = space_tensor->shape(); } const uint32_t chan_blk = RoundUpDiv4(batch_tensor->dim(3)); diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index ee7d5d12e84aec4a878ca4575c4565271eb33e7c..c07ccc9944786e8cbcd8dde4aa6ada7794542019 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -14,6 +14,21 @@ namespace kernels { template void WinogradTransformFunctor::operator()( const Tensor *input_tensor, Tensor *output_tensor, StatsFuture *future) { + + if (kernel_.get() == nullptr) { + std::string obfuscated_kernel_name = + MACE_OBFUSCATE_SYMBOL("winograd_transform_2x2"); + std::set built_options; + built_options.emplace("-Dwinograd_transform_2x2=" + obfuscated_kernel_name); + built_options.emplace("-DDATA_TYPE=" + + DtToUpstreamCLDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToUpstreamCLCMDDt(DataTypeToEnum::value)); + auto runtime = OpenCLRuntime::Global(); + kernel_ = runtime->BuildKernel("winograd_transform", obfuscated_kernel_name, + built_options); + + } std::vector output_shape(4); std::vector filter_shape = {3, 3, input_tensor->dim(3), 1}; std::vector paddings(2); @@ -27,29 +42,16 @@ void WinogradTransformFunctor::operator()( paddings_.data(), dilations_.data(), strides_.data(), RoundType::FLOOR, output_shape.data()); } - const index_t round_h = (output_shape[1] + 1) / 2; const index_t round_w = (output_shape[2] + 1) / 2; const index_t out_width = input_tensor->dim(0) * round_h * round_w; - if (kernel_.get() == nullptr) { + if (!IsVecEqual(input_shape_, input_tensor->shape())) { output_shape = {16, input_tensor->dim(3), out_width, 1}; std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, image_shape); output_tensor->ResizeImage(output_shape, image_shape); - std::string obfuscated_kernel_name = - MACE_OBFUSCATE_SYMBOL("winograd_transform_2x2"); - std::set built_options; - built_options.emplace("-Dwinograd_transform_2x2=" + obfuscated_kernel_name); - built_options.emplace("-DDATA_TYPE=" + - DtToUpstreamCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" + - DtToUpstreamCLCMDDt(DataTypeToEnum::value)); - auto runtime = OpenCLRuntime::Global(); - kernel_ = runtime->BuildKernel("winograd_transform", obfuscated_kernel_name, - built_options); - uint32_t idx = 0; kernel_.setArg(idx++, *(input_tensor->opencl_image())); kernel_.setArg(idx++, *(output_tensor->opencl_image())); @@ -60,6 +62,8 @@ void WinogradTransformFunctor::operator()( kernel_.setArg(idx++, static_cast(round_w)); kernel_.setArg(idx++, static_cast(paddings[0] / 2)); kernel_.setArg(idx++, static_cast(paddings[1] / 2)); + + input_shape_ = input_tensor->shape(); } const uint32_t gws[2] = { @@ -79,11 +83,6 @@ void WinogradInverseTransformFunctor::operator()( const Tensor *bias, Tensor *output_tensor, StatsFuture *future) { - std::vector output_shape = {batch_, height_, width_, - input_tensor->dim(1)}; - std::vector image_shape; - CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); - output_tensor->ResizeImage(output_shape, image_shape); if (kernel_.get() == nullptr) { std::string obfuscated_kernel_name = @@ -121,6 +120,13 @@ void WinogradInverseTransformFunctor::operator()( auto runtime = OpenCLRuntime::Global(); kernel_ = runtime->BuildKernel("winograd_transform", obfuscated_kernel_name, built_options); + } + if (!IsVecEqual(input_shape_, input_tensor->shape())) { + std::vector output_shape = {batch_, height_, width_, + input_tensor->dim(1)}; + std::vector image_shape; + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, image_shape); + output_tensor->ResizeImage(output_shape, image_shape); const uint32_t round_h = (height_ + 1) / 2; const uint32_t round_w = (width_ + 1) / 2; @@ -139,6 +145,8 @@ void WinogradInverseTransformFunctor::operator()( kernel_.setArg(idx++, static_cast(round_h * round_w)); kernel_.setArg(idx++, static_cast(round_w)); kernel_.setArg(idx++, relux_max_limit_); + + input_shape_ = input_tensor->shape(); } const uint32_t gws[2] = { diff --git a/mace/kernels/pooling.h b/mace/kernels/pooling.h index 6bd5d94e1684d5228dcb1a468d05220f904deaae..bc9892e5864d420f9505de9462df5a17eedb4241 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -182,6 +182,7 @@ struct PoolingFunctor : PoolingFunctorBase { StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index bdd94192b8ae4f91a08c2088a31a7188b22e765e..52c1da102926870d6e65dfa52ee68c7ff5a43f76 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -172,6 +172,7 @@ struct ResizeBilinearFunctor void operator()(const Tensor *input, Tensor *output, StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/kernels/softmax.h b/mace/kernels/softmax.h index 7ff375d31be90228628dc91b16326eab4079ddd6..d5bc5717d8cfdbfc391de634f08d8fd427e5ca9d 100644 --- a/mace/kernels/softmax.h +++ b/mace/kernels/softmax.h @@ -57,6 +57,7 @@ struct SoftmaxFunctor { void operator()(const Tensor *logits, Tensor *output, StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namepsace kernels diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index 402bf97cb956a8e7dfcb75645cc1fe395282f8e9..ef7467b57acd0fc1d3563148ec53dd1ea4869a9f 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -54,6 +54,7 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { StatsFuture *future); cl::Kernel kernel_; + std::vector space_shape_; }; } // namespace kernels diff --git a/mace/kernels/winograd_transform.h b/mace/kernels/winograd_transform.h index 464a59ced093d123c2853c37bedeea8879cb68c0..f3b7f7d640328860f5ffdc5dc6b065e78e324896 100644 --- a/mace/kernels/winograd_transform.h +++ b/mace/kernels/winograd_transform.h @@ -49,6 +49,7 @@ struct WinogradTransformFunctor void operator()(const Tensor *input, Tensor *output, StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; struct WinogradInverseTransformFunctorBase { @@ -105,6 +106,7 @@ struct WinogradInverseTransformFunctor StatsFuture *future); cl::Kernel kernel_; + std::vector input_shape_; }; } // namespace kernels diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc index 204896c3cb6538843254b99b7316a903a551aadb..d7a000807654b576199482d63b7249d0049dc1f1 100644 --- a/mace/ops/activation.cc +++ b/mace/ops/activation.cc @@ -5,6 +5,7 @@ #include "mace/ops/activation.h" namespace mace { +namespace ops { void Register_Activation(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Activation") @@ -26,4 +27,5 @@ void Register_Activation(OperatorRegistry *op_registry) { ActivationOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/activation.h b/mace/ops/activation.h index 12761927a954c2ffd3e94b1a086bc28911e4aae5..bfe91591ed0fab88011de3187cf523c00d3f0fd1 100644 --- a/mace/ops/activation.h +++ b/mace/ops/activation.h @@ -5,10 +5,13 @@ #ifndef MACE_OPS_ACTIVATION_H_ #define MACE_OPS_ACTIVATION_H_ +#include + #include "mace/core/operator.h" #include "mace/kernels/activation.h" namespace mace { +namespace ops { template class ActivationOp : public Operator { @@ -36,6 +39,7 @@ class ActivationOp : public Operator { kernels::ActivationFunctor functor_; }; +} // namespace ops } // namespace mace #endif // MACE_OPS_ACTIVATION_H_ diff --git a/mace/ops/activation_benchmark.cc b/mace/ops/activation_benchmark.cc index 5674f630959ef30fa9949546eeae704e2c83b7e5..d8a8cb726a5e8398052dec9e397c8a830fef8219 100644 --- a/mace/ops/activation_benchmark.cc +++ b/mace/ops/activation_benchmark.cc @@ -3,11 +3,15 @@ // #include + #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void ReluBenchmark( int iters, int batch, int channels, int height, int width) { @@ -316,4 +320,6 @@ BM_SIGMOID(1, 3, 512, 512); BM_SIGMOID(1, 32, 112, 112); BM_SIGMOID(1, 64, 256, 256); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/activation_test.cc b/mace/ops/activation_test.cc index 63b309109fc85fbb37bbe9fcfcf3c6824fcb74a6..18bddcc75e4555cdc16d3cd63e828281ce7dd0e8 100644 --- a/mace/ops/activation_test.cc +++ b/mace/ops/activation_test.cc @@ -6,6 +6,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class ActivationOpTest : public OpsTestBase {}; @@ -365,4 +367,6 @@ TEST_F(ActivationOpTest, OPENCLSimpleSigmoid) { TestSimpleSigmoid(); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/addn.cc b/mace/ops/addn.cc index 2a18e458ac049bb17fe48309b745541b501dffd8..8e253cf1dbbd45791e72b65352af60d389e44b87 100644 --- a/mace/ops/addn.cc +++ b/mace/ops/addn.cc @@ -5,6 +5,7 @@ #include "mace/ops/addn.h" namespace mace { +namespace ops { void Register_AddN(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("AddN") @@ -26,4 +27,5 @@ void Register_AddN(OperatorRegistry *op_registry) { AddNOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/addn.h b/mace/ops/addn.h index 5824844b380fc220d11d399f4b306a1065c77186..24a91660e7d49af683fada81e90e8215ec2e699c 100644 --- a/mace/ops/addn.h +++ b/mace/ops/addn.h @@ -11,6 +11,7 @@ #include "mace/kernels/addn.h" namespace mace { +namespace ops { template class AddNOp : public Operator { @@ -40,6 +41,7 @@ class AddNOp : public Operator { kernels::AddNFunctor functor_; }; +} // namespace ops } // namespace mace #endif // MACE_OPS_ADDN_H_ diff --git a/mace/ops/addn_benchmark.cc b/mace/ops/addn_benchmark.cc index b0ceec8edb31335d26933d69c5e7f53cf10992fb..a5c5a114101ca0e00db24574906f25c664c1f742 100644 --- a/mace/ops/addn_benchmark.cc +++ b/mace/ops/addn_benchmark.cc @@ -3,11 +3,15 @@ // #include + #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) { mace::testing::StopTiming(); @@ -75,4 +79,6 @@ BM_ADDN(4, 1, 128, 128, 3); BM_ADDN(2, 1, 256, 256, 3); BM_ADDN(2, 1, 512, 512, 3); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/addn_test.cc b/mace/ops/addn_test.cc index bc248044a8c912f7f411ff12290129114c829a47..068932dbfd38d79a072a1f8a8b04ab0687c206ec 100644 --- a/mace/ops/addn_test.cc +++ b/mace/ops/addn_test.cc @@ -6,6 +6,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class AddnOpTest : public OpsTestBase {}; @@ -62,15 +64,15 @@ TEST_F(AddnOpTest, CPUSimpleAdd3) { SimpleAdd3(); } template void RandomTest() { testing::internal::LogToStderr(); - srand(time(NULL)); + static unsigned int seed = time(NULL); for (int round = 0; round < 10; ++round) { // generate random input - index_t n = 1 + (rand() % 5); - index_t h = 1 + (rand() % 100); - index_t w = 1 + (rand() % 100); - index_t c = 1 + (rand() % 32); - int input_num = 2 + rand() % 3; + index_t n = 1 + (rand_r(&seed) % 5); + index_t h = 1 + (rand_r(&seed) % 100); + index_t w = 1 + (rand_r(&seed) % 100); + index_t c = 1 + (rand_r(&seed) % 32); + int input_num = 2 + rand_r(&seed) % 3; // Construct graph OpsTestNet net; auto op_def = OpDefBuilder("AddN", "AddNTest"); @@ -117,4 +119,6 @@ void RandomTest() { TEST_F(AddnOpTest, OPENCLRandom) { RandomTest(); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc index e0754fee8c502f542a25ff17eb2bfc3b828885e0..6e804cd1ae3535acfe6f402683d8ac9550f702dd 100644 --- a/mace/ops/batch_norm.cc +++ b/mace/ops/batch_norm.cc @@ -5,6 +5,7 @@ #include "mace/ops/batch_norm.h" namespace mace { +namespace ops { void Register_BatchNorm(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchNorm") @@ -26,4 +27,5 @@ void Register_BatchNorm(OperatorRegistry *op_registry) { BatchNormOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index 96b1af133b2532f1fbf9166219547d079d2637ed..f22c52b573ffd1d9ef0c1f0f5de3d603c84e7277 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -10,6 +10,7 @@ #include "mace/kernels/batch_norm.h" namespace mace { +namespace ops { template class BatchNormOp : public Operator { @@ -55,6 +56,7 @@ class BatchNormOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_BATCH_NORM_H_ diff --git a/mace/ops/batch_norm_benchmark.cc b/mace/ops/batch_norm_benchmark.cc index 76d8c01a49ed13e81a82ba68cc66d4c9606a54ac..6e9f20c85b6ba5c94213c3da7ef74adabd15c843 100644 --- a/mace/ops/batch_norm_benchmark.cc +++ b/mace/ops/batch_norm_benchmark.cc @@ -8,6 +8,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void BatchNorm( int iters, int batch, int channels, int height, int width) { @@ -101,4 +104,6 @@ BM_BATCH_NORM(1, 1024, 7, 7); BM_BATCH_NORM(32, 1, 256, 256); BM_BATCH_NORM(32, 3, 256, 256); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index 323b39e3a8ba111d3c55ca8b97b5344290576e68..eac95fa51f08d39a699ae382d64ed6bd3abd31f4 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -6,6 +6,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class BatchNormOpTest : public OpsTestBase {}; @@ -75,11 +77,10 @@ TEST_F(BatchNormOpTest, SimpleCPU) { Simple(); } TEST_F(BatchNormOpTest, SimpleOPENCL) { Simple(); } TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 64; index_t width = 64; @@ -147,11 +148,10 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { } TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 64; index_t width = 64; @@ -220,11 +220,10 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { } TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 103; index_t width = 113; @@ -292,11 +291,10 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { } TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 103; index_t width = 113; @@ -363,4 +361,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); } -} + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/batch_to_space.cc b/mace/ops/batch_to_space.cc index 83a79fba400c56f4e83b0b6f73748bd559c9bbb7..878e79f4f7c359b792560d2889b8552ba7086f4f 100644 --- a/mace/ops/batch_to_space.cc +++ b/mace/ops/batch_to_space.cc @@ -5,6 +5,7 @@ #include "mace/ops/batch_to_space.h" namespace mace { +namespace ops { void Register_BatchToSpaceND(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("BatchToSpaceND") @@ -19,4 +20,5 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) { BatchToSpaceNDOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/batch_to_space.h b/mace/ops/batch_to_space.h index 91d2c0c63ecb89e18b3d82121742fd5aa03a5461..51157aa72d2e6f77dc30e70e76f274f68d8d96d2 100644 --- a/mace/ops/batch_to_space.h +++ b/mace/ops/batch_to_space.h @@ -2,15 +2,17 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_OPS_SPACE_TO_BATCH_H_ -#define MACE_OPS_SPACE_TO_BATCH_H_ +#ifndef MACE_OPS_BATCH_TO_SPACE_H_ +#define MACE_OPS_BATCH_TO_SPACE_H_ #include +#include #include "mace/core/operator.h" #include "mace/kernels/space_to_batch.h" namespace mace { +namespace ops { template class BatchToSpaceNDOp : public Operator { @@ -68,6 +70,7 @@ class BatchToSpaceNDOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace -#endif // MACE_OPS_SPACE_TO_BATCH_H_ +#endif // MACE_OPS_BATCH_TO_SPACE_H_ diff --git a/mace/ops/batch_to_space_benchmark.cc b/mace/ops/batch_to_space_benchmark.cc index cbd34cf4e8b21b1753f12c52d8c3915768daf2b9..de613f0401c994ab620d9a7c7d9ae2da4f434fa3 100644 --- a/mace/ops/batch_to_space_benchmark.cc +++ b/mace/ops/batch_to_space_benchmark.cc @@ -7,6 +7,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void BMBatchToSpace( int iters, int batch, int channels, int height, int width, int arg) { @@ -53,4 +56,7 @@ static void BMBatchToSpace( BM_BATCH_TO_SPACE(128, 8, 8, 128, 2); BM_BATCH_TO_SPACE(4, 128, 128, 32, 2); BM_BATCH_TO_SPACE(16, 64, 64, 32, 4); + +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/bias_add.cc b/mace/ops/bias_add.cc index 44b147ea4ef4e5f93df1d8da8abfcca9f215b428..aeeabc49e0ef1a52c10efcdfa131d70e95a0affa 100644 --- a/mace/ops/bias_add.cc +++ b/mace/ops/bias_add.cc @@ -5,6 +5,7 @@ #include "mace/ops/bias_add.h" namespace mace { +namespace ops { void Register_BiasAdd(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("BiasAdd") @@ -26,4 +27,5 @@ void Register_BiasAdd(OperatorRegistry *op_registry) { BiasAddOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/bias_add.h b/mace/ops/bias_add.h index ddc88f731cdb7ebd5fe156f74c0c9d3c12258718..686dd673eb5acc669b490a46e1da1612b2275e71 100644 --- a/mace/ops/bias_add.h +++ b/mace/ops/bias_add.h @@ -2,13 +2,14 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_BIAS_ADD_H_ -#define MACE_BIAS_ADD_H_ +#ifndef MACE_OPS_BIAS_ADD_H_ +#define MACE_OPS_BIAS_ADD_H_ #include "mace/core/operator.h" #include "mace/kernels/bias_add.h" namespace mace { +namespace ops { template class BiasAddOp : public Operator { @@ -40,6 +41,7 @@ class BiasAddOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace -#endif // MACE_BIAS_ADD_H_ +#endif // MACE_OPS_BIAS_ADD_H_ diff --git a/mace/ops/bias_add_benchmark.cc b/mace/ops/bias_add_benchmark.cc index 1d90da0b4d7beb4d87a7ee3e535e701c410b5656..375f78ac2e4b65a33079ea9cc05ef07cdd94d936 100644 --- a/mace/ops/bias_add_benchmark.cc +++ b/mace/ops/bias_add_benchmark.cc @@ -8,6 +8,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void BiasAdd(int iters, int batch, int channels, int height, int width) { mace::testing::StopTiming(); @@ -77,4 +80,7 @@ BM_BIAS_ADD(1, 512, 14, 14); BM_BIAS_ADD(1, 1024, 7, 7); BM_BIAS_ADD(32, 1, 256, 256); BM_BIAS_ADD(32, 3, 256, 256); + +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/bias_add_test.cc b/mace/ops/bias_add_test.cc index 688afa83ab4f64a4a4eb1346b4cf1d63c60a7fba..eff5ace0b81a665800e4a2345c58bc7a3c441247 100644 --- a/mace/ops/bias_add_test.cc +++ b/mace/ops/bias_add_test.cc @@ -6,6 +6,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class BiasAddOpTest : public OpsTestBase {}; @@ -60,13 +62,12 @@ TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) { } TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; - index_t height = 64 + rand() % 50; - index_t width = 64 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; + index_t height = 64 + rand_r(&seed) % 50; + index_t width = 64 + rand_r(&seed) % 50; // Construct graph OpsTestNet net; @@ -110,13 +111,12 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { } TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; - index_t height = 103 + rand() % 100; - index_t width = 113 + rand() % 100; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; + index_t height = 103 + rand_r(&seed) % 100; + index_t width = 113 + rand_r(&seed) % 100; // Construct graph OpsTestNet net; @@ -158,4 +158,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2); } -} + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/buffer_to_image.cc b/mace/ops/buffer_to_image.cc index 718374de349ef20d476faa063ee63fb2557bb3b7..abeccde0c49f229071e6c00e33f3fce72ca9abcf 100644 --- a/mace/ops/buffer_to_image.cc +++ b/mace/ops/buffer_to_image.cc @@ -5,6 +5,7 @@ #include "mace/ops/buffer_to_image.h" namespace mace { +namespace ops { void Register_BufferToImage(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("BufferToImage") @@ -20,4 +21,5 @@ void Register_BufferToImage(OperatorRegistry *op_registry) { BufferToImageOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/buffer_to_image.h b/mace/ops/buffer_to_image.h index d1d8621b0063b89965a8a83e6d27aba639ac8893..a50bebd37f2395b729486bed50cc00f971422a84 100644 --- a/mace/ops/buffer_to_image.h +++ b/mace/ops/buffer_to_image.h @@ -9,6 +9,7 @@ #include "mace/kernels/buffer_to_image.h" namespace mace { +namespace ops { template class BufferToImageOp : public Operator { @@ -36,5 +37,6 @@ class BufferToImageOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_BUFFER_TO_IMAGE_H_ diff --git a/mace/ops/buffer_to_image_test.cc b/mace/ops/buffer_to_image_test.cc index 04baa38275f6ecf4309a33ae5ed6ed3603007dfb..1d6d55ad181ae880f423b8444b49fb0e92255a8a 100644 --- a/mace/ops/buffer_to_image_test.cc +++ b/mace/ops/buffer_to_image_test.cc @@ -5,7 +5,9 @@ #include "gtest/gtest.h" #include "mace/ops/ops_test_util.h" -using namespace mace; +namespace mace { +namespace ops { +namespace test { template void TestBidirectionTransform(const int type, @@ -188,3 +190,7 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) { TestStringHalfBidirectionTransform( kernels::ARGUMENT, {2}, input_data); } + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/channel_shuffle.cc b/mace/ops/channel_shuffle.cc index 9e00da6f9eccaacc7e10e93bcff81c03904993da..c1c5df9cd2da4046c33585d15d708f97625b9139 100644 --- a/mace/ops/channel_shuffle.cc +++ b/mace/ops/channel_shuffle.cc @@ -5,6 +5,7 @@ #include "mace/ops/channel_shuffle.h" namespace mace { +namespace ops { void Register_ChannelShuffle(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("ChannelShuffle") @@ -24,4 +25,5 @@ void Register_ChannelShuffle(OperatorRegistry *op_registry) { ChannelShuffleOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/channel_shuffle.h b/mace/ops/channel_shuffle.h index 5de47d663e96ba31368ab89c2b651730ede024a7..93cbfd40230034ac586561387d4bf7bb490f3794 100644 --- a/mace/ops/channel_shuffle.h +++ b/mace/ops/channel_shuffle.h @@ -11,6 +11,7 @@ #include "mace/kernels/channel_shuffle.h" namespace mace { +namespace ops { template class ChannelShuffleOp : public Operator { @@ -42,6 +43,7 @@ class ChannelShuffleOp : public Operator { kernels::ChannelShuffleFunctor functor_; }; +} // namespace ops } // namespace mace #endif // MACE_OPS_CHANNEL_SHUFFLE_H_ diff --git a/mace/ops/channel_shuffle_benchmark.cc b/mace/ops/channel_shuffle_benchmark.cc index 22b5b1aa7a11310c2aab139a424478487e24157f..c547cce6637dac8dda1c768b1aa5f8327c6e2553 100644 --- a/mace/ops/channel_shuffle_benchmark.cc +++ b/mace/ops/channel_shuffle_benchmark.cc @@ -7,10 +7,12 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { -template +template static void ChannelShuffle( - int iters, int batch, int channels, int height, int width, int group) { + int iters, int batch, int channels, int height, int width, int group) { mace::testing::StopTiming(); OpsTestNet net; @@ -23,15 +25,15 @@ static void ChannelShuffle( kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") - .Input("InputImage") - .Output("Output") - .AddIntArg("group", group) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("Output") + .AddIntArg("group", group) + .Finalize(net.NewOperatorDef()); } else { OpDefBuilder("Softmax", "SoftmaxBM") - .Input("Input") - .Output("Output") - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Output("Output") + .Finalize(net.NewOperatorDef()); } // Warm-up @@ -47,18 +49,19 @@ static void ChannelShuffle( net.Sync(); } -#define BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, TYPE, DEVICE) \ - static void BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - mace::testing::MaccProcessed(tot); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - ChannelShuffle(iters, N, C, H, W, G); \ - } \ +#define BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, TYPE, DEVICE) \ + static void \ + BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::MaccProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + ChannelShuffle(iters, N, C, H, W, G); \ + } \ BENCHMARK(BM_CHANNEL_SHUFFLE_##N##_##C##_##H##_##W##_##G##_##TYPE##_##DEVICE) -#define BM_CHANNEL_SHUFFLE(N, C, H, W, G) \ - BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \ +#define BM_CHANNEL_SHUFFLE(N, C, H, W, G) \ + BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, CPU); \ BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, float, OPENCL); \ BM_CHANNEL_SHUFFLE_MACRO(N, C, H, W, G, half, OPENCL); @@ -66,4 +69,6 @@ BM_CHANNEL_SHUFFLE(1, 64, 64, 64, 8); BM_CHANNEL_SHUFFLE(1, 64, 128, 128, 8); BM_CHANNEL_SHUFFLE(1, 64, 256, 256, 8); -} // namespace mace +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/channel_shuffle_test.cc b/mace/ops/channel_shuffle_test.cc index 817a68c20aed7d1ac524aa8acde118ef8daeeb82..610a260a2a98a2f0768830b831ad28e215eae53d 100644 --- a/mace/ops/channel_shuffle_test.cc +++ b/mace/ops/channel_shuffle_test.cc @@ -1,10 +1,13 @@ // // Copyright (c) 2017 XiaoMi All rights reserved. // + #include "mace/core/operator.h" #include "mace/ops/ops_test_util.h" -using namespace mace; +namespace mace { +namespace ops { +namespace test { class ChannelShuffleOpTest : public OpsTestBase {}; @@ -38,30 +41,34 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { // Add input data net.AddInputFromArray( - "Input", {1, 1, 2, 16}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); + "Input", {1, 1, 2, 16}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); - + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntArg("group", 4) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntArg("group", 4) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(DeviceType::OPENCL); // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); // Check auto expected = CreateTensor( - {1, 1, 2, 16}, {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, - 16, 20, 24, 28, 17, 21, 25, 29, 18, 22, 26, 30, 19, 23, 27, 31}); + {1, 1, 2, 16}, + {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15, + 16, 20, 24, 28, 17, 21, 25, 29, 18, 22, 26, 30, 19, 23, 27, 31}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/concat.cc b/mace/ops/concat.cc index 361fce51cf0ce7ecfb60da65e5b17791a6c4067d..2e6dbc8f30e99fac1e762466d034013b9239d968 100644 --- a/mace/ops/concat.cc +++ b/mace/ops/concat.cc @@ -5,6 +5,7 @@ #include "mace/ops/concat.h" namespace mace { +namespace ops { void Register_Concat(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Concat") @@ -25,4 +26,5 @@ void Register_Concat(OperatorRegistry *op_registry) { ConcatOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/concat.h b/mace/ops/concat.h index cadd52937fd35e9fbf158411c40330d9da0e7ee3..b791619d31d440393dd9fdb0dfb688199d6789ec 100644 --- a/mace/ops/concat.h +++ b/mace/ops/concat.h @@ -5,9 +5,13 @@ #ifndef MACE_OPS_CONCAT_H_ #define MACE_OPS_CONCAT_H_ +#include + #include "mace/core/operator.h" #include "mace/kernels/concat.h" + namespace mace { +namespace ops { template class ConcatOp : public Operator { @@ -41,6 +45,7 @@ class ConcatOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_CONCAT_H_ diff --git a/mace/ops/concat_benchmark.cc b/mace/ops/concat_benchmark.cc index 8beea839b124cf34dfa2cb0107c08c386ac07b2f..a8da83588cb62b49033083c2a0f85318f4bbf984 100644 --- a/mace/ops/concat_benchmark.cc +++ b/mace/ops/concat_benchmark.cc @@ -7,6 +7,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void ConcatHelper(int iters, int concat_dim, int dim1) { mace::testing::StopTiming(); @@ -106,4 +109,6 @@ BM_CONCAT_OPENCL_MACRO(3, 32, 32, 64, half); BM_CONCAT_OPENCL_MACRO(3, 32, 32, 128, half); BM_CONCAT_OPENCL_MACRO(3, 32, 32, 256, half); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index 48727597441364ec78c8466fe4701c83862babf9..efc8bd7fc1bb4e59e92c0f14c11736011432586a 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -2,11 +2,16 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/ops/concat.h" +#include +#include + #include "gmock/gmock.h" #include "mace/ops/ops_test_util.h" +#include "mace/ops/concat.h" -using namespace mace; +namespace mace { +namespace ops { +namespace test { class ConcatOpTest : public OpsTestBase {}; @@ -87,10 +92,10 @@ TEST_F(ConcatOpTest, CPUSimpleVertical) { } TEST_F(ConcatOpTest, CPURandom) { - srand(time(nullptr)); + static unsigned int seed = time(NULL); int dim = 5; - int num_inputs = 2 + rand() % 10; - int axis = rand() % dim; + int num_inputs = 2 + rand_r(&seed) % 10; + int axis = rand_r(&seed) % dim; // Construct graph OpsTestNet net; auto builder = OpDefBuilder("Concat", "ConcatTest"); @@ -108,7 +113,7 @@ TEST_F(ConcatOpTest, CPURandom) { std::vector input_ptrs(num_inputs, nullptr); index_t concat_axis_size = 0; for (int i = 0; i < num_inputs; ++i) { - input_shapes[i][axis] = 1 + rand() % dim; + input_shapes[i][axis] = 1 + rand_r(&seed) % dim; concat_axis_size += input_shapes[i][axis]; GenerateRandomRealTypeData(input_shapes[i], &inputs[i]); input_ptrs[i] = inputs[i].data(); @@ -217,3 +222,7 @@ TEST_F(ConcatOpTest, OPENCLAlignedMultiInput) { OpenclRandomTest( {{3, 32, 32, 32}, {3, 32, 32, 32}, {3, 32, 32, 32}, {3, 32, 32, 32}}, 3); } + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc index 0185c1d1e62cdb0bf836b162e29101b12aa6f348..cf45d9df7acf38b4da0c1afa6f3f6c501b5a9a8b 100644 --- a/mace/ops/conv_2d.cc +++ b/mace/ops/conv_2d.cc @@ -5,6 +5,7 @@ #include "mace/ops/conv_2d.h" namespace mace { +namespace ops { void Register_Conv2D(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Conv2D") @@ -26,4 +27,5 @@ void Register_Conv2D(OperatorRegistry *op_registry) { Conv2dOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/conv_2d.h b/mace/ops/conv_2d.h index c441b0b45b0b00619fe0a36554dcef307227b2d9..08f1bab24ea1b6b8b19faf34d57d38de0fcfb71b 100644 --- a/mace/ops/conv_2d.h +++ b/mace/ops/conv_2d.h @@ -12,6 +12,7 @@ #include "mace/ops/conv_pool_2d_base.h" namespace mace { +namespace ops { template class Conv2dOp : public ConvPool2dOpBase { @@ -44,6 +45,7 @@ class Conv2dOp : public ConvPool2dOpBase { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_CONV_2D_H_ diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc index 4abd31e4766cbb42bf74752198496cb02af2b114..90e4579eb9c53c4870a083f9871001420509318e 100644 --- a/mace/ops/conv_2d_benchmark.cc +++ b/mace/ops/conv_2d_benchmark.cc @@ -10,6 +10,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { template static void Conv2d(int iters, @@ -80,30 +82,32 @@ static void Conv2d(int iters, // approximate the amortized latency. The OpenCL runtime for Mali/Adreno is // in-order. -#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, DILATION, P, OC, TYPE, \ - DEVICE) \ - static void \ - BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - int64_t pad_h = 0, pad_w = 0; \ - if (P == SAME) { \ - pad_h = KH / 2; \ - pad_w = KW / 2; \ - } \ - int64_t oh = \ - (H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \ - int64_t ow = \ - (W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \ - const int64_t macc = \ - static_cast(iters) * N * OC * oh * ow * (KH * KW * C + 1); \ - mace::testing::MaccProcessed(macc); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - Conv2d(iters, N, C, H, W, KH, KW, STRIDE, DILATION, \ - mace::Padding::P, OC); \ - } \ - BENCHMARK( \ - BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION##_##P##_##OC##_##TYPE##_##DEVICE) +#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, DILATION, P, OC, TYPE, \ + DEVICE) \ + static void \ + BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION\ + ##_##P##_##OC##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + int64_t pad_h = 0, pad_w = 0; \ + if (P == SAME) { \ + pad_h = KH / 2; \ + pad_w = KW / 2; \ + } \ + int64_t oh = \ + (H + 2 * pad_h - KH - (KH - 1) * (DILATION - 1)) / STRIDE + 1; \ + int64_t ow = \ + (W + 2 * pad_w - KW - (KW - 1) * (DILATION - 1)) / STRIDE + 1; \ + const int64_t macc = \ + static_cast(iters) * N * OC * oh * ow * (KH * KW * C + 1); \ + mace::testing::MaccProcessed(macc); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Conv2d(iters, N, C, H, W, KH, KW, STRIDE, DILATION, \ + mace::Padding::P, OC); \ + } \ + BENCHMARK( \ + BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##D##DILATION\ + ##_##P##_##OC##_##TYPE##_##DEVICE) #define BM_CONV_2D(N, C, H, W, KH, KW, S, D, P, OC) \ BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, D, P, OC, float, CPU); \ @@ -139,4 +143,6 @@ BM_CONV_2D(1, 32, 256, 256, 3, 3, 1, 4, VALID, 32); BM_CONV_2D(1, 128, 56, 56, 1, 1, 1, 1, SAME, 128); BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, 1, SAME, 1024); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index c88a460c57c3f03ad9b1c1890990827ecc622ee6..eef5a560d6a190932681173a0038fa4edd738703 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -2,11 +2,15 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/ops/conv_2d.h" #include +#include + +#include "mace/ops/conv_2d.h" #include "mace/ops/ops_test_util.h" -using namespace mace; +namespace mace { +namespace ops { +namespace test { class Conv2dOpTest : public OpsTestBase {}; @@ -347,14 +351,13 @@ static void TestComplexConvNxNS12(const std::vector &shape, testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { - srand(time(NULL)); - // generate random input - index_t batch = 3 + (rand() % 10); + static unsigned int seed = time(NULL); + index_t batch = 3 + (rand_r(&seed) % 10); index_t height = shape[0]; index_t width = shape[1]; - index_t input_channels = shape[2] + (rand() % 10); - index_t output_channels = shape[3] + (rand() % 10); + index_t input_channels = shape[2] + (rand_r(&seed) % 10); + index_t output_channels = shape[3] + (rand_r(&seed) % 10); // Construct graph OpsTestNet net; OpDefBuilder("Conv2D", "Conv2dTest") @@ -729,3 +732,7 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) { TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) { TestArbitraryPadConvNxN({107, 113, 5, 7}, {4, 4}); } + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h index c3db95abcee0b253b3d7b9a37dfac6f8e5ae8ab4..a6f1299da25174e15a267373ca70a1090878c53b 100644 --- a/mace/ops/conv_pool_2d_base.h +++ b/mace/ops/conv_pool_2d_base.h @@ -5,10 +5,13 @@ #ifndef MACE_OPS_CONV_POOL_2D_BASE_H_ #define MACE_OPS_CONV_POOL_2D_BASE_H_ +#include + #include "mace/core/operator.h" #include "mace/kernels/conv_pool_2d_util.h" namespace mace { +namespace ops { template class ConvPool2dOpBase : public Operator { @@ -29,6 +32,7 @@ class ConvPool2dOpBase : public Operator { std::vector dilations_; }; +} // namespace ops } // namespace mace #endif // MACE_OPS_CONV_POOL_2D_BASE_H_ diff --git a/mace/ops/core_test.cc b/mace/ops/core_test.cc index 5c4f6efaf2a6b19df77185aa9c9740f760cd069b..3d533e5d5eddc56c74db483f173cac93bdb0ccd4 100644 --- a/mace/ops/core_test.cc +++ b/mace/ops/core_test.cc @@ -5,6 +5,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { TEST(CoreTest, INIT_MODE) { std::vector op_defs; @@ -56,4 +58,6 @@ TEST(CoreTest, INIT_MODE) { 1e-5); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc index a6fb9eb4d86d60f2762ea0052e820f6f2e2b79af..112cb03163f384e6b5ea1361b53e512f57f3c999 100644 --- a/mace/ops/depthwise_conv2d.cc +++ b/mace/ops/depthwise_conv2d.cc @@ -5,6 +5,7 @@ #include "mace/ops/depthwise_conv2d.h" namespace mace { +namespace ops { void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("DepthwiseConv2d") @@ -26,4 +27,5 @@ void Register_DepthwiseConv2d(OperatorRegistry *op_registry) { DepthwiseConv2dOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/depthwise_conv2d.h b/mace/ops/depthwise_conv2d.h index 0678ba0757d17a50fa1b2ef2cdcc5c4b2635a46d..f7abd6891f2d5dfbc0aefdd4b653b80330853ad5 100644 --- a/mace/ops/depthwise_conv2d.h +++ b/mace/ops/depthwise_conv2d.h @@ -6,6 +6,7 @@ #define MACE_OPS_DEPTHWISE_CONV2D_H_ #include +#include #include "mace/core/operator.h" #include "mace/kernels/conv_2d.h" @@ -13,6 +14,7 @@ #include "mace/ops/conv_pool_2d_base.h" namespace mace { +namespace ops { template class DepthwiseConv2dOp : public ConvPool2dOpBase { @@ -48,6 +50,7 @@ class DepthwiseConv2dOp : public ConvPool2dOpBase { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_DEPTHWISE_CONV2D_H_ diff --git a/mace/ops/depthwise_conv2d_benchmark.cc b/mace/ops/depthwise_conv2d_benchmark.cc index b0a1fba67ea98ad00824c0d490d532023561749b..d97df4585d1925ca2a2c3f07e7a4ae29992636bb 100644 --- a/mace/ops/depthwise_conv2d_benchmark.cc +++ b/mace/ops/depthwise_conv2d_benchmark.cc @@ -10,6 +10,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { template static void DepthwiseConv2d(int iters, @@ -75,31 +77,33 @@ static void DepthwiseConv2d(int iters, } } -#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \ - DEVICE) \ - static void \ - BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE( \ - int iters) { \ - const int64_t dilation = 1; \ - const int64_t tot = static_cast(iters) * N * C * H * W; \ - int64_t pad_h = 0, pad_w = 0; \ - if (P == SAME) { \ - pad_h = KH / 2; \ - pad_w = KW / 2; \ - } \ - int64_t oh = \ - (H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \ - int64_t ow = \ - (W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \ - const int64_t macc = \ - static_cast(iters) * N * C * M * oh * ow * (KH * KW + 1); \ - mace::testing::MaccProcessed(macc); \ - mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ - DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, \ - mace::Padding::P, M); \ - } \ - BENCHMARK( \ - BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##M##_##TYPE##_##DEVICE) +#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, M, TYPE, \ + DEVICE) \ + static void \ + BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_\ + ##P##_##M##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t dilation = 1; \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + int64_t pad_h = 0, pad_w = 0; \ + if (P == SAME) { \ + pad_h = KH / 2; \ + pad_w = KW / 2; \ + } \ + int64_t oh = \ + (H + 2 * pad_h - KH - (KH - 1) * (dilation - 1)) / STRIDE + 1; \ + int64_t ow = \ + (W + 2 * pad_w - KW - (KW - 1) * (dilation - 1)) / STRIDE + 1; \ + const int64_t macc = \ + static_cast(iters) * N * C * M * oh * ow * (KH * KW + 1); \ + mace::testing::MaccProcessed(macc); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + DepthwiseConv2d(iters, N, C, H, W, KH, KW, STRIDE, \ + mace::Padding::P, M); \ + } \ + BENCHMARK( \ + BM_DEPTHWISE_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_\ + ##P##_##M##_##TYPE##_##DEVICE) #define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, M) \ BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, M, float, CPU); \ @@ -121,4 +125,6 @@ BM_DEPTHWISE_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 1); BM_DEPTHWISE_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 1); BM_DEPTHWISE_CONV_2D(1, 3, 512, 512, 3, 3, 2, SAME, 1); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index 0f4bf81cf23c32e41c736232d772d8d56332236e..59073899e24351dc798fd7f5da787a1eb6b25474 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -5,9 +5,9 @@ #include "mace/ops/conv_2d.h" #include "mace/ops/ops_test_util.h" -using namespace mace; - -namespace { +namespace mace { +namespace ops { +namespace test { class DepthwiseConv2dOpTest : public OpsTestBase {}; @@ -207,11 +207,10 @@ void TestNxNS12(const index_t height, const index_t width) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 5; - index_t input_channels = 3 + rand() % 16; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 5; + index_t input_channels = 3 + rand_r(&seed) % 16; index_t multiplier = 1; // Construct graph OpsTestNet net; @@ -316,4 +315,6 @@ TEST_F(DepthwiseConv2dOpTest, OpenCLUnalignedNxNS12Half) { TestNxNS12(107, 113); } -} // namespace +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/eltwise.cc b/mace/ops/eltwise.cc index 88d7d43f0daa8e3434589fd8a5ae6c9673bf91d1..5c49f3563f46dc30e634fc225dca262a32d3d682 100644 --- a/mace/ops/eltwise.cc +++ b/mace/ops/eltwise.cc @@ -5,6 +5,7 @@ #include "mace/ops/eltwise.h" namespace mace { +namespace ops { void Register_Eltwise(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Eltwise") @@ -26,4 +27,5 @@ void Register_Eltwise(OperatorRegistry *op_registry) { EltwiseOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/eltwise.h b/mace/ops/eltwise.h index 621a8f2b01fa575e4dfb79a4ab347f8455c8083b..f68622e5cd781f8078d0ada568b2e82fb634edfc 100644 --- a/mace/ops/eltwise.h +++ b/mace/ops/eltwise.h @@ -2,13 +2,14 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#ifndef MACE_OPS_RESHAPE_H_ -#define MACE_OPS_RESHAPE_H_ +#ifndef MACE_OPS_ELTWISE_H_ +#define MACE_OPS_ELTWISE_H_ #include "mace/core/operator.h" #include "mace/kernels/eltwise.h" namespace mace { +namespace ops { template class EltwiseOp : public Operator { @@ -44,6 +45,7 @@ class EltwiseOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace -#endif // MACE_OPS_RESHAPE_H_ +#endif // MACE_OPS_ELTWISE_H_ diff --git a/mace/ops/eltwise_benchmark.cc b/mace/ops/eltwise_benchmark.cc index b92d612b6117ea5b758f7ad9934b76bcd53ab406..478db803b59e9dc6c6cd02e4719189f894cdf96b 100644 --- a/mace/ops/eltwise_benchmark.cc +++ b/mace/ops/eltwise_benchmark.cc @@ -2,13 +2,17 @@ // Copyright (c) 2017 XiaoMi All rights reserved. // -#include "mace/kernels/eltwise.h" #include + #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" +#include "mace/kernels/eltwise.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void EltwiseBenchmark( int iters, kernels::EltwiseType type, int n, int h, int w, int c) { @@ -81,4 +85,6 @@ BM_ELTWISE(0, 1, 240, 240, 256); BM_ELTWISE(1, 1, 240, 240, 256); BM_ELTWISE(2, 1, 240, 240, 256); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/eltwise_test.cc b/mace/ops/eltwise_test.cc index 39fde396e48109d2c05653b3e24c0244f993e716..8a0fbcd882cf68156afbf459cac32d0116478bec 100644 --- a/mace/ops/eltwise_test.cc +++ b/mace/ops/eltwise_test.cc @@ -7,6 +7,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class EltwiseOpTest : public OpsTestBase {}; @@ -170,4 +172,6 @@ TEST_F(EltwiseOpTest, OPENCLRandomHalf) { {13, 32, 32, 64}); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/folded_batch_norm.cc b/mace/ops/folded_batch_norm.cc index 5847ab9485f192232b109482a3f5d89e4259db04..10cc39272c814f09e37cee22450ed51259aac710 100644 --- a/mace/ops/folded_batch_norm.cc +++ b/mace/ops/folded_batch_norm.cc @@ -5,6 +5,7 @@ #include "mace/ops/folded_batch_norm.h" namespace mace { +namespace ops { void Register_FoldedBatchNorm(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm") @@ -26,4 +27,5 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) { FoldedBatchNormOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/folded_batch_norm.h b/mace/ops/folded_batch_norm.h index 28f7f99a8b20e26b553165258885222aca860483..dd2f7b17c87ea58c5956b9ea014dd7f1a09ce964 100644 --- a/mace/ops/folded_batch_norm.h +++ b/mace/ops/folded_batch_norm.h @@ -5,10 +5,13 @@ #ifndef MACE_OPS_FOLDED_BATCH_NORM_H_ #define MACE_OPS_FOLDED_BATCH_NORM_H_ +#include + #include "mace/core/operator.h" #include "mace/kernels/batch_norm.h" namespace mace { +namespace ops { template class FoldedBatchNormOp : public Operator { @@ -48,6 +51,7 @@ class FoldedBatchNormOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_FOLDED_BATCH_NORM_H_ diff --git a/mace/ops/folded_batch_norm_test.cc b/mace/ops/folded_batch_norm_test.cc index b72ce9b7de58f9212631bcd4713f72d35217bf2a..4c13e08c38e58477a5dbbe00885166d13d5bcddf 100644 --- a/mace/ops/folded_batch_norm_test.cc +++ b/mace/ops/folded_batch_norm_test.cc @@ -6,6 +6,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class FoldedBatchNormOpTest : public OpsTestBase {}; @@ -14,12 +16,12 @@ void CalculateScaleOffset(const std::vector &gamma, const std::vector &mean, const std::vector &var, const float epsilon, - std::vector &scale, - std::vector &offset) { + std::vector *scale, + std::vector *offset) { size_t size = gamma.size(); for (int i = 0; i < size; ++i) { - scale[i] = gamma[i] / std::sqrt(var[i] + epsilon); - offset[i] = offset[i] - mean[i] * scale[i]; + (*scale)[i] = gamma[i] / std::sqrt(var[i] + epsilon); + (*offset)[i] = (*offset)[i] - mean[i] * (*scale)[i]; } } @@ -32,7 +34,7 @@ void Simple() { {5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}); std::vector scale(1); std::vector offset(1); - CalculateScaleOffset({4.0f}, {2.0}, {10}, {11.67f}, 1e-3, scale, offset); + CalculateScaleOffset({4.0f}, {2.0}, {10}, {11.67f}, 1e-3, &scale, &offset); net.AddInputFromArray("Scale", {1}, scale); net.AddInputFromArray("Offset", {1}, offset); @@ -172,11 +174,10 @@ width}); */ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 64; index_t width = 64; @@ -227,11 +228,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { } TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 64; index_t width = 64; @@ -283,11 +283,10 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { } TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 103; index_t width = 113; @@ -337,11 +336,10 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { } TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { - srand(time(NULL)); - // generate random input - index_t batch = 1 + rand() % 10; - index_t channels = 3 + rand() % 50; + static unsigned int seed = time(NULL); + index_t batch = 1 + rand_r(&seed) % 10; + index_t channels = 3 + rand_r(&seed) % 50; index_t height = 103; index_t width = 113; @@ -390,4 +388,7 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 0.5); } -} + +} // namespace test +} // namespace ops +} // namespace mace diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc index 7f9df74c3389be777dea74d2bd570e9b61c0f4cc..dd4c5b87228d8e1c0aacbf2b93e5cb1a03e3aa76 100644 --- a/mace/ops/fully_connected.cc +++ b/mace/ops/fully_connected.cc @@ -5,6 +5,7 @@ #include "mace/ops/fully_connected.h" namespace mace { +namespace ops { void Register_FullyConnected(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("FC") @@ -26,4 +27,5 @@ void Register_FullyConnected(OperatorRegistry *op_registry) { FullyConnectedOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/fully_connected.h b/mace/ops/fully_connected.h index 510f4773654a28b808e9abeaab59a9e5ae8923a0..5ac305ff9abb5ecc570f50375d8c4335f72e2fdd 100644 --- a/mace/ops/fully_connected.h +++ b/mace/ops/fully_connected.h @@ -11,6 +11,7 @@ #include "mace/kernels/fully_connected.h" namespace mace { +namespace ops { template class FullyConnectedOp : public Operator { @@ -48,6 +49,7 @@ class FullyConnectedOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_FULLY_CONNECTED_H_ diff --git a/mace/ops/fully_connected_benchmark.cc b/mace/ops/fully_connected_benchmark.cc index c136ce1b88543b7dbbb1b7dc26e8187093bd01a2..2328ea8e2290f2915e825d8dbf68f25dc6bd7d49 100644 --- a/mace/ops/fully_connected_benchmark.cc +++ b/mace/ops/fully_connected_benchmark.cc @@ -3,11 +3,14 @@ // #include + #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { template static void FCBenchmark( @@ -84,4 +87,7 @@ BM_FC(1, 16, 16, 32, 32); BM_FC(1, 8, 8, 32, 1000); BM_FC(1, 2, 2, 512, 2); BM_FC(1, 7, 7, 512, 4096); + +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/fully_connected_test.cc b/mace/ops/fully_connected_test.cc index be1dd1c11873f8b741eca58a00bba5de5112ba6b..26a893b25cf77f0a7af49b7379693729a2410de4 100644 --- a/mace/ops/fully_connected_test.cc +++ b/mace/ops/fully_connected_test.cc @@ -3,10 +3,13 @@ // #include + #include "mace/core/operator.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class FullyConnectedOpTest : public OpsTestBase {}; @@ -263,4 +266,6 @@ TEST_F(FullyConnectedOpTest, OPENCLHalfWidthFormatAligned) { TestWXFormat(1, 16, 32, 32, 32); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/fused_conv_2d.cc b/mace/ops/fused_conv_2d.cc index 4a0245f5e03e252775b1f17cea5a6823ecdaa56e..d4b5de4f809c7398adc6622ae9a11ee7aa911524 100644 --- a/mace/ops/fused_conv_2d.cc +++ b/mace/ops/fused_conv_2d.cc @@ -5,6 +5,7 @@ #include "mace/ops/fused_conv_2d.h" namespace mace { +namespace ops { void Register_FusedConv2D(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("FusedConv2D") @@ -26,4 +27,5 @@ void Register_FusedConv2D(OperatorRegistry *op_registry) { FusedConv2dOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/fused_conv_2d.h b/mace/ops/fused_conv_2d.h index dd43ca9cc2d3bb1c293c7bb9e9df72a9cf081de8..db9c6e3a48920c1346b12ac2943d42f940fb6c8f 100644 --- a/mace/ops/fused_conv_2d.h +++ b/mace/ops/fused_conv_2d.h @@ -13,6 +13,7 @@ #include "mace/ops/conv_pool_2d_base.h" namespace mace { +namespace ops { template class FusedConv2dOp : public ConvPool2dOpBase { @@ -47,6 +48,7 @@ class FusedConv2dOp : public ConvPool2dOpBase { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_FUSED_CONV_2D_H_ diff --git a/mace/ops/fused_conv_2d_test.cc b/mace/ops/fused_conv_2d_test.cc index 34d2b55de6a1a8205c15f64a7da86636ac4af8bd..554f2cba53025e8ca038fd8c420d8099184e35ba 100644 --- a/mace/ops/fused_conv_2d_test.cc +++ b/mace/ops/fused_conv_2d_test.cc @@ -8,6 +8,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class FusedConv2dOpTest : public OpsTestBase {}; @@ -276,9 +278,8 @@ static void TestComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { - unsigned int seed = time(NULL); - // generate random input + static unsigned int seed = time(NULL); index_t batch = 3 + (rand_r(&seed) % 10); index_t height = shape[0]; index_t width = shape[1]; @@ -352,9 +353,8 @@ static void TestHalfComplexConvNxNS12(const std::vector &shape) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, Padding type) { - unsigned int seed = time(NULL); - // generate random input + static unsigned int seed = time(NULL); index_t batch = 3 + (rand_r(&seed) % 10); index_t height = shape[0]; index_t width = shape[1]; @@ -679,4 +679,6 @@ TEST_F(FusedConv2dOpTest, OPENCL15X15AtrousConvD4) { {2, 2}); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/global_avg_pooling.cc b/mace/ops/global_avg_pooling.cc index 65d34b49fff906c8159218a363afdcfcb70e67d4..5421f1ce0756f13e470645ed9a12242a6fe7612e 100644 --- a/mace/ops/global_avg_pooling.cc +++ b/mace/ops/global_avg_pooling.cc @@ -5,6 +5,7 @@ #include "mace/ops/global_avg_pooling.h" namespace mace { +namespace ops { void Register_GlobalAvgPooling(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("GlobalAvgPooling") @@ -14,4 +15,5 @@ void Register_GlobalAvgPooling(OperatorRegistry *op_registry) { GlobalAvgPoolingOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/global_avg_pooling.h b/mace/ops/global_avg_pooling.h index 6a7f49fedfe7a7080e3dbde332d586b9c831be22..59ab7452dc91e7c80ffeced960fbace9ffd5daef 100644 --- a/mace/ops/global_avg_pooling.h +++ b/mace/ops/global_avg_pooling.h @@ -11,6 +11,7 @@ #include "mace/kernels/global_avg_pooling.h" namespace mace { +namespace ops { template class GlobalAvgPoolingOp : public Operator { @@ -40,6 +41,7 @@ class GlobalAvgPoolingOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_GLOBAL_AVG_POOLING_H_ diff --git a/mace/ops/global_avg_pooling_benchmark.cc b/mace/ops/global_avg_pooling_benchmark.cc index 70a88408910fca25af5aed8b2f594e3056caaf9c..0e8126bc3704e728dfd935d38c59ed1ddcf3c1b2 100644 --- a/mace/ops/global_avg_pooling_benchmark.cc +++ b/mace/ops/global_avg_pooling_benchmark.cc @@ -8,7 +8,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { -namespace kernels { +namespace ops { +namespace test { template static void GlobalAvgPooling( @@ -54,5 +55,6 @@ BM_GLOBAL_AVG_POOLING(1, 3, 7, 7); BM_GLOBAL_AVG_POOLING(1, 3, 64, 64); BM_GLOBAL_AVG_POOLING(1, 3, 256, 256); -} // namespace kernels +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/global_avg_pooling_test.cc b/mace/ops/global_avg_pooling_test.cc index 8c2fccd8ea231c7fa989b966fb8fa07e682b7e04..8b82f7ab4e3c9e8a98d0cf5549028a3a4d691a9f 100644 --- a/mace/ops/global_avg_pooling_test.cc +++ b/mace/ops/global_avg_pooling_test.cc @@ -5,6 +5,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class GlobalAvgPoolingOpTest : public OpsTestBase {}; @@ -32,4 +34,6 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.001); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/image_to_buffer.cc b/mace/ops/image_to_buffer.cc index d5031539d83ab330eaf0410e68063a16b4892b40..6ea1d8db5fc61452cdea89d776c423d99a222223 100644 --- a/mace/ops/image_to_buffer.cc +++ b/mace/ops/image_to_buffer.cc @@ -5,6 +5,7 @@ #include "mace/ops/image_to_buffer.h" namespace mace { +namespace ops { void Register_ImageToBuffer(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("ImageToBuffer") @@ -20,4 +21,5 @@ void Register_ImageToBuffer(OperatorRegistry *op_registry) { ImageToBufferOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/image_to_buffer.h b/mace/ops/image_to_buffer.h index 22169b4e0e7379385fc750480caf5b261dd12336..b786d3092faeb3c989bc9ad9870c063d57faaa37 100644 --- a/mace/ops/image_to_buffer.h +++ b/mace/ops/image_to_buffer.h @@ -9,6 +9,7 @@ #include "mace/kernels/buffer_to_image.h" namespace mace { +namespace ops { template class ImageToBufferOp : public Operator { @@ -35,5 +36,7 @@ class ImageToBufferOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace + #endif // MACE_OPS_IMAGE_TO_BUFFER_H_ diff --git a/mace/ops/matmul.cc b/mace/ops/matmul.cc index e8a178cb053a92cf6237d274d8206f7d941c9c2f..3ebe99a93acd1dafe8b90cdf468affdc60f4d880 100644 --- a/mace/ops/matmul.cc +++ b/mace/ops/matmul.cc @@ -5,6 +5,7 @@ #include "mace/ops/matmul.h" namespace mace { +namespace ops { void Register_MatMul(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("MatMul") @@ -26,4 +27,5 @@ void Register_MatMul(OperatorRegistry *op_registry) { MatMulOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/matmul.h b/mace/ops/matmul.h index b45ae35aa83f7f94f426d245fdc21bb47959607f..b8bca6a247e04885e2dc73015c27262cb8285542 100644 --- a/mace/ops/matmul.h +++ b/mace/ops/matmul.h @@ -9,6 +9,7 @@ #include "mace/kernels/matmul.h" namespace mace { +namespace ops { template class MatMulOp : public Operator { @@ -35,6 +36,7 @@ class MatMulOp : public Operator { kernels::MatMulFunctor functor_; }; +} // namespace ops } // namespace mace #endif // MACE_OPS_MATMUL_H_ diff --git a/mace/ops/matmul_benchmark.cc b/mace/ops/matmul_benchmark.cc index 6c167636259aae1d7eafde2d587f49d19b0e96c3..850acb184d404befd0c7897831175ff2cabe8e74 100644 --- a/mace/ops/matmul_benchmark.cc +++ b/mace/ops/matmul_benchmark.cc @@ -3,11 +3,15 @@ // #include + #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void MatMulBenchmark( int iters, int batch, int height, int channels, int out_width) { @@ -72,4 +76,7 @@ BM_MATMUL(16, 32, 128, 3969); BM_MATMUL(16, 128, 128, 49); BM_MATMUL(16, 128, 128, 961); BM_MATMUL(16, 128, 128, 3969); + +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/matmul_test.cc b/mace/ops/matmul_test.cc index 2c54bd0e269f12b7d43faed34cb2106307d85677..192cfc31f63b77faa791e632239b99b433f9a6ad 100644 --- a/mace/ops/matmul_test.cc +++ b/mace/ops/matmul_test.cc @@ -3,10 +3,13 @@ // #include + #include "mace/core/operator.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class MatMulOpTest : public OpsTestBase {}; @@ -170,4 +173,7 @@ TEST_F(MatMulOpTest, OPENCLHalfUnAlignedWithBatch) { Complex(16, 32, 64, 64); Complex(31, 31, 61, 67); } + +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 5d83ce3cf3b60f935baade8fcecd72fefbf58c88..72d97b3e816b8974aa744f027f41603f647c1064 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -22,6 +22,8 @@ #include "mace/utils/utils.h" namespace mace { +namespace ops { +namespace test { class OpDefBuilder { public: @@ -423,6 +425,8 @@ void ImageToBuffer(OpsTestNet *net, net->Sync(); } +} // namespace test +} // namespace ops } // namespace mace #endif // MACE_OPS_OPS_TEST_UTIL_H_ diff --git a/mace/ops/pooling.cc b/mace/ops/pooling.cc index f761f5459ae260ba2352e8a759f002b35aa1df57..166a71ade097b34249b865b9d8ded158f415a22a 100644 --- a/mace/ops/pooling.cc +++ b/mace/ops/pooling.cc @@ -5,6 +5,7 @@ #include "mace/ops/pooling.h" namespace mace { +namespace ops { void Register_Pooling(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Pooling") @@ -30,4 +31,5 @@ void Register_Pooling(OperatorRegistry *op_registry) { PoolingOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/pooling.h b/mace/ops/pooling.h index d31e9fc96ba771ed451358245c5b4e878961ea98..1033ad6ec32eac56def19a7286dcf29f8979a7bb 100644 --- a/mace/ops/pooling.h +++ b/mace/ops/pooling.h @@ -12,6 +12,7 @@ #include "mace/ops/conv_pool_2d_base.h" namespace mace { +namespace ops { template class PoolingOp : public ConvPool2dOpBase { @@ -46,6 +47,7 @@ class PoolingOp : public ConvPool2dOpBase { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_POOLING_H_ diff --git a/mace/ops/pooling_benchmark.cc b/mace/ops/pooling_benchmark.cc index f54f9b687e880e08c68d2519b8b3deb663fbc68c..dd5c57a9971291fd6510e5e8ba398b86273683b4 100644 --- a/mace/ops/pooling_benchmark.cc +++ b/mace/ops/pooling_benchmark.cc @@ -9,7 +9,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { -namespace kernels { +namespace ops { +namespace test { template static void Pooling(int iters, @@ -73,5 +74,6 @@ BM_POOLING(1, 3, 257, 257, 2, 2, SAME, MAX); BM_POOLING(1, 3, 513, 513, 2, 2, SAME, MAX); BM_POOLING(1, 3, 1025, 1025, 2, 2, SAME, MAX); -} // namespace kernels +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index 4297374433e8e33985439a0991a4c3690257d559..dd81a4bb3cbd0b4fb0b7e5f5ee105810ce19df09 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -10,6 +10,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class PoolingOpTest : public OpsTestBase {}; @@ -394,4 +396,6 @@ TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) { Padding::SAME); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/reshape.cc b/mace/ops/reshape.cc index df528603537ab05087df3b7713180afb660eeed0..26dff665eeb040893ced6832bfa4b4312b6a4482 100644 --- a/mace/ops/reshape.cc +++ b/mace/ops/reshape.cc @@ -5,6 +5,7 @@ #include "mace/ops/reshape.h" namespace mace { +namespace ops { void Register_Reshape(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Reshape") @@ -14,4 +15,5 @@ void Register_Reshape(OperatorRegistry *op_registry) { ReshapeOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/reshape.h b/mace/ops/reshape.h index 02b4f575b63648302eed431088251d30e3924a2c..e7f1a80ad5f315d3e873e13ba1c0d9e075fc9234 100644 --- a/mace/ops/reshape.h +++ b/mace/ops/reshape.h @@ -11,6 +11,7 @@ #include "mace/kernels/reshape.h" namespace mace { +namespace ops { template class ReshapeOp : public Operator { @@ -63,6 +64,7 @@ class ReshapeOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_RESHAPE_H_ diff --git a/mace/ops/reshape_test.cc b/mace/ops/reshape_test.cc index 0a879da7432173b4b139b01cc9a68cc6b23ceab1..e8c363a61e255f28657b9a7cf0f4955e4dcd279b 100644 --- a/mace/ops/reshape_test.cc +++ b/mace/ops/reshape_test.cc @@ -7,6 +7,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class ReshapeTest : public OpsTestBase {}; @@ -54,4 +56,6 @@ TEST_F(ReshapeTest, Complex) { TestReshape({1, 2, 3, 4}, {1, 3, 8}, {1, 3, 8}); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/resize_bilinear.cc b/mace/ops/resize_bilinear.cc index d304c24cd4288821732b32ce547a01724efadc13..a7449a3c6a97ce8dc5fcff666507a33d16e92a4b 100644 --- a/mace/ops/resize_bilinear.cc +++ b/mace/ops/resize_bilinear.cc @@ -5,6 +5,7 @@ #include "mace/ops/resize_bilinear.h" namespace mace { +namespace ops { void Register_ResizeBilinear(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("ResizeBilinear") @@ -26,4 +27,5 @@ void Register_ResizeBilinear(OperatorRegistry *op_registry) { ResizeBilinearOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/resize_bilinear.h b/mace/ops/resize_bilinear.h index a3f9d0753ce384c8c1cef8bc3c4b37f1d26a3eb1..1fc727484920c4092b7e12ca8a8031c562a366a4 100644 --- a/mace/ops/resize_bilinear.h +++ b/mace/ops/resize_bilinear.h @@ -9,6 +9,7 @@ #include "mace/kernels/resize_bilinear.h" namespace mace { +namespace ops { template class ResizeBilinearOp : public Operator { @@ -34,6 +35,7 @@ class ResizeBilinearOp : public Operator { kernels::ResizeBilinearFunctor functor_; }; +} // namespace ops } // namespace mace #endif // MACE_OPS_RESIZE_BILINEAR_H_ diff --git a/mace/ops/resize_bilinear_benchmark.cc b/mace/ops/resize_bilinear_benchmark.cc index f93530eca59c911e368f32e45d2340221afd7f40..aa66d346c983db82f20074395e1155745cd0d18f 100644 --- a/mace/ops/resize_bilinear_benchmark.cc +++ b/mace/ops/resize_bilinear_benchmark.cc @@ -8,6 +8,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void ResizeBilinearBenchmark(int iters, int batch, @@ -86,4 +89,6 @@ BM_RESIZE_BILINEAR(1, 128, 240, 240, 480, 480); BM_RESIZE_BILINEAR(1, 3, 4032, 3016, 480, 480); BM_RESIZE_BILINEAR(1, 3, 480, 480, 4032, 3016); +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc index f5f1a720907e3bfb32017b503a3a92a8e087878c..896fe630842c524d77c5a2066792408bffca492f 100644 --- a/mace/ops/resize_bilinear_test.cc +++ b/mace/ops/resize_bilinear_test.cc @@ -4,11 +4,13 @@ #include -#include "mace/ops/resize_bilinear.h" #include "mace/core/operator.h" +#include "mace/ops/resize_bilinear.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class ResizeBilinearTest : public OpsTestBase {}; @@ -63,9 +65,8 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) { template void TestRandomResizeBilinear() { - unsigned int seed = time(nullptr); testing::internal::LogToStderr(); - + static unsigned int seed = time(NULL); for (int round = 0; round < 10; ++round) { int batch = 1 + rand_r(&seed) % 5; int channels = 1 + rand_r(&seed) % 100; @@ -108,7 +109,7 @@ void TestRandomResizeBilinear() { ImageToBuffer(&net, "OutputImage", "DeviceOutput", kernels::BufferType::IN_OUT_CHANNEL); } else { - // TODO(yejianwu) support NEON + // TODO(someone): support NEON } // Check ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), 0.001); @@ -125,4 +126,6 @@ TEST_F(ResizeBilinearTest, OPENCLRandomResizeBilinear) { TestRandomResizeBilinear(); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/slice.cc b/mace/ops/slice.cc index 6de3da403fca90031c76597c31126f742bf8ba5f..d482b0288bbcaaa8d7144ad98de10b50d4c7db40 100644 --- a/mace/ops/slice.cc +++ b/mace/ops/slice.cc @@ -5,6 +5,7 @@ #include "mace/ops/slice.h" namespace mace { +namespace ops { void Register_Slice(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Slice") @@ -25,4 +26,5 @@ void Register_Slice(OperatorRegistry *op_registry) { SliceOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/slice.h b/mace/ops/slice.h index 2e1ba6de53b6e05c6268cddc1cbce860dc848748..a1a6ad112578c41f5966a0a61951a730c50cf245 100644 --- a/mace/ops/slice.h +++ b/mace/ops/slice.h @@ -9,7 +9,9 @@ #include "mace/core/operator.h" #include "mace/kernels/slice.h" + namespace mace { +namespace ops { template class SliceOp : public Operator { @@ -36,6 +38,7 @@ class SliceOp : public Operator { OP_INPUT_TAGS(INPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_SLICE_H_ diff --git a/mace/ops/slice_benchmark.cc b/mace/ops/slice_benchmark.cc index 8a3fa0c94958518084bba35f855bc86bcb977f63..a38c995d7f6c46d493d511d5f10ef1b9a1790f09 100644 --- a/mace/ops/slice_benchmark.cc +++ b/mace/ops/slice_benchmark.cc @@ -7,6 +7,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void BMSliceHelper(int iters, const std::vector &input_shape, @@ -79,5 +82,6 @@ BM_SLICE(1, 32, 32, 256, 2); BM_SLICE(1, 128, 128, 32, 2); BM_SLICE(1, 128, 128, 128, 2); - +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/slice_test.cc b/mace/ops/slice_test.cc index dc900d67bb99c08a8b847c93cb8173fb28aad87b..bd0244d3cd9453785c30a219c6011c82edd2bd84 100644 --- a/mace/ops/slice_test.cc +++ b/mace/ops/slice_test.cc @@ -5,17 +5,19 @@ #include #include +#include "gmock/gmock.h" #include "mace/ops/slice.h" #include "mace/ops/ops_test_util.h" -#include "gmock/gmock.h" namespace mace { +namespace ops { +namespace test { class SliceOpTest : public OpsTestBase {}; template void RandomTest(const int num_outputs) { - unsigned int seed = time(nullptr); + static unsigned int seed = time(NULL); const index_t output_channels = 4 * (1 + rand_r(&seed) % 10); const index_t input_channels = num_outputs * output_channels; const index_t batch = 3 + (rand_r(&seed) % 10); @@ -108,4 +110,6 @@ TEST_F(SliceOpTest, OPENCLHalf) { RandomTest(11); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/softmax.cc b/mace/ops/softmax.cc index 7b68e76241e9df00d3f1bf048ee7a5079ebed29d..6b8ead8164736f59a59b6079992c7bbd28a8b1b8 100644 --- a/mace/ops/softmax.cc +++ b/mace/ops/softmax.cc @@ -5,6 +5,7 @@ #include "mace/ops/softmax.h" namespace mace { +namespace ops { void Register_Softmax(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("Softmax") @@ -26,4 +27,5 @@ void Register_Softmax(OperatorRegistry *op_registry) { SoftmaxOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/softmax.h b/mace/ops/softmax.h index c092aebc4999f55ad377e8e8ecdb1369f457dbea..0aedaa3cadf155b3dffe99576007d56f5c2cd6c7 100644 --- a/mace/ops/softmax.h +++ b/mace/ops/softmax.h @@ -9,6 +9,7 @@ #include "mace/kernels/softmax.h" namespace mace { +namespace ops { template class SoftmaxOp : public Operator { @@ -34,6 +35,7 @@ class SoftmaxOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_SOFTMAX_H_ diff --git a/mace/ops/softmax_benchmark.cc b/mace/ops/softmax_benchmark.cc index fd77b7af5fd028ed77fdeb5b1db570572b93b8c2..fb6dc4ef27ae2ae6904d5a598a70484724fd24ef 100644 --- a/mace/ops/softmax_benchmark.cc +++ b/mace/ops/softmax_benchmark.cc @@ -3,11 +3,15 @@ // #include + #include "mace/core/operator.h" #include "mace/core/testing/test_benchmark.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void SoftmaxBenchmark( int iters, int batch, int channels, int height, int width) { @@ -66,4 +70,7 @@ BM_SOFTMAX(1, 3, 512, 512); BM_SOFTMAX(1, 4, 512, 512); BM_SOFTMAX(1, 10, 256, 256); BM_SOFTMAX(1, 1024, 7, 7); + +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/softmax_test.cc b/mace/ops/softmax_test.cc index cb994ba828b976fb25ce05719e8d8ab01b25e1a6..b4ad23097db6474df978f1537572aecac11b6dfa 100644 --- a/mace/ops/softmax_test.cc +++ b/mace/ops/softmax_test.cc @@ -6,6 +6,8 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class SoftmaxOpTest : public OpsTestBase {}; @@ -102,4 +104,6 @@ TEST_F(SoftmaxOpTest, OPENCLUnAligned) { Complex({5, 211, 107, 1}); } +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/space_to_batch.cc b/mace/ops/space_to_batch.cc index 89957f67bd838f910535a5bfceb64d2976c8876a..009f4abe37589af4d3ac5f9889c5652086d3b02f 100644 --- a/mace/ops/space_to_batch.cc +++ b/mace/ops/space_to_batch.cc @@ -5,6 +5,7 @@ #include "mace/ops/space_to_batch.h" namespace mace { +namespace ops { void Register_SpaceToBatchND(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("SpaceToBatchND") @@ -19,4 +20,5 @@ void Register_SpaceToBatchND(OperatorRegistry *op_registry) { SpaceToBatchNDOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/space_to_batch.h b/mace/ops/space_to_batch.h index 35cddf5d77913e6ce613ced0ff9483f2496994d3..dd051fa92ffc56602fb9e27c9f175065cff1a2fe 100644 --- a/mace/ops/space_to_batch.h +++ b/mace/ops/space_to_batch.h @@ -12,6 +12,7 @@ #include "mace/kernels/space_to_batch.h" namespace mace { +namespace ops { template class SpaceToBatchNDOp : public Operator { @@ -72,6 +73,7 @@ class SpaceToBatchNDOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_SPACE_TO_BATCH_H_ diff --git a/mace/ops/space_to_batch_benchmark.cc b/mace/ops/space_to_batch_benchmark.cc index 7829a0439500272e4145eacb93c3fa36c5ae0a0e..62a4f7ddf5c51dd29266321f5e3ec779ed00edde 100644 --- a/mace/ops/space_to_batch_benchmark.cc +++ b/mace/ops/space_to_batch_benchmark.cc @@ -7,6 +7,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void BMSpaceToBatch( int iters, int batch, int height, int width, int channels, int shape) { @@ -55,4 +58,7 @@ static void BMSpaceToBatch( BM_SPACE_TO_BATCH(128, 16, 16, 128, 2); BM_SPACE_TO_BATCH(1, 256, 256, 32, 2); BM_SPACE_TO_BATCH(1, 256, 256, 32, 4); + +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/space_to_batch_test.cc b/mace/ops/space_to_batch_test.cc index b20ca84da12c8f38b43c25e7b7df6cd98cc6788b..452a9638c14134cfbbb37feb0d7236798d6980a8 100644 --- a/mace/ops/space_to_batch_test.cc +++ b/mace/ops/space_to_batch_test.cc @@ -3,10 +3,13 @@ // #include + #include "gtest/gtest.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { template void RunSpaceToBatch(const std::vector &input_shape, @@ -217,4 +220,6 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { // space_tensor.get()); //} +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/winograd_convolution_test.cc b/mace/ops/winograd_convolution_test.cc index 6965e4e957bd571013065566c19e3308abce3f98..1f335c6bd9642adc20039f16a1b2158ff97f9cab 100644 --- a/mace/ops/winograd_convolution_test.cc +++ b/mace/ops/winograd_convolution_test.cc @@ -3,11 +3,14 @@ // #include + #include "mace/core/operator.h" #include "mace/kernels/conv_pool_2d_util.h" #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { class WinogradConvlutionTest : public OpsTestBase {}; @@ -40,7 +43,7 @@ void WinogradConvolution(const index_t batch, const index_t in_channels, const index_t out_channels, const Padding padding) { - srand(time(NULL)); + // srand(time(NULL)); // Construct graph OpsTestNet net; @@ -157,7 +160,7 @@ void WinogradConvolutionWithPad(const index_t batch, const index_t in_channels, const index_t out_channels, const int padding) { - srand(time(NULL)); + // srand(time(NULL)); // Construct graph OpsTestNet net; @@ -246,9 +249,6 @@ void WinogradConvolutionWithPad(const index_t batch, } } -TEST_F(WinogradConvlutionTest, UnAlignedConvolutionPad2) { - WinogradConvolutionWithPad(1, 64, 64, 40, 19, 2); - WinogradConvolutionWithPad(1, 32, 32, 96, 109, 2); -} - +} // namespace test +} // namespace ops } // namespace mace diff --git a/mace/ops/winograd_inverse_transform.cc b/mace/ops/winograd_inverse_transform.cc index 677fe3e7905bac6c553e794bd28c998627faff08..47d1148b78d14ec3bb3cda183d645bae1ac59599 100644 --- a/mace/ops/winograd_inverse_transform.cc +++ b/mace/ops/winograd_inverse_transform.cc @@ -5,6 +5,7 @@ #include "mace/ops/winograd_inverse_transform.h" namespace mace { +namespace ops { void Register_WinogradInverseTransform(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradInverseTransform") @@ -19,4 +20,5 @@ void Register_WinogradInverseTransform(OperatorRegistry *op_registry) { WinogradInverseTransformOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/winograd_inverse_transform.h b/mace/ops/winograd_inverse_transform.h index c53c1efee8b8100841ff43e7bdacb38188e42ef6..55be83b70e983abc1fed4623dfd21ed3018233f1 100644 --- a/mace/ops/winograd_inverse_transform.h +++ b/mace/ops/winograd_inverse_transform.h @@ -13,6 +13,7 @@ #include "mace/kernels/winograd_transform.h" namespace mace { +namespace ops { template class WinogradInverseTransformOp : public Operator { @@ -43,6 +44,7 @@ class WinogradInverseTransformOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_WINOGRAD_INVERSE_TRANSFORM_H_ diff --git a/mace/ops/winograd_transform.cc b/mace/ops/winograd_transform.cc index e7cd86882a73ca967bfbfb587181cd6ae57ad70a..f8aa4f621a137d6b87337951fd7896c20ec0a3dc 100644 --- a/mace/ops/winograd_transform.cc +++ b/mace/ops/winograd_transform.cc @@ -5,6 +5,7 @@ #include "mace/ops/winograd_transform.h" namespace mace { +namespace ops { void Register_WinogradTransform(OperatorRegistry *op_registry) { REGISTER_OPERATOR(op_registry, OpKeyBuilder("WinogradTransform") @@ -19,4 +20,5 @@ void Register_WinogradTransform(OperatorRegistry *op_registry) { WinogradTransformOp); } +} // namespace ops } // namespace mace diff --git a/mace/ops/winograd_transform.h b/mace/ops/winograd_transform.h index e225adc768570345680c7d93039c8db2ca7738d6..c9476fc80e642451394270ac6350cdab8f1c4602 100644 --- a/mace/ops/winograd_transform.h +++ b/mace/ops/winograd_transform.h @@ -11,6 +11,7 @@ #include "mace/kernels/winograd_transform.h" namespace mace { +namespace ops { template class WinogradTransformOp : public Operator { @@ -37,6 +38,7 @@ class WinogradTransformOp : public Operator { OP_OUTPUT_TAGS(OUTPUT); }; +} // namespace ops } // namespace mace #endif // MACE_OPS_WINOGRAD_TRANSFORM_H_ diff --git a/mace/ops/winograd_transform_benchmark.cc b/mace/ops/winograd_transform_benchmark.cc index e0025d1ef0948399fe6dba637d8e30a8232692fa..bd20ae9f28ac28f664944a735162569d4c0a61d4 100644 --- a/mace/ops/winograd_transform_benchmark.cc +++ b/mace/ops/winograd_transform_benchmark.cc @@ -7,6 +7,9 @@ #include "mace/ops/ops_test_util.h" namespace mace { +namespace ops { +namespace test { + template static void BMWinogradTransform( int iters, int batch, int height, int width, int channels) { @@ -105,4 +108,6 @@ BM_WINOGRAD_INVERSE_TRANSFORM(1, 14, 14, 32); BM_WINOGRAD_INVERSE_TRANSFORM(1, 62, 62, 32); BM_WINOGRAD_INVERSE_TRANSFORM(1, 126, 126, 32); +} // namespace test +} // namespace ops } // namespace mace diff --git a/tools/benchmark.sh b/tools/benchmark.sh index dbcc862c431a188b8bd55cd4fe93ba0703a9f296..409bfeb4a2b368de06af136f9846e09f13301fc4 100644 --- a/tools/benchmark.sh +++ b/tools/benchmark.sh @@ -18,7 +18,13 @@ OPTION_ARGS=$3 echo $OPTION_ARGS -DEVICE_ID=`echo_device_id_by_soc $TARGET_SOC` +RESULT_VALUE=`echo_device_id_by_soc $TARGET_SOC` +if [ $? -ne 0 ]; then + echo $RESULT_VALUE + exit 1 +else + DEVICE_ID=$RESULT_VALUE +fi if [ -f "$MODEL_OUTPUT_DIR/benchmark_model" ]; then rm -rf $MODEL_OUTPUT_DIR/benchmark_model diff --git a/tools/build_run_throughput_test.sh b/tools/build_run_throughput_test.sh index aa1bf074ecb86451c086bdc422a80c9f0e3f1f9c..ef148e6ab3cd954b99a9f653ea299e7086708ab9 100644 --- a/tools/build_run_throughput_test.sh +++ b/tools/build_run_throughput_test.sh @@ -17,7 +17,13 @@ RUN_SECONDS=$2 MERGED_LIB_FILE=$3 MODEL_INPUT_DIR=$4 -DEVICE_ID=`echo_device_id_by_soc $TARGET_SOC` +RESULT_VALUE=`echo_device_id_by_soc $TARGET_SOC` +if [ $? -ne 0 ]; then + echo $RESULT_VALUE + exit 1 +else + DEVICE_ID=$RESULT_VALUE +fi if [ "$CPU_MODEL_TAG" != '' ]; then CPU_MODEL_TAG_BUILD_FLAGS="--copt=-DMACE_CPU_MODEL_TAG=${CPU_MODEL_TAG}" diff --git a/tools/clear_env.sh b/tools/clear_env.sh index cc8a6f00ef0a30fee992c17d23ac0ddf60e335bc..ef14f55bf2d6433b8134258d12e8cf24c91f3efb 100644 --- a/tools/clear_env.sh +++ b/tools/clear_env.sh @@ -13,7 +13,13 @@ CURRENT_DIR=`dirname $0` source ${CURRENT_DIR}/env.sh TARGET_SOC=$1 -DEVICE_ID=`echo_device_id_by_soc $TARGET_SOC` +RESULT_VALUE=`echo_device_id_by_soc $TARGET_SOC` +if [ $? -ne 0 ]; then + echo $RESULT_VALUE + exit 1 +else + DEVICE_ID=$RESULT_VALUE +fi if [ x"$TARGET_ABI" != x"host" ]; then adb -s $DEVICE_ID shell rm -rf $PHONE_DATA_DIR || exit 1 diff --git a/tools/env.sh b/tools/env.sh index 254ddd6982b09e5464d5e6762e151520168f2a90..f48787a8956ac79349379207b054b0e7c4723e5f 100644 --- a/tools/env.sh +++ b/tools/env.sh @@ -40,6 +40,10 @@ echo_device_id_by_soc() device_soc=`adb -s ${device} shell getprop | grep ro.board.platform | cut -d [ -f3 | cut -d ] -f1` if [ x"$TARGET_SOC" = x"$device_soc" ]; then echo "$device" + return 0 fi done + + echo "MACE ERROR: Not found device with soc ${TARGET_SOC}" + return 1 } diff --git a/tools/generate_production_code.sh b/tools/generate_production_code.sh index c713b47aaaab39beaaaa66a9fddc8e9ac92fb097..f4f021c4e930e50a5c969568e0ecda0891357914 100644 --- a/tools/generate_production_code.sh +++ b/tools/generate_production_code.sh @@ -16,7 +16,13 @@ TARGET_SOC=$1 CL_BIN_DIRS=$2 PULL_OR_NOT=$3 -DEVICE_ID=`echo_device_id_by_soc $TARGET_SOC` +RESULT_VALUE=`echo_device_id_by_soc $TARGET_SOC` +if [ $? -ne 0 ]; then + echo $RESULT_VALUE + exit 1 +else + DEVICE_ID=$RESULT_VALUE +fi if [ "$PULL_OR_NOT" = 1 ]; then CL_BIN_DIR=${CL_BIN_DIRS} diff --git a/tools/tuning_run.sh b/tools/tuning_run.sh index 1d9f57e5d74f3261b3b13b76a825d097ace1f8d9..c4e8dbe99e403b6d067b4a802022f341e6a49141 100644 --- a/tools/tuning_run.sh +++ b/tools/tuning_run.sh @@ -22,7 +22,13 @@ OPTION_ARGS=$7 echo $OPTION_ARGS -DEVICE_ID=`echo_device_id_by_soc $TARGET_SOC` +RESULT_VALUE=`echo_device_id_by_soc $TARGET_SOC` +if [ $? -ne 0 ]; then + echo $RESULT_VALUE + exit 1 +else + DEVICE_ID=$RESULT_VALUE +fi if [ x"$TARGET_ABI" = x"host" ]; then MACE_CPP_MIN_VLOG_LEVEL=$VLOG_LEVEL \ diff --git a/tools/validate_tools.sh b/tools/validate_tools.sh index 7e6159c4b7a1e1349a469822c761283301192c37..1001ed80e64709db806096425fa387e15f9fd78d 100644 --- a/tools/validate_tools.sh +++ b/tools/validate_tools.sh @@ -16,7 +16,13 @@ TARGET_SOC=$1 MODEL_OUTPUT_DIR=$2 GENERATE_DATA_OR_NOT=$3 -DEVICE_ID=`echo_device_id_by_soc $TARGET_SOC` +RESULT_VALUE=`echo_device_id_by_soc $TARGET_SOC` +if [ $? -ne 0 ]; then + echo $RESULT_VALUE + exit 1 +else + DEVICE_ID=$RESULT_VALUE +fi IFS=',' read -r -a INPUT_NAMES <<< "${INPUT_NODES}" IFS=',' read -r -a OUTPUT_NAMES <<< "${OUTPUT_NODES}" diff --git a/tools/wino_conv.py b/tools/wino_conv.py index a8cdf3d8e88586b10dd3256de3670978c2a2e5f2..0dc3f8d611e32c6cf931bd8ec9228cb8a25408ab 100644 --- a/tools/wino_conv.py +++ b/tools/wino_conv.py @@ -2,22 +2,89 @@ import numpy as np import math import tensorflow as tf -A_T = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32) -A = np.transpose(A_T) -B_T = np.array([ +A_T = {} +A = {} +B_T = {} +B = {} +G = {} +G_T = {} +# f(2, 3) +A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32) +A[4] = np.transpose(A_T[4]) +B_T[4] = np.array([ [1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0], [0, 1, 0, -1] ]).astype(np.float32) -B = np.transpose(B_T) -G = np.array([ +B[4] = np.transpose(B_T[4]) +G[4] = np.array([ [1, 0, 0], [0.5, 0.5, 0.5], [0.5, -0.5, 0.5], [0, 0, 1], ]).astype(np.float32) -G_T = np.transpose(G) +G_T[4] = np.transpose(G[4]) + +# f(4, 3) +A_T[6] = np.array([ + [1, 1, 1, 1, 1, 0], + [0, 1, -1, 2, -2, 0], + [0, 1, 1, 4, 4, 0], + [0, 1, -1, 8, -8, 1], +]).astype(np.float32) +A[6] = np.transpose(A_T[6]) +B_T[6] = np.array([ + [4, 0, -5, 0, 1, 0], + [0, -4, -4, 1, 1, 0], + [0, 4, -4, -1, 1, 0], + [0, -2, -1, 2, 1, 0], + [0, 2, -1, -2, 1, 0], + [0, 4, 0, -5, 0, 1], +]).astype(np.float32) +B[6] = np.transpose(B_T[6]) +G[6] = np.array([ + [1/4.0 , 0 , 0 ], + [-1/6.0, -1/6.0 , -1/6.0], + [-1/6.0, 1/6.0 , -1/6.0], + [1/24.0, 1/12.0 , 1/6.0 ], + [1/24.0, -1/12.0, 1/6.0 ], + [ 0 , 0 , 1 ], +]).astype(np.float32) +G_T[6] = np.transpose(G[6]) + +# f(6, 3) +A_T[8] = np.array([ + [1, 1, 1 , 1 , 1 , 1 , 1 , 0], + [0, 1, -1, 2 , -2 , 1/2. , -1/2. , 0], + [0, 1, 1 , 4 , 4 , 1/4. , 1/4. , 0], + [0, 1, -1, 8 , -8 , 1/8. , -1/8. , 0], + [0, 1, 1 , 16, 16 , 1/16., 1/16. , 0], + [0, 1, -1, 32, -32, 1/32., -1/32., 1], +]).astype(np.float32) +A[8] = np.transpose(A_T[8]) +B_T[8] = np.array([ + [1, 0 , -21/4., 0 , 21/4., 0 , -1, 0], + [0, 1 , 1 , -17/4., -17/4., 1 , 1 , 0], + [0, -1 , 1 , 17/4. , -17/4., -1 , 1 , 0], + [0, 1/2. , 1/4. , -5/2. , -5/4., 2 , 1 , 0], + [0, -1/2., 1/4. , 5/2. , -5/4., -2 , 1 , 0], + [0, 2 , 4 , -5/2. , -5 , 1/2. , 1 , 0], + [0, -2 , 4 , 5/2. , -5 , -1/2. , 1 , 0], + [0, -1 , 0 , 21/4. , 0 , -21/4., 0 , 1], +]).astype(np.float32) +B[8] = np.transpose(B_T[8]) +G[8] = np.array([ + [ 1 , 0 , 0 ], + [-2/9. , -2/9. , -2/9.], + [-2/9. , 2/9. , -2/9.], + [1/90. , 1/45. , 2/45.], + [1/90. , -1/45. , 2/45.], + [32/45., 16/45. , 8/45.], + [32/45., -16/45., 8/45.], + [ 0 , 0 , 1 ], +]).astype(np.float32) +G_T[8] = np.transpose(G[8]) def output_shape(input_shape, filter_shape): @@ -29,55 +96,54 @@ def output_shape(input_shape, filter_shape): return out_shape -def winog_conv(input, filter): - m = 2 - r = 3 +def winograd_conv(m, r, input, filter): alpha = m + r - 1 + print 'Winograd(m = %d, r = %d, tile size=%d' % (m, r, alpha) + alpha_square = alpha * alpha input_shape = input.shape filter_shape = filter.shape out_shape = output_shape(input_shape, filter_shape) K = filter_shape[0] C = input_shape[1] - U = np.zeros((K * 16, C)) + U = np.zeros((K * alpha_square, C)) for k in range(K): for c in range(C): - u = np.dot(np.dot(G, filter[k, c, :, :]), G_T) - for i in range(4): - for j in range(4) : - U[(i * 4 + j) * K + k, c] = u[i, j] + u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha]) + for i in range(alpha): + for j in range(alpha) : + U[(i * alpha + j) * K + k, c] = u[i, j] print 'filter out: ', U.shape - print U[0, 0] - U.astype(np.float32).tofile("filter_out") - rounded_h = int(math.ceil(out_shape[2] / 2.0)) - rounded_w = int(math.ceil(out_shape[3] / 2.0)) + rounded_h = int(math.ceil(out_shape[2] / (m * 1.0))) + rounded_w = int(math.ceil(out_shape[3] / (m * 1.0))) P = input_shape[0] * rounded_h * rounded_w - V = np.zeros((C * 16, P)) + V = np.zeros((C * alpha_square, P)) for p in range(P): for c in range(C): n = p / (rounded_w * rounded_h) t = p % (rounded_h * rounded_w) h_idx = t / rounded_w w_idx = t % rounded_w - h_start = h_idx * 2 - w_start = w_idx * 2 - h_end = min(h_start+4, input_shape[2]) - w_end = min(w_start+4, input_shape[3]) - d = np.zeros((4, 4)) - d[0:h_end-h_start, 0:w_end-w_start] = input[n, c, h_start:h_end, w_start:w_end] - v = np.dot(np.dot(B_T, d), B) - for i in range(4): - for j in range(4): - V[(i*4+j)*C + c, p] = v[i, j] - - tmp = V.reshape(16, C, P, 1) + h_start = h_idx * m + w_start = w_idx * m + h_end = min(h_start+alpha, input_shape[2]) + w_end = min(w_start+alpha, input_shape[3]) + d = np.zeros((alpha, alpha)) + d[0:h_end-h_start, 0:w_end-w_start] = \ + input[n, c, h_start:h_end, w_start:w_end] + v = np.dot(np.dot(B_T[alpha], d), B[alpha]) + for i in range(alpha): + for j in range(alpha): + V[(i*alpha+j)*C + c, p] = v[i, j] + + tmp = V.reshape(alpha_square, C, P, 1) print 'input out: ', tmp.shape tmp.astype(np.float32).tofile("C") - M = np.zeros((16 * K, P)) - for i in range(alpha * alpha): + M = np.zeros((alpha_square * K, P)) + for i in range(alpha_square): u = U[i * K : (i+1) * K, :] v = V[i * C : (i+1) * C, :] M[i * K : (i+1) * K, :] = np.dot(u, v) @@ -87,17 +153,17 @@ def winog_conv(input, filter): res = np.zeros((out_shape[0], out_shape[2], out_shape[3], out_shape[1])) for k in range(K): for b in range(P): - m = np.zeros((4, 4)) - for i in range(4): - for j in range(4): - m[i][j] = M[(i*4+j) * K + k, b] - y = np.dot(np.dot(A_T, m), A) - for i in range(2): - for j in range(2): + tm = np.zeros((alpha, alpha)) + for i in range(alpha): + for j in range(alpha): + tm[i][j] = M[(i*alpha+j) * K + k, b] + y = np.dot(np.dot(A_T[alpha], tm), A[alpha]) + for i in range(m): + for j in range(m): n = b / (rounded_h * rounded_w) t = b % (rounded_h * rounded_w) - p = (t / rounded_w) * 2 + i - q = (t % rounded_w) * 2 + j + p = (t / rounded_w) * m + i + q = (t % rounded_w) * m + j if p >= out_shape[2] or q >= out_shape[3]: continue res[n, p, q, k] = y[i, j] @@ -115,25 +181,27 @@ def tf_conv(input, filter): def main(): - input = np.random.random([7, 61, 71, 31]).astype(np.float32) + input = np.random.random([5, 23, 29, 15]).astype(np.float32) # input = np.fromfile(file="A", dtype=np.float32) # input = input.reshape(1, 3, 3, 5) print 'input shape: ', input.shape - input.tofile("A") - filter = np.random.random([3, 3, 31, 31]).astype(np.float32) + # input.tofile("A") + filter = np.random.random([3, 3, 15, 13]).astype(np.float32) tf_out = tf_conv(input, filter) input = input.transpose((0, 3, 1, 2)) filter = filter.transpose((3, 2, 0, 1)) print 'filter shape: ', filter.shape - filter.tofile("filter_in") - winog_out = winog_conv(input, filter) - res = np.allclose(tf_out, winog_out) - if res: - print "=========Pass=========" - else: - print "=========Failed=========" - print "TF: ", tf_out - print "Winograd: ", winog_out + # filter.tofile("filter_in") + for i in [2, 4, 6]: + print "==========f(%d,3)==========" % i + winograd_out = winograd_conv(i, 3, input, filter) + res = np.allclose(tf_out, winograd_out) + if res: + print "=========Pass=========" + else: + print "=========Failed=======" + print "TF: ", tf_out + print "Winograd: ", winograd_out if __name__ == '__main__':