From 33415ee93336cd1648d8ca4c3ec4548aa31ab579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=AF=85?= Date: Mon, 28 May 2018 17:40:27 +0800 Subject: [PATCH] Return mace status for allocate --- mace/core/mace.cc | 8 +- mace/core/net.cc | 6 +- mace/core/workspace.cc | 19 +- mace/kernels/addn.h | 2 +- mace/kernels/arm/conv_2d_neon.h | 102 +-- mace/kernels/arm/conv_2d_neon_15x1.cc | 20 +- mace/kernels/arm/conv_2d_neon_1x1.cc | 8 +- mace/kernels/arm/conv_2d_neon_1x15.cc | 20 +- mace/kernels/arm/conv_2d_neon_1x7.cc | 25 +- mace/kernels/arm/conv_2d_neon_3x3.cc | 69 +- mace/kernels/arm/conv_2d_neon_5x5.cc | 123 ++-- mace/kernels/arm/conv_2d_neon_7x1.cc | 26 +- mace/kernels/arm/conv_2d_neon_7x7.cc | 331 +++++---- mace/kernels/arm/conv_winograd.cc | 192 ++---- mace/kernels/arm/conv_winograd_test.cc | 40 +- mace/kernels/arm/depthwise_conv2d_neon.h | 18 +- mace/kernels/arm/depthwise_conv2d_neon_3x3.cc | 202 ++---- mace/kernels/channel_shuffle.h | 2 +- mace/kernels/concat.h | 2 +- mace/kernels/conv_2d.h | 6 +- mace/kernels/conv_pool_2d_util.cc | 6 +- mace/kernels/deconv_2d.h | 4 +- mace/kernels/depth_to_space.h | 2 +- mace/kernels/depthwise_conv2d.h | 2 +- mace/kernels/eltwise.h | 2 +- mace/kernels/fully_connected.h | 2 +- mace/kernels/matmul.h | 2 +- mace/kernels/opencl/activation.cc | 38 +- mace/kernels/opencl/addn.cc | 10 +- mace/kernels/opencl/batch_norm.cc | 21 +- mace/kernels/opencl/bias_add.cc | 13 +- mace/kernels/opencl/buffer_to_image.cc | 12 +- mace/kernels/opencl/channel_shuffle.cc | 29 +- mace/kernels/opencl/concat.cc | 73 +- mace/kernels/opencl/conv_2d.cc | 120 ++-- mace/kernels/opencl/conv_2d_1x1.cc | 51 +- mace/kernels/opencl/conv_2d_3x3.cc | 60 +- mace/kernels/opencl/conv_2d_general.cc | 56 +- mace/kernels/opencl/deconv_2d_opencl.cc | 96 +-- mace/kernels/opencl/depth_to_space.cc | 15 +- mace/kernels/opencl/depthwise_conv.cc | 61 +- mace/kernels/opencl/eltwise.cc | 37 +- mace/kernels/opencl/fully_connected.cc | 76 ++- mace/kernels/opencl/helper.cc | 52 +- 
mace/kernels/opencl/helper.h | 5 +- mace/kernels/opencl/image_to_buffer.cc | 10 +- mace/kernels/opencl/matmul.cc | 17 +- .../kernels/opencl/out_of_range_check_test.cc | 20 +- mace/kernels/opencl/pad.cc | 44 +- mace/kernels/opencl/pooling.cc | 24 +- mace/kernels/opencl/resize_bilinear.cc | 16 +- mace/kernels/opencl/slice.cc | 34 +- mace/kernels/opencl/softmax.cc | 18 +- mace/kernels/opencl/space_to_batch.cc | 27 +- mace/kernels/opencl/winograd_transform.cc | 21 +- mace/kernels/pad.h | 2 +- mace/kernels/pooling.h | 2 +- mace/kernels/proposal.h | 2 +- mace/kernels/psroi_align.h | 2 +- mace/kernels/resize_bilinear.h | 2 +- mace/kernels/slice.h | 2 +- mace/kernels/space_to_batch.h | 4 +- mace/ops/BUILD | 58 +- mace/ops/activation.h | 6 +- mace/ops/activation_test.cc | 9 +- mace/ops/addn_test.cc | 8 +- mace/ops/batch_norm.h | 2 +- mace/ops/batch_norm_test.cc | 269 ++++---- mace/ops/batch_to_space.h | 3 +- mace/ops/bias_add.h | 2 +- mace/ops/bias_add_test.cc | 58 +- mace/ops/buffer_to_image_test.cc | 14 +- mace/ops/channel_shuffle_test.cc | 20 +- mace/ops/concat_test.cc | 14 +- mace/ops/conv_2d_test.cc | 637 ++++++++---------- mace/ops/conv_pool_2d_base.h | 3 +- mace/ops/core_test.cc | 5 +- mace/ops/deconv_2d_test.cc | 464 +++++-------- mace/ops/depth_to_space_test.cc | 131 ++-- mace/ops/depthwise_conv2d_test.cc | 206 +++--- mace/ops/eltwise.h | 14 +- mace/ops/eltwise_test.cc | 545 ++++++--------- mace/ops/folded_batch_norm.h | 2 +- mace/ops/folded_batch_norm_test.cc | 193 +++--- mace/ops/fully_connected.h | 46 +- mace/ops/fully_connected_test.cc | 98 ++- mace/ops/local_response_norm.h | 5 +- mace/ops/local_response_norm_test.cc | 22 +- mace/ops/matmul_test.cc | 24 +- mace/ops/ops_test_util.h | 152 +++-- mace/ops/pad.h | 3 +- mace/ops/pad_test.cc | 75 +-- mace/ops/pooling_test.cc | 354 +++++----- mace/ops/proposal_test.cc | 15 +- mace/ops/quantize.cc | 6 +- mace/ops/quantize.h | 42 +- mace/ops/quantize_test.cc | 154 ++--- mace/ops/resize_bilinear.h | 5 +- 
mace/ops/resize_bilinear_test.cc | 51 +- mace/ops/slice.h | 4 +- mace/ops/slice_test.cc | 38 +- mace/ops/softmax.h | 2 +- mace/ops/softmax_test.cc | 35 +- mace/ops/space_to_batch.h | 10 +- mace/ops/space_to_batch_test.cc | 147 ++-- mace/ops/space_to_depth.h | 16 +- mace/ops/transpose.cc | 6 +- mace/ops/transpose.h | 16 +- mace/ops/transpose_test.cc | 39 +- mace/ops/winograd_convolution_test.cc | 49 +- mace/public/mace.h | 2 +- 111 files changed, 2763 insertions(+), 3629 deletions(-) diff --git a/mace/core/mace.cc b/mace/core/mace.cc index bd834ecf..36401a9b 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -155,13 +155,13 @@ MaceStatus MaceEngine::Impl::Init( } } else { #endif - MACE_FAILURE_RETURN(ws_->LoadModelTensor( + MACE_RETURN_IF_ERROR(ws_->LoadModelTensor( *net_def, device_type_, model_data)); // Init model auto net = CreateNet(op_registry_, *net_def, ws_.get(), device_type_, NetMode::INIT); - MACE_FAILURE_RETURN(net->Run()); + MACE_RETURN_IF_ERROR(net->Run()); net_ = CreateNet(op_registry_, *net_def, ws_.get(), device_type_); #ifdef MACE_ENABLE_HEXAGON } @@ -195,7 +195,7 @@ MaceStatus MaceEngine::Impl::Run( " please use 1 to fill missing dimensions"); Tensor *input_tensor = ws_->GetTensor(MakeString("mace_input_node_", input.first)); - input_tensor->Resize(input.second.shape()); + MACE_RETURN_IF_ERROR(input_tensor->Resize(input.second.shape())); { Tensor::MappingGuard input_guard(input_tensor); float *input_data = input_tensor->mutable_data(); @@ -221,7 +221,7 @@ MaceStatus MaceEngine::Impl::Run( hexagon_controller_->ExecuteGraph(*input_tensors[0], output_tensors[0]); } else { #endif - MACE_FAILURE_RETURN(net_->Run(run_metadata)); + MACE_RETURN_IF_ERROR(net_->Run(run_metadata)); #ifdef MACE_ENABLE_HEXAGON } #endif diff --git a/mace/core/net.cc b/mace/core/net.cc index ea4b0721..5114d8bc 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -71,7 +71,7 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) { CallStats call_stats; if (future_wait) 
{ StatsFuture future; - MACE_FAILURE_RETURN(op->Run(&future)); + MACE_RETURN_IF_ERROR(op->Run(&future)); if (run_metadata != nullptr) { future.wait_fn(&call_stats); } else { @@ -79,10 +79,10 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) { } } else if (run_metadata != nullptr) { call_stats.start_micros = NowMicros(); - MACE_FAILURE_RETURN(op->Run(nullptr)); + MACE_RETURN_IF_ERROR(op->Run(nullptr)); call_stats.end_micros = NowMicros(); } else { - MACE_FAILURE_RETURN(op->Run(nullptr)); + MACE_RETURN_IF_ERROR(op->Run(nullptr)); } if (run_metadata != nullptr) { diff --git a/mace/core/workspace.cc b/mace/core/workspace.cc index 3d2566e6..410c0118 100644 --- a/mace/core/workspace.cc +++ b/mace/core/workspace.cc @@ -83,10 +83,7 @@ MaceStatus Workspace::LoadModelTensor(const NetDef &net_def, } else { tensor_buffer_ = std::unique_ptr( new Buffer(GetDeviceAllocator(type))); - MaceStatus status = tensor_buffer_->Allocate(model_data_size); - if (status != MaceStatus::MACE_SUCCESS) { - return status; - } + MACE_RETURN_IF_ERROR(tensor_buffer_->Allocate(model_data_size)); tensor_buffer_->Map(nullptr); tensor_buffer_->Copy(const_cast(model_data), 0, model_data_size); @@ -156,11 +153,8 @@ MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def, if (mem_block.mem_id() >= 20000) { std::unique_ptr image_buf( new Image()); - MaceStatus status = image_buf->Allocate( - {mem_block.x(), mem_block.y()}, dtype); - if (status != MaceStatus::MACE_SUCCESS) { - return status; - } + MACE_RETURN_IF_ERROR(image_buf->Allocate( + {mem_block.x(), mem_block.y()}, dtype)); preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(image_buf)); } @@ -168,12 +162,9 @@ MaceStatus Workspace::CreateOutputTensorBuffer(const NetDef &net_def, if (mem_block.mem_id() < 20000) { std::unique_ptr tensor_buf( new Buffer(GetDeviceAllocator(device_type))); - MaceStatus status = tensor_buf->Allocate( + MACE_RETURN_IF_ERROR(tensor_buf->Allocate( mem_block.x() * GetEnumTypeSize(dtype) - + 
MACE_EXTRA_BUFFER_PAD_SIZE); - if (status != MaceStatus::MACE_SUCCESS) { - return status; - } + + MACE_EXTRA_BUFFER_PAD_SIZE)); preallocated_allocator_.SetBuffer(mem_block.mem_id(), std::move(tensor_buf)); } diff --git a/mace/kernels/addn.h b/mace/kernels/addn.h index dd98ee09..2215343f 100644 --- a/mace/kernels/addn.h +++ b/mace/kernels/addn.h @@ -40,7 +40,7 @@ struct AddNFunctor { Tensor *output_tensor, StatsFuture *future) { MACE_UNUSED(future); - MACE_FAILURE_RETURN(output_tensor->ResizeLike(input_tensors[0])); + MACE_RETURN_IF_ERROR(output_tensor->ResizeLike(input_tensors[0])); index_t size = output_tensor->size(); Tensor::MappingGuard output_map(output_tensor); float *output_data = output_tensor->mutable_data(); diff --git a/mace/kernels/arm/conv_2d_neon.h b/mace/kernels/arm/conv_2d_neon.h index dd0ecde0..7c7f7a77 100644 --- a/mace/kernels/arm/conv_2d_neon.h +++ b/mace/kernels/arm/conv_2d_neon.h @@ -21,73 +21,73 @@ namespace mace { namespace kernels { void Conv2dNeonK1x1S1(const float *input, - const float *filter, - const index_t batch, - const index_t height, - const index_t width, - const index_t in_channels, - const index_t out_channels, - float *output); + const float *filter, + const index_t batch, + const index_t height, + const index_t width, + const index_t in_channels, + const index_t out_channels, + float *output); void Conv2dNeonK3x3S1(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK3x3S2(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK5x5S1(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t 
*in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK1x7S1(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK7x1S1(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK7x7S1(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK7x7S2(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK7x7S3(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK1x15S1(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); void Conv2dNeonK15x1S1(const float *input, - const float *filter, - const index_t *in_shape, - const index_t *out_shape, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + float *output); // calculate one output channel and one input channel inline void Conv2dCPUKHxKWCalc(const float *in_ptr, @@ -99,13 +99,13 @@ inline void Conv2dCPUKHxKWCalc(const float *in_ptr, const index_t out_width, float *out_ptr, const int stride) { - for (index_t h = 0; h < out_height; ++h) { + for (index_t h = 
0; h < out_height; ++h) { for (index_t w = 0; w < out_width; ++w) { for (int i = 0; i < filter_height; ++i) { for (int j = 0; j < filter_width; ++j) { - out_ptr[h * out_width + w] - += in_ptr[(h * stride + i) * in_width + (w * stride + j)] - * filter_ptr[i * filter_width + j]; + out_ptr[h * out_width + w] += + in_ptr[(h * stride + i) * in_width + (w * stride + j)] * + filter_ptr[i * filter_width + j]; } } } diff --git a/mace/kernels/arm/conv_2d_neon_15x1.cc b/mace/kernels/arm/conv_2d_neon_15x1.cc index 9a5d2c41..0facfc6e 100644 --- a/mace/kernels/arm/conv_2d_neon_15x1.cc +++ b/mace/kernels/arm/conv_2d_neon_15x1.cc @@ -38,16 +38,15 @@ inline void Conv2dCPUK15x1Calc(const float *in_ptr, for (index_t iw = 0; iw < tile_width && w + iw < out_width; ++iw) { for (int i = 0; i < 15; ++i) { for (int j = 0; j < 1; ++j) { - out_ptr[io * out_image_size + ih * out_width + w + iw] - += in_ptr[(ih * stride + i) * in_width + ((w + iw) * stride + j)] - * filter_ptr[io * in_channels * 15 + i * 1 + j]; + out_ptr[io * out_image_size + ih * out_width + w + iw] += + in_ptr[(ih * stride + i) * in_width + ((w + iw) * stride + j)] * + filter_ptr[io * in_channels * 15 + i * 1 + j]; } } } } } - // Ho = 4, Wo = 1, Co = 1 void Conv2dNeonK15x1S1(const float *input, const float *filter, @@ -59,7 +58,7 @@ void Conv2dNeonK15x1S1(const float *input, const index_t in_batch_size = in_shape[1] * in_image_size; const index_t out_batch_size = out_shape[1] * out_image_size; const index_t tile_width = - out_shape[1] < 4 ? RoundUpDiv4(out_shape[3]) : out_shape[3]; + out_shape[1] < 4 ? 
RoundUpDiv4(out_shape[3]) : out_shape[3]; #pragma omp parallel for collapse(3) for (index_t b = 0; b < out_shape[0]; ++b) { @@ -69,8 +68,7 @@ void Conv2dNeonK15x1S1(const float *input, const index_t out_width = out_shape[3]; const index_t in_channels = in_shape[1]; const index_t in_width = in_shape[3]; - float *out_ptr_base = - output + b * out_batch_size + m * out_image_size; + float *out_ptr_base = output + b * out_batch_size + m * out_image_size; for (index_t c = 0; c < in_channels; ++c) { const float *in_ptr_base = input + b * in_batch_size + c * in_image_size; @@ -147,16 +145,16 @@ void Conv2dNeonK15x1S1(const float *input, out_ptr_base[out_offset + 2 * out_width] = vo[2]; out_ptr_base[out_offset + 3 * out_width] = vo[3]; } // wt - } // h + } // h #else Conv2dCPUK15x1Calc(in_ptr_base, filter_ptr, in_width, in_channels, out_height, out_width, w, tile_width, out_image_size, out_ptr_base, 0, 1); #endif } // c - } // w - } // m - } // b + } // w + } // m + } // b } } // namespace kernels diff --git a/mace/kernels/arm/conv_2d_neon_1x1.cc b/mace/kernels/arm/conv_2d_neon_1x1.cc index 214ed68e..b4c4b828 100644 --- a/mace/kernels/arm/conv_2d_neon_1x1.cc +++ b/mace/kernels/arm/conv_2d_neon_1x1.cc @@ -31,12 +31,8 @@ void Conv2dNeonK1x1S1(const float *input, const index_t out_channels, float *output) { for (index_t b = 0; b < batch; ++b) { - Gemm(filter, - input + b * in_channels * height * width, - 1, - out_channels, - in_channels, - height * width, + Gemm(filter, input + b * in_channels * height * width, 1, out_channels, + in_channels, height * width, output + b * out_channels * height * width); } } diff --git a/mace/kernels/arm/conv_2d_neon_1x15.cc b/mace/kernels/arm/conv_2d_neon_1x15.cc index 0dd39fba..6fdc6ed8 100644 --- a/mace/kernels/arm/conv_2d_neon_1x15.cc +++ b/mace/kernels/arm/conv_2d_neon_1x15.cc @@ -17,8 +17,8 @@ #endif #include "mace/kernels/arm/conv_2d_neon.h" -#include "mace/utils/utils.h" #include "mace/utils/logging.h" +#include "mace/utils/utils.h" 
namespace mace { namespace kernels { @@ -39,16 +39,15 @@ inline void Conv2dCPUK1x15Calc(const float *in_ptr, for (index_t iw = 0; iw < out_width; ++iw) { for (int i = 0; i < 1; ++i) { for (int j = 0; j < 15; ++j) { - out_ptr[io * out_image_size + (h + ih) * out_width + iw] - += in_ptr[((h + ih) * stride + i) * in_width + (iw * stride + j)] - * filter_ptr[io * in_channels * 15 + i * 15 + j]; + out_ptr[io * out_image_size + (h + ih) * out_width + iw] += + in_ptr[((h + ih) * stride + i) * in_width + (iw * stride + j)] * + filter_ptr[io * in_channels * 15 + i * 15 + j]; } } } } } - // Ho = 1, Wo = 4, Co = 1 void Conv2dNeonK1x15S1(const float *input, const float *filter, @@ -70,8 +69,7 @@ void Conv2dNeonK1x15S1(const float *input, const index_t out_width = out_shape[3]; const index_t in_channels = in_shape[1]; const index_t in_width = in_shape[3]; - float *out_ptr_base = - output + b * out_batch_size + m * out_image_size; + float *out_ptr_base = output + b * out_batch_size + m * out_image_size; for (index_t c = 0; c < in_channels; ++c) { const float *in_ptr_base = input + b * in_batch_size + c * in_image_size; @@ -133,16 +131,16 @@ void Conv2dNeonK1x15S1(const float *input, vst1q_f32(out_ptr_base + out_offset, vo); } // w - } // ht + } // ht #else Conv2dCPUK1x15Calc(in_ptr_base, filter_ptr, in_width, in_channels, out_height, h, tile_height, out_width, out_image_size, out_ptr_base, 0, 1); #endif } // c - } // h - } // m - } // b + } // h + } // m + } // b } } // namespace kernels diff --git a/mace/kernels/arm/conv_2d_neon_1x7.cc b/mace/kernels/arm/conv_2d_neon_1x7.cc index a60fa56d..8a7b1a41 100644 --- a/mace/kernels/arm/conv_2d_neon_1x7.cc +++ b/mace/kernels/arm/conv_2d_neon_1x7.cc @@ -41,8 +41,7 @@ void Conv2dNeonK1x7S1(const float *input, const index_t in_channels = in_shape[1]; const index_t in_width = in_shape[3]; if (m + 3 < out_channels) { - float *out_ptr0_base = - output + b * out_batch_size + m * out_image_size; + float *out_ptr0_base = output + b * 
out_batch_size + m * out_image_size; #if defined(MACE_ENABLE_NEON) float *out_ptr1_base = output + b * out_batch_size + (m + 1) * out_image_size; @@ -56,12 +55,9 @@ void Conv2dNeonK1x7S1(const float *input, input + b * in_batch_size + c * in_image_size; const float *filter_ptr0 = filter + m * in_channels * 7 + c * 7; #if defined(MACE_ENABLE_NEON) - const float *filter_ptr1 = - filter + (m + 1) * in_channels * 7 + c * 7; - const float *filter_ptr2 = - filter + (m + 2) * in_channels * 7 + c * 7; - const float *filter_ptr3 = - filter + (m + 3) * in_channels * 7 + c * 7; + const float *filter_ptr1 = filter + (m + 1) * in_channels * 7 + c * 7; + const float *filter_ptr2 = filter + (m + 2) * in_channels * 7 + c * 7; + const float *filter_ptr3 = filter + (m + 3) * in_channels * 7 + c * 7; /* load filter (4 outch x 1 height x 4 width) */ float32x4_t vf00, vf01; float32x4_t vf10, vf11; @@ -174,7 +170,7 @@ void Conv2dNeonK1x7S1(const float *input, vst1q_f32(out_ptr2_base + out_offset, vo2); vst1q_f32(out_ptr3_base + out_offset, vo3); } // w - } // h + } // h #else for (index_t oc = 0; oc < 4; ++oc) { Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0 + oc * in_channels * 7, @@ -239,17 +235,16 @@ void Conv2dNeonK1x7S1(const float *input, vst1q_f32(out_ptr0_base + out_offset, vo0); } // w - } // h + } // h #else - Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, - in_width, 1, 7, out_height, out_width, - out_ptr0_base, 1); + Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, in_width, 1, 7, + out_height, out_width, out_ptr0_base, 1); #endif } // c } } // if - } // m - } // b + } // m + } // b } } // namespace kernels diff --git a/mace/kernels/arm/conv_2d_neon_3x3.cc b/mace/kernels/arm/conv_2d_neon_3x3.cc index 048bef46..94551667 100644 --- a/mace/kernels/arm/conv_2d_neon_3x3.cc +++ b/mace/kernels/arm/conv_2d_neon_3x3.cc @@ -45,7 +45,7 @@ void Conv2dNeonK3x3S1(const float *input, float *out_ptr0_base = output + b * out_batch_size + m * out_image_size; #if defined(MACE_ENABLE_NEON) float 
*out_ptr1_base = - output + b * out_batch_size + (m + 1) * out_image_size; + output + b * out_batch_size + (m + 1) * out_image_size; #endif for (index_t c = 0; c < in_channels; ++c) { const float *in_ptr0 = input + b * in_batch_size + c * in_image_size; @@ -54,11 +54,11 @@ void Conv2dNeonK3x3S1(const float *input, #if defined(MACE_ENABLE_NEON) float *out_ptr1 = out_ptr1_base; const float *in_ptr1 = - input + b * in_batch_size + c * in_image_size + 1 * in_width; + input + b * in_batch_size + c * in_image_size + 1 * in_width; const float *in_ptr2 = - input + b * in_batch_size + c * in_image_size + 2 * in_width; + input + b * in_batch_size + c * in_image_size + 2 * in_width; const float *in_ptr3 = - input + b * in_batch_size + c * in_image_size + 3 * in_width; + input + b * in_batch_size + c * in_image_size + 3 * in_width; const float *filter_ptr1 = filter + (m + 1) * in_channels * 9 + c * 9; #endif #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) @@ -75,7 +75,6 @@ void Conv2dNeonK3x3S1(const float *input, vf11 = vld1q_f32(filter_ptr1 + 3); vf12 = vld1q_f32(filter_ptr1 + 6); - for (index_t h = 0; h + 1 < out_height; h += 2) { for (index_t w = 0; w + 3 < out_width; w += 4) { // input (4 height x 3 slide): vi_height_slide @@ -179,7 +178,7 @@ void Conv2dNeonK3x3S1(const float *input, out_ptr0 += out_width; out_ptr1 += out_width; - } // h + } // h #elif defined(MACE_ENABLE_NEON) // arm v7 float *out_ptr0 = out_ptr0_base; @@ -198,7 +197,6 @@ void Conv2dNeonK3x3S1(const float *input, vf167 = vld1_f32(filter_ptr1 + 6); vf189 = vld1_f32(filter_ptr1 + 8); - for (index_t h = 0; h + 1 < out_height; h += 2) { for (index_t w = 0; w + 3 < out_width; w += 4) { // input (4 height x 3 slide): vi_height_slide @@ -313,18 +311,18 @@ void Conv2dNeonK3x3S1(const float *input, } // c } else { for (index_t mm = m; mm < out_channels; ++mm) { - float - *out_ptr0_base = output + b * out_batch_size + mm * out_image_size; + float *out_ptr0_base = + output + b * out_batch_size + mm * 
out_image_size; for (index_t c = 0; c < in_channels; ++c) { - const float - *in_ptr0 = input + b * in_batch_size + c * in_image_size; + const float *in_ptr0 = + input + b * in_batch_size + c * in_image_size; #if defined(MACE_ENABLE_NEON) const float *in_ptr1 = - input + b * in_batch_size + c * in_image_size + 1 * in_width; + input + b * in_batch_size + c * in_image_size + 1 * in_width; const float *in_ptr2 = - input + b * in_batch_size + c * in_image_size + 2 * in_width; + input + b * in_batch_size + c * in_image_size + 2 * in_width; const float *in_ptr3 = - input + b * in_batch_size + c * in_image_size + 3 * in_width; + input + b * in_batch_size + c * in_image_size + 3 * in_width; #endif const float *filter_ptr0 = filter + mm * in_channels * 9 + c * 9; @@ -396,7 +394,6 @@ void Conv2dNeonK3x3S1(const float *input, vst1q_f32(out_ptr0, vo00); vst1q_f32(out_ptr0 + out_width, vo01); - in_ptr0 += 4; in_ptr1 += 4; in_ptr2 += 4; @@ -411,7 +408,7 @@ void Conv2dNeonK3x3S1(const float *input, in_ptr3 += 2 + in_width; out_ptr0 += out_width; - } // h + } // h #elif defined(MACE_ENABLE_NEON) // arm v7 float *out_ptr0 = out_ptr0_base; @@ -482,7 +479,6 @@ void Conv2dNeonK3x3S1(const float *input, vst1q_f32(out_ptr0, vo00); vst1q_f32(out_ptr0 + out_width, vo01); - in_ptr0 += 4; in_ptr1 += 4; in_ptr2 += 4; @@ -499,15 +495,14 @@ void Conv2dNeonK3x3S1(const float *input, out_ptr0 += out_width; } // h #else - Conv2dCPUKHxKWCalc(in_ptr0, filter_ptr0, - in_width, 3, 3, out_height, out_width, - out_ptr0_base, 1); + Conv2dCPUKHxKWCalc(in_ptr0, filter_ptr0, in_width, 3, 3, out_height, + out_width, out_ptr0_base, 1); #endif } // c - } // mm - } // if - } // m - } // b + } // mm + } // if + } // m + } // b } void Conv2dNeonK3x3S2(const float *input, @@ -529,8 +524,7 @@ void Conv2dNeonK3x3S2(const float *input, const index_t out_height = out_shape[2]; const index_t out_width = out_shape[3]; const float *in_base = input + b * in_batch_size + c * in_image_size; - const float - *filter_ptr = 
filter + m * in_channels * 9 + c * 9; + const float *filter_ptr = filter + m * in_channels * 9 + c * 9; float *out_base = output + b * out_batch_size + m * out_image_size; #if defined(MACE_ENABLE_NEON) && defined(__aarch64__) @@ -569,8 +563,8 @@ void Conv2dNeonK3x3S2(const float *input, index_t out_offset = h * out_width + w; vo = vld1q_f32(out_base + out_offset); - vi00 = vi0.val[0]; // [0.2.4.6] - vi01 = vi0.val[1]; // [1.3.5.7] + vi00 = vi0.val[0]; // [0.2.4.6] + vi01 = vi0.val[1]; // [1.3.5.7] vi02 = vextq_f32(vi00, vi0n, 1); // [2.4.6.8] vi10 = vi1.val[0]; vi11 = vi1.val[1]; @@ -591,8 +585,8 @@ void Conv2dNeonK3x3S2(const float *input, vo = vfmaq_laneq_f32(vo, vi22, vf02, 3); vst1q_f32(out_base + out_offset, vo); - } // w - } // h + } // w + } // h #elif defined(MACE_ENABLE_NEON) // arm v7 // load filter (1 outch x 3 height x 3 width): vf_outch_height float32x2_t vf01, vf23, vf45, vf67, vf78; @@ -631,8 +625,8 @@ void Conv2dNeonK3x3S2(const float *input, index_t out_offset = h * out_width + w; vo = vld1q_f32(out_base + out_offset); - vi00 = vi0.val[0]; // [0.2.4.6] - vi01 = vi0.val[1]; // [1.3.5.7] + vi00 = vi0.val[0]; // [0.2.4.6] + vi01 = vi0.val[1]; // [1.3.5.7] vi02 = vextq_f32(vi00, vi0n, 1); // [2.4.6.8] vi10 = vi1.val[0]; vi11 = vi1.val[1]; @@ -654,15 +648,14 @@ void Conv2dNeonK3x3S2(const float *input, vst1q_f32(out_base + out_offset, vo); } // w - } // h + } // h #else - Conv2dCPUKHxKWCalc(in_base, filter_ptr, - in_width, 3, 3, out_height, out_width, - out_base, 2); + Conv2dCPUKHxKWCalc(in_base, filter_ptr, in_width, 3, 3, out_height, + out_width, out_base, 2); #endif } // c - } // m - } // b + } // m + } // b } } // namespace kernels diff --git a/mace/kernels/arm/conv_2d_neon_5x5.cc b/mace/kernels/arm/conv_2d_neon_5x5.cc index 7a32a291..a60bec41 100644 --- a/mace/kernels/arm/conv_2d_neon_5x5.cc +++ b/mace/kernels/arm/conv_2d_neon_5x5.cc @@ -21,59 +21,59 @@ namespace mace { namespace kernels { -#define MACE_Conv2dNeonK5x5SnLoadCalc4 \ - /* load filter 
(4 outch x 1 height x 4 width) */ \ - float32x4_t vf00, vf10, vf20, vf30; \ - float32x2_t vf01, vf11, vf21, vf31; \ - vf00 = vld1q_f32(filter_ptr0); \ - vf01 = vld1_f32(filter_ptr0 + 3); \ - vf10 = vld1q_f32(filter_ptr1); \ - vf11 = vld1_f32(filter_ptr1 + 3); \ - vf20 = vld1q_f32(filter_ptr2); \ - vf21 = vld1_f32(filter_ptr2 + 3); \ - vf30 = vld1q_f32(filter_ptr3); \ - vf31 = vld1_f32(filter_ptr3 + 3); \ - \ - /* outch 0 */ \ - vo0 = vmlaq_lane_f32(vo0, vi0, vget_low_f32(vf00), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi4, vf01, 1); \ - \ - /* outch 1 */ \ - vo1 = vmlaq_lane_f32(vo1, vi0, vget_low_f32(vf10), 0); \ - vo1 = vmlaq_lane_f32(vo1, vi1, vget_low_f32(vf10), 1); \ - vo1 = vmlaq_lane_f32(vo1, vi2, vget_high_f32(vf10), 0); \ - vo1 = vmlaq_lane_f32(vo1, vi3, vget_high_f32(vf10), 1); \ - vo1 = vmlaq_lane_f32(vo1, vi4, vf11, 1); \ - \ - /* outch 2 */ \ - vo2 = vmlaq_lane_f32(vo2, vi0, vget_low_f32(vf20), 0); \ - vo2 = vmlaq_lane_f32(vo2, vi1, vget_low_f32(vf20), 1); \ - vo2 = vmlaq_lane_f32(vo2, vi2, vget_high_f32(vf20), 0); \ - vo2 = vmlaq_lane_f32(vo2, vi3, vget_high_f32(vf20), 1); \ - vo2 = vmlaq_lane_f32(vo2, vi4, vf21, 1); \ - \ - /* outch 3 */ \ - vo3 = vmlaq_lane_f32(vo3, vi0, vget_low_f32(vf30), 0); \ - vo3 = vmlaq_lane_f32(vo3, vi1, vget_low_f32(vf30), 1); \ - vo3 = vmlaq_lane_f32(vo3, vi2, vget_high_f32(vf30), 0); \ - vo3 = vmlaq_lane_f32(vo3, vi3, vget_high_f32(vf30), 1); \ +#define MACE_Conv2dNeonK5x5SnLoadCalc4 \ + /* load filter (4 outch x 1 height x 4 width) */ \ + float32x4_t vf00, vf10, vf20, vf30; \ + float32x2_t vf01, vf11, vf21, vf31; \ + vf00 = vld1q_f32(filter_ptr0); \ + vf01 = vld1_f32(filter_ptr0 + 3); \ + vf10 = vld1q_f32(filter_ptr1); \ + vf11 = vld1_f32(filter_ptr1 + 3); \ + vf20 = vld1q_f32(filter_ptr2); \ + vf21 = vld1_f32(filter_ptr2 + 3); \ + vf30 = 
vld1q_f32(filter_ptr3); \ + vf31 = vld1_f32(filter_ptr3 + 3); \ + \ + /* outch 0 */ \ + vo0 = vmlaq_lane_f32(vo0, vi0, vget_low_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi4, vf01, 1); \ + \ + /* outch 1 */ \ + vo1 = vmlaq_lane_f32(vo1, vi0, vget_low_f32(vf10), 0); \ + vo1 = vmlaq_lane_f32(vo1, vi1, vget_low_f32(vf10), 1); \ + vo1 = vmlaq_lane_f32(vo1, vi2, vget_high_f32(vf10), 0); \ + vo1 = vmlaq_lane_f32(vo1, vi3, vget_high_f32(vf10), 1); \ + vo1 = vmlaq_lane_f32(vo1, vi4, vf11, 1); \ + \ + /* outch 2 */ \ + vo2 = vmlaq_lane_f32(vo2, vi0, vget_low_f32(vf20), 0); \ + vo2 = vmlaq_lane_f32(vo2, vi1, vget_low_f32(vf20), 1); \ + vo2 = vmlaq_lane_f32(vo2, vi2, vget_high_f32(vf20), 0); \ + vo2 = vmlaq_lane_f32(vo2, vi3, vget_high_f32(vf20), 1); \ + vo2 = vmlaq_lane_f32(vo2, vi4, vf21, 1); \ + \ + /* outch 3 */ \ + vo3 = vmlaq_lane_f32(vo3, vi0, vget_low_f32(vf30), 0); \ + vo3 = vmlaq_lane_f32(vo3, vi1, vget_low_f32(vf30), 1); \ + vo3 = vmlaq_lane_f32(vo3, vi2, vget_high_f32(vf30), 0); \ + vo3 = vmlaq_lane_f32(vo3, vi3, vget_high_f32(vf30), 1); \ vo3 = vmlaq_lane_f32(vo3, vi4, vf31, 1); -#define MACE_Conv2dNeonK5x5SnLoadCalc1 \ - /* load filter (1 outch x 1 height x 4 width) */ \ - float32x4_t vf00; \ - float32x2_t vf01; \ - vf00 = vld1q_f32(filter_ptr0); \ - vf01 = vld1_f32(filter_ptr0 + 3); \ - \ - /* outch 0 */ \ - vo0 = vmlaq_lane_f32(vo0, vi0, vget_low_f32(vf00), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ +#define MACE_Conv2dNeonK5x5SnLoadCalc1 \ + /* load filter (1 outch x 1 height x 4 width) */ \ + float32x4_t vf00; \ + float32x2_t vf01; \ + vf00 = vld1q_f32(filter_ptr0); \ + vf01 = vld1_f32(filter_ptr0 + 3); \ + \ + /* outch 0 */ \ + vo0 = 
vmlaq_lane_f32(vo0, vi0, vget_low_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ vo0 = vmlaq_lane_f32(vo0, vi4, vf01, 1); // Ho = 1, Wo = 4, Co = 4 @@ -99,7 +99,7 @@ void Conv2dNeonK5x5S1(const float *input, float *out_ptr0_base = output + b * out_batch_size + m * out_image_size; #if defined(MACE_ENABLE_NEON) && !defined(__aarch64__) float *out_ptr1_base = - output + b * out_batch_size + (m + 1) * out_image_size; + output + b * out_batch_size + (m + 1) * out_image_size; float *out_ptr2_base = output + b * out_batch_size + (m + 2) * out_image_size; float *out_ptr3_base = @@ -118,7 +118,7 @@ void Conv2dNeonK5x5S1(const float *input, filter + (m + 3) * in_channels * 25 + c * 25; for (index_t h = 0; h < out_height; ++h) { for (index_t w = 0; w + 3 < out_width; w += 4) { - // input offset + // input offset index_t in_offset = h * in_width + w; // output (4 outch x 1 height x 4 width): vo_outch_height float32x4_t vo0, vo1, vo2, vo3; @@ -157,7 +157,7 @@ void Conv2dNeonK5x5S1(const float *input, filter_ptr2 -= 25; filter_ptr3 -= 25; } // w - } // h + } // h #else for (index_t oc = 0; oc < 4; ++oc) { Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0 + oc * in_channels * 25, @@ -203,17 +203,16 @@ void Conv2dNeonK5x5S1(const float *input, vst1q_f32(out_ptr0_base + out_offset, vo0); filter_ptr0 -= 25; } // w - } // h + } // h #else - Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, - in_width, 5, 5, out_height, out_width, - out_ptr0_base, 1); + Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, in_width, 5, 5, + out_height, out_width, out_ptr0_base, 1); #endif } // c - } // mm - } // if - } // m - } // b + } // mm + } // if + } // m + } // b } } // namespace kernels diff --git a/mace/kernels/arm/conv_2d_neon_7x1.cc b/mace/kernels/arm/conv_2d_neon_7x1.cc index 17215bb8..97d1bec2 100644 --- a/mace/kernels/arm/conv_2d_neon_7x1.cc +++ 
b/mace/kernels/arm/conv_2d_neon_7x1.cc @@ -41,8 +41,7 @@ void Conv2dNeonK7x1S1(const float *input, const index_t in_channels = in_shape[1]; const index_t in_width = in_shape[3]; if (m + 3 < out_channels) { - float *out_ptr0_base = - output + b * out_batch_size + m * out_image_size; + float *out_ptr0_base = output + b * out_batch_size + m * out_image_size; #if defined(MACE_ENABLE_NEON) float *out_ptr1_base = output + b * out_batch_size + (m + 1) * out_image_size; @@ -56,12 +55,9 @@ void Conv2dNeonK7x1S1(const float *input, input + b * in_batch_size + c * in_image_size; const float *filter_ptr0 = filter + m * in_channels * 7 + c * 7; #if defined(MACE_ENABLE_NEON) - const float *filter_ptr1 = - filter + (m + 1) * in_channels * 7 + c * 7; - const float *filter_ptr2 = - filter + (m + 2) * in_channels * 7 + c * 7; - const float *filter_ptr3 = - filter + (m + 3) * in_channels * 7 + c * 7; + const float *filter_ptr1 = filter + (m + 1) * in_channels * 7 + c * 7; + const float *filter_ptr2 = filter + (m + 2) * in_channels * 7 + c * 7; + const float *filter_ptr3 = filter + (m + 3) * in_channels * 7 + c * 7; /* load filter (4 outch x 4 height x 1 width) */ float32x4_t vf00, vf01; float32x4_t vf10, vf11; @@ -98,7 +94,6 @@ void Conv2dNeonK7x1S1(const float *input, out_ptr3_base[out_offset + 2 * out_width], out_ptr3_base[out_offset + 3 * out_width]}; - // input offset index_t in_offset = h * in_width + w; // input (3 slide) @@ -203,7 +198,7 @@ void Conv2dNeonK7x1S1(const float *input, out_ptr3_base[out_offset + 2 * out_width] = vo3[2]; out_ptr3_base[out_offset + 3 * out_width] = vo3[3]; } // w - } // h + } // h #else for (index_t oc = 0; oc < 4; ++oc) { Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0 + oc * in_channels * 7, @@ -280,17 +275,16 @@ void Conv2dNeonK7x1S1(const float *input, out_ptr0_base[out_offset + 2 * out_width] = vo0[2]; out_ptr0_base[out_offset + 3 * out_width] = vo0[3]; } // w - } // h + } // h #else - Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, - in_width, 7, 1, 
out_height, out_width, - out_ptr0_base, 1); + Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, in_width, 7, 1, + out_height, out_width, out_ptr0_base, 1); #endif } // c } } // if - } // m - } // b + } // m + } // b } } // namespace kernels diff --git a/mace/kernels/arm/conv_2d_neon_7x7.cc b/mace/kernels/arm/conv_2d_neon_7x7.cc index 8488127b..d824f881 100644 --- a/mace/kernels/arm/conv_2d_neon_7x7.cc +++ b/mace/kernels/arm/conv_2d_neon_7x7.cc @@ -21,136 +21,136 @@ namespace mace { namespace kernels { -#define MACE_Conv2dArmv8NeonK7x7SnLoadCalc4 \ - /* load filter (4 outch x 1 height x 4 width) */ \ - float32x4_t vf00, vf01; \ - float32x4_t vf10, vf11; \ - float32x4_t vf20, vf21; \ - float32x4_t vf30, vf31; \ - vf00 = vld1q_f32(filter_ptr0); \ - vf01 = vld1q_f32(filter_ptr0 + 3); \ - vf10 = vld1q_f32(filter_ptr1); \ - vf11 = vld1q_f32(filter_ptr1 + 3); \ - vf20 = vld1q_f32(filter_ptr2); \ - vf21 = vld1q_f32(filter_ptr2 + 3); \ - vf30 = vld1q_f32(filter_ptr3); \ - vf31 = vld1q_f32(filter_ptr3 + 3); \ - \ - /* outch 0 */ \ - vo0 = vfmaq_laneq_f32(vo0, vi0, vf00, 0); \ - vo0 = vfmaq_laneq_f32(vo0, vi1, vf00, 1); \ - vo0 = vfmaq_laneq_f32(vo0, vi2, vf00, 2); \ - vo0 = vfmaq_laneq_f32(vo0, vi3, vf00, 3); \ - vo0 = vfmaq_laneq_f32(vo0, vi4, vf01, 1); \ - vo0 = vfmaq_laneq_f32(vo0, vi5, vf01, 2); \ - vo0 = vfmaq_laneq_f32(vo0, vi6, vf01, 3); \ - \ - /* outch 1 */ \ - vo1 = vfmaq_laneq_f32(vo1, vi0, vf10, 0); \ - vo1 = vfmaq_laneq_f32(vo1, vi1, vf10, 1); \ - vo1 = vfmaq_laneq_f32(vo1, vi2, vf10, 2); \ - vo1 = vfmaq_laneq_f32(vo1, vi3, vf10, 3); \ - vo1 = vfmaq_laneq_f32(vo1, vi4, vf11, 1); \ - vo1 = vfmaq_laneq_f32(vo1, vi5, vf11, 2); \ - vo1 = vfmaq_laneq_f32(vo1, vi6, vf11, 3); \ - \ - /* outch 2 */ \ - vo2 = vfmaq_laneq_f32(vo2, vi0, vf20, 0); \ - vo2 = vfmaq_laneq_f32(vo2, vi1, vf20, 1); \ - vo2 = vfmaq_laneq_f32(vo2, vi2, vf20, 2); \ - vo2 = vfmaq_laneq_f32(vo2, vi3, vf20, 3); \ - vo2 = vfmaq_laneq_f32(vo2, vi4, vf21, 1); \ - vo2 = vfmaq_laneq_f32(vo2, vi5, vf21, 2); \ - 
vo2 = vfmaq_laneq_f32(vo2, vi6, vf21, 3); \ - \ - /* outch 3 */ \ - vo3 = vfmaq_laneq_f32(vo3, vi0, vf30, 0); \ - vo3 = vfmaq_laneq_f32(vo3, vi1, vf30, 1); \ - vo3 = vfmaq_laneq_f32(vo3, vi2, vf30, 2); \ - vo3 = vfmaq_laneq_f32(vo3, vi3, vf30, 3); \ - vo3 = vfmaq_laneq_f32(vo3, vi4, vf31, 1); \ - vo3 = vfmaq_laneq_f32(vo3, vi5, vf31, 2); \ +#define MACE_Conv2dArmv8NeonK7x7SnLoadCalc4 \ + /* load filter (4 outch x 1 height x 4 width) */ \ + float32x4_t vf00, vf01; \ + float32x4_t vf10, vf11; \ + float32x4_t vf20, vf21; \ + float32x4_t vf30, vf31; \ + vf00 = vld1q_f32(filter_ptr0); \ + vf01 = vld1q_f32(filter_ptr0 + 3); \ + vf10 = vld1q_f32(filter_ptr1); \ + vf11 = vld1q_f32(filter_ptr1 + 3); \ + vf20 = vld1q_f32(filter_ptr2); \ + vf21 = vld1q_f32(filter_ptr2 + 3); \ + vf30 = vld1q_f32(filter_ptr3); \ + vf31 = vld1q_f32(filter_ptr3 + 3); \ + \ + /* outch 0 */ \ + vo0 = vfmaq_laneq_f32(vo0, vi0, vf00, 0); \ + vo0 = vfmaq_laneq_f32(vo0, vi1, vf00, 1); \ + vo0 = vfmaq_laneq_f32(vo0, vi2, vf00, 2); \ + vo0 = vfmaq_laneq_f32(vo0, vi3, vf00, 3); \ + vo0 = vfmaq_laneq_f32(vo0, vi4, vf01, 1); \ + vo0 = vfmaq_laneq_f32(vo0, vi5, vf01, 2); \ + vo0 = vfmaq_laneq_f32(vo0, vi6, vf01, 3); \ + \ + /* outch 1 */ \ + vo1 = vfmaq_laneq_f32(vo1, vi0, vf10, 0); \ + vo1 = vfmaq_laneq_f32(vo1, vi1, vf10, 1); \ + vo1 = vfmaq_laneq_f32(vo1, vi2, vf10, 2); \ + vo1 = vfmaq_laneq_f32(vo1, vi3, vf10, 3); \ + vo1 = vfmaq_laneq_f32(vo1, vi4, vf11, 1); \ + vo1 = vfmaq_laneq_f32(vo1, vi5, vf11, 2); \ + vo1 = vfmaq_laneq_f32(vo1, vi6, vf11, 3); \ + \ + /* outch 2 */ \ + vo2 = vfmaq_laneq_f32(vo2, vi0, vf20, 0); \ + vo2 = vfmaq_laneq_f32(vo2, vi1, vf20, 1); \ + vo2 = vfmaq_laneq_f32(vo2, vi2, vf20, 2); \ + vo2 = vfmaq_laneq_f32(vo2, vi3, vf20, 3); \ + vo2 = vfmaq_laneq_f32(vo2, vi4, vf21, 1); \ + vo2 = vfmaq_laneq_f32(vo2, vi5, vf21, 2); \ + vo2 = vfmaq_laneq_f32(vo2, vi6, vf21, 3); \ + \ + /* outch 3 */ \ + vo3 = vfmaq_laneq_f32(vo3, vi0, vf30, 0); \ + vo3 = vfmaq_laneq_f32(vo3, vi1, vf30, 1); \ + 
vo3 = vfmaq_laneq_f32(vo3, vi2, vf30, 2); \ + vo3 = vfmaq_laneq_f32(vo3, vi3, vf30, 3); \ + vo3 = vfmaq_laneq_f32(vo3, vi4, vf31, 1); \ + vo3 = vfmaq_laneq_f32(vo3, vi5, vf31, 2); \ vo3 = vfmaq_laneq_f32(vo3, vi6, vf31, 3); -#define MACE_Conv2dArmv8NeonK7x7SnLoadCalc1 \ - /* load filter (1 outch x 1 height x 4 width) */ \ - float32x4_t vf00, vf01; \ - vf00 = vld1q_f32(filter_ptr0); \ - vf01 = vld1q_f32(filter_ptr0 + 3); \ - \ - /* outch 0 */ \ - vo0 = vfmaq_laneq_f32(vo0, vi0, vf00, 0); \ - vo0 = vfmaq_laneq_f32(vo0, vi1, vf00, 1); \ - vo0 = vfmaq_laneq_f32(vo0, vi2, vf00, 2); \ - vo0 = vfmaq_laneq_f32(vo0, vi3, vf00, 3); \ - vo0 = vfmaq_laneq_f32(vo0, vi4, vf01, 1); \ - vo0 = vfmaq_laneq_f32(vo0, vi5, vf01, 2); \ +#define MACE_Conv2dArmv8NeonK7x7SnLoadCalc1 \ + /* load filter (1 outch x 1 height x 4 width) */ \ + float32x4_t vf00, vf01; \ + vf00 = vld1q_f32(filter_ptr0); \ + vf01 = vld1q_f32(filter_ptr0 + 3); \ + \ + /* outch 0 */ \ + vo0 = vfmaq_laneq_f32(vo0, vi0, vf00, 0); \ + vo0 = vfmaq_laneq_f32(vo0, vi1, vf00, 1); \ + vo0 = vfmaq_laneq_f32(vo0, vi2, vf00, 2); \ + vo0 = vfmaq_laneq_f32(vo0, vi3, vf00, 3); \ + vo0 = vfmaq_laneq_f32(vo0, vi4, vf01, 1); \ + vo0 = vfmaq_laneq_f32(vo0, vi5, vf01, 2); \ vo0 = vfmaq_laneq_f32(vo0, vi6, vf01, 3); -#define MACE_Conv2dArmv7NeonK7x7SnLoadCalc4 \ - /* load filter (4 outch x 1 height x 4 width) */ \ - float32x4_t vf00, vf01; \ - float32x4_t vf10, vf11; \ - float32x4_t vf20, vf21; \ - float32x4_t vf30, vf31; \ - vf00 = vld1q_f32(filter_ptr0); \ - vf01 = vld1q_f32(filter_ptr0 + 3); \ - vf10 = vld1q_f32(filter_ptr1); \ - vf11 = vld1q_f32(filter_ptr1 + 3); \ - vf20 = vld1q_f32(filter_ptr2); \ - vf21 = vld1q_f32(filter_ptr2 + 3); \ - vf30 = vld1q_f32(filter_ptr3); \ - vf31 = vld1q_f32(filter_ptr3 + 3); \ - \ - /* outch 0 */ \ - vo0 = vmlaq_lane_f32(vo0, vi0, vget_low_f32(vf00), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ - vo0 = 
vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi4, vget_low_f32(vf01), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi5, vget_high_f32(vf01), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi6, vget_high_f32(vf01), 1); \ - \ - /* outch 1 */ \ - vo1 = vmlaq_lane_f32(vo1, vi0, vget_low_f32(vf10), 0); \ - vo1 = vmlaq_lane_f32(vo1, vi1, vget_low_f32(vf10), 1); \ - vo1 = vmlaq_lane_f32(vo1, vi2, vget_high_f32(vf10), 0); \ - vo1 = vmlaq_lane_f32(vo1, vi3, vget_high_f32(vf10), 1); \ - vo1 = vmlaq_lane_f32(vo1, vi4, vget_low_f32(vf11), 1); \ - vo1 = vmlaq_lane_f32(vo1, vi5, vget_high_f32(vf11), 0); \ - vo1 = vmlaq_lane_f32(vo1, vi6, vget_high_f32(vf11), 1); \ - \ - /* outch 2 */ \ - vo2 = vmlaq_lane_f32(vo2, vi0, vget_low_f32(vf20), 0); \ - vo2 = vmlaq_lane_f32(vo2, vi1, vget_low_f32(vf20), 1); \ - vo2 = vmlaq_lane_f32(vo2, vi2, vget_high_f32(vf20), 0); \ - vo2 = vmlaq_lane_f32(vo2, vi3, vget_high_f32(vf20), 1); \ - vo2 = vmlaq_lane_f32(vo2, vi4, vget_low_f32(vf21), 1); \ - vo2 = vmlaq_lane_f32(vo2, vi5, vget_high_f32(vf21), 0); \ - vo2 = vmlaq_lane_f32(vo2, vi6, vget_high_f32(vf21), 1); \ - \ - /* outch 3 */ \ - vo3 = vmlaq_lane_f32(vo3, vi0, vget_low_f32(vf30), 0); \ - vo3 = vmlaq_lane_f32(vo3, vi1, vget_low_f32(vf30), 1); \ - vo3 = vmlaq_lane_f32(vo3, vi2, vget_high_f32(vf30), 0); \ - vo3 = vmlaq_lane_f32(vo3, vi3, vget_high_f32(vf30), 1); \ - vo3 = vmlaq_lane_f32(vo3, vi4, vget_low_f32(vf31), 1); \ - vo3 = vmlaq_lane_f32(vo3, vi5, vget_high_f32(vf31), 0); \ +#define MACE_Conv2dArmv7NeonK7x7SnLoadCalc4 \ + /* load filter (4 outch x 1 height x 4 width) */ \ + float32x4_t vf00, vf01; \ + float32x4_t vf10, vf11; \ + float32x4_t vf20, vf21; \ + float32x4_t vf30, vf31; \ + vf00 = vld1q_f32(filter_ptr0); \ + vf01 = vld1q_f32(filter_ptr0 + 3); \ + vf10 = vld1q_f32(filter_ptr1); \ + vf11 = vld1q_f32(filter_ptr1 + 3); \ + vf20 = vld1q_f32(filter_ptr2); \ + vf21 = vld1q_f32(filter_ptr2 + 3); \ + vf30 = vld1q_f32(filter_ptr3); \ + vf31 = vld1q_f32(filter_ptr3 + 3); \ + \ 
+ /* outch 0 */ \ + vo0 = vmlaq_lane_f32(vo0, vi0, vget_low_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi4, vget_low_f32(vf01), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi5, vget_high_f32(vf01), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi6, vget_high_f32(vf01), 1); \ + \ + /* outch 1 */ \ + vo1 = vmlaq_lane_f32(vo1, vi0, vget_low_f32(vf10), 0); \ + vo1 = vmlaq_lane_f32(vo1, vi1, vget_low_f32(vf10), 1); \ + vo1 = vmlaq_lane_f32(vo1, vi2, vget_high_f32(vf10), 0); \ + vo1 = vmlaq_lane_f32(vo1, vi3, vget_high_f32(vf10), 1); \ + vo1 = vmlaq_lane_f32(vo1, vi4, vget_low_f32(vf11), 1); \ + vo1 = vmlaq_lane_f32(vo1, vi5, vget_high_f32(vf11), 0); \ + vo1 = vmlaq_lane_f32(vo1, vi6, vget_high_f32(vf11), 1); \ + \ + /* outch 2 */ \ + vo2 = vmlaq_lane_f32(vo2, vi0, vget_low_f32(vf20), 0); \ + vo2 = vmlaq_lane_f32(vo2, vi1, vget_low_f32(vf20), 1); \ + vo2 = vmlaq_lane_f32(vo2, vi2, vget_high_f32(vf20), 0); \ + vo2 = vmlaq_lane_f32(vo2, vi3, vget_high_f32(vf20), 1); \ + vo2 = vmlaq_lane_f32(vo2, vi4, vget_low_f32(vf21), 1); \ + vo2 = vmlaq_lane_f32(vo2, vi5, vget_high_f32(vf21), 0); \ + vo2 = vmlaq_lane_f32(vo2, vi6, vget_high_f32(vf21), 1); \ + \ + /* outch 3 */ \ + vo3 = vmlaq_lane_f32(vo3, vi0, vget_low_f32(vf30), 0); \ + vo3 = vmlaq_lane_f32(vo3, vi1, vget_low_f32(vf30), 1); \ + vo3 = vmlaq_lane_f32(vo3, vi2, vget_high_f32(vf30), 0); \ + vo3 = vmlaq_lane_f32(vo3, vi3, vget_high_f32(vf30), 1); \ + vo3 = vmlaq_lane_f32(vo3, vi4, vget_low_f32(vf31), 1); \ + vo3 = vmlaq_lane_f32(vo3, vi5, vget_high_f32(vf31), 0); \ vo3 = vmlaq_lane_f32(vo3, vi6, vget_high_f32(vf31), 1); -#define MACE_Conv2dArmv7NeonK7x7SnLoadCalc1 \ - /* load filter (1 outch x 1 height x 4 width) */ \ - float32x4_t vf00, vf01; \ - vf00 = vld1q_f32(filter_ptr0); \ - vf01 = vld1q_f32(filter_ptr0 + 3); \ - \ - /* outch 0 */ \ - vo0 = vmlaq_lane_f32(vo0, 
vi0, vget_low_f32(vf00), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ - vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi4, vget_low_f32(vf01), 1); \ - vo0 = vmlaq_lane_f32(vo0, vi5, vget_high_f32(vf01), 0); \ +#define MACE_Conv2dArmv7NeonK7x7SnLoadCalc1 \ + /* load filter (1 outch x 1 height x 4 width) */ \ + float32x4_t vf00, vf01; \ + vf00 = vld1q_f32(filter_ptr0); \ + vf01 = vld1q_f32(filter_ptr0 + 3); \ + \ + /* outch 0 */ \ + vo0 = vmlaq_lane_f32(vo0, vi0, vget_low_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi1, vget_low_f32(vf00), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi2, vget_high_f32(vf00), 0); \ + vo0 = vmlaq_lane_f32(vo0, vi3, vget_high_f32(vf00), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi4, vget_low_f32(vf01), 1); \ + vo0 = vmlaq_lane_f32(vo0, vi5, vget_high_f32(vf01), 0); \ vo0 = vmlaq_lane_f32(vo0, vi6, vget_high_f32(vf01), 1); // Ho = 1, Wo = 4, Co = 4 @@ -176,7 +176,7 @@ void Conv2dNeonK7x7S1(const float *input, float *out_ptr0_base = output + b * out_batch_size + m * out_image_size; #if defined(MACE_ENABLE_NEON) float *out_ptr1_base = - output + b * out_batch_size + (m + 1) * out_image_size; + output + b * out_batch_size + (m + 1) * out_image_size; float *out_ptr2_base = output + b * out_batch_size + (m + 2) * out_image_size; float *out_ptr3_base = @@ -195,7 +195,7 @@ void Conv2dNeonK7x7S1(const float *input, filter + (m + 3) * in_channels * 49 + c * 49; for (index_t h = 0; h < out_height; ++h) { for (index_t w = 0; w + 3 < out_width; w += 4) { - // input offset + // input offset index_t in_offset = h * in_width + w; // output (4 outch x 1 height x 4 width): vo_outch_height float32x4_t vo0, vo1, vo2, vo3; @@ -242,7 +242,7 @@ void Conv2dNeonK7x7S1(const float *input, filter_ptr2 -= 49; filter_ptr3 -= 49; } // w - } // h + } // h #else for (index_t oc = 0; oc < 4; ++oc) { Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0 + oc * in_channels * 49, 
@@ -296,17 +296,16 @@ void Conv2dNeonK7x7S1(const float *input, vst1q_f32(out_ptr0_base + out_offset, vo0); filter_ptr0 -= 49; } // w - } // h + } // h #else - Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, - in_width, 7, 7, out_height, out_width, - out_ptr0_base, 1); + Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, in_width, 7, 7, + out_height, out_width, out_ptr0_base, 1); #endif } // c - } // mm - } // if - } // m - } // b + } // mm + } // if + } // m + } // b } // Ho = 1, Wo = 4, Co = 4 @@ -372,8 +371,8 @@ void Conv2dNeonK7x7S2(const float *input, vvi0 = vld2q_f32(in_ptr_base + in_offset); // [8.10.12.14, 9.11.13.15] vvi1 = vld2q_f32(in_ptr_base + in_offset + 8); - vi0 = vvi0.val[0]; // [0.2.4.6] - vi1 = vvi0.val[1]; // [1.3.5.7] + vi0 = vvi0.val[0]; // [0.2.4.6] + vi1 = vvi0.val[1]; // [1.3.5.7] vi2 = vextq_f32(vi0, vvi1.val[0], 1); // [2.4.6.8] vi3 = vextq_f32(vi1, vvi1.val[1], 1); // [3.5.7.9] vi4 = vextq_f32(vi0, vvi1.val[0], 2); // [4.6.8.10] @@ -381,9 +380,9 @@ void Conv2dNeonK7x7S2(const float *input, vi6 = vextq_f32(vi0, vvi1.val[0], 3); // [6.8.10.12] #if defined(__aarch64__) - MACE_Conv2dArmv8NeonK7x7SnLoadCalc4; + MACE_Conv2dArmv8NeonK7x7SnLoadCalc4; #else - MACE_Conv2dArmv7NeonK7x7SnLoadCalc4; + MACE_Conv2dArmv7NeonK7x7SnLoadCalc4; #endif in_offset += in_width; @@ -403,7 +402,7 @@ void Conv2dNeonK7x7S2(const float *input, filter_ptr2 -= 49; filter_ptr3 -= 49; } // w - } // h + } // h #else for (index_t oc = 0; oc < 4; ++oc) { Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0 + oc * in_channels * 49, @@ -441,8 +440,8 @@ void Conv2dNeonK7x7S2(const float *input, vvi0 = vld2q_f32(in_ptr_base + in_offset); // [8.10.12.14, 9.11.13.15] vvi1 = vld2q_f32(in_ptr_base + in_offset + 8); - vi0 = vvi0.val[0]; // [0.2.4.6] - vi1 = vvi0.val[1]; // [1.3.5.7] + vi0 = vvi0.val[0]; // [0.2.4.6] + vi1 = vvi0.val[1]; // [1.3.5.7] vi2 = vextq_f32(vi0, vvi1.val[0], 1); // [2.4.6.8] vi3 = vextq_f32(vi1, vvi1.val[1], 1); // [3.5.7.9] vi4 = vextq_f32(vi0, vvi1.val[0], 2); // 
[4.6.8.10] @@ -462,17 +461,16 @@ void Conv2dNeonK7x7S2(const float *input, vst1q_f32(out_ptr0_base + out_offset, vo0); filter_ptr0 -= 49; } // w - } // h + } // h #else - Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, - in_width, 7, 7, out_height, out_width, - out_ptr0_base, 2); + Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, in_width, 7, 7, + out_height, out_width, out_ptr0_base, 2); #endif } // c - } // mm - } // if - } // m - } // b + } // mm + } // if + } // m + } // b } // Ho = 1, Wo = 4, Co = 4 @@ -538,18 +536,18 @@ void Conv2dNeonK7x7S3(const float *input, vvi0 = vld3q_f32(in_ptr_base + in_offset); // [12.15.xx.xx, 13.xx.xx.xx, 14.xx.xx.xx] vvi1 = vld3q_f32(in_ptr_base + in_offset + 12); - vi0 = vvi0.val[0]; // [0.3.6.9] - vi1 = vvi0.val[1]; // [1.4.7.10] - vi2 = vvi0.val[2]; // [2.5.8.11] + vi0 = vvi0.val[0]; // [0.3.6.9] + vi1 = vvi0.val[1]; // [1.4.7.10] + vi2 = vvi0.val[2]; // [2.5.8.11] vi3 = vextq_f32(vi0, vvi1.val[0], 1); // [3.6.9.12] vi4 = vextq_f32(vi1, vvi1.val[1], 1); // [4.7.10.13] vi5 = vextq_f32(vi2, vvi1.val[2], 1); // [5.8.11.14] vi6 = vextq_f32(vi0, vvi1.val[0], 2); // [6.9.12.15] #if defined(__aarch64__) - MACE_Conv2dArmv8NeonK7x7SnLoadCalc4; + MACE_Conv2dArmv8NeonK7x7SnLoadCalc4; #else - MACE_Conv2dArmv7NeonK7x7SnLoadCalc4; + MACE_Conv2dArmv7NeonK7x7SnLoadCalc4; #endif in_offset += in_width; @@ -569,7 +567,7 @@ void Conv2dNeonK7x7S3(const float *input, filter_ptr2 -= 49; filter_ptr3 -= 49; } // w - } // h + } // h #else for (index_t oc = 0; oc < 4; ++oc) { Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0 + oc * in_channels * 49, @@ -607,9 +605,9 @@ void Conv2dNeonK7x7S3(const float *input, vvi0 = vld3q_f32(in_ptr_base + in_offset); // [12.15.xx.xx, 13.xx.xx.xx, 14.xx.xx.xx] vvi1 = vld3q_f32(in_ptr_base + in_offset + 12); - vi0 = vvi0.val[0]; // [0.3.6.9] - vi1 = vvi0.val[1]; // [1.4.7.10] - vi2 = vvi0.val[2]; // [2.5.8.11] + vi0 = vvi0.val[0]; // [0.3.6.9] + vi1 = vvi0.val[1]; // [1.4.7.10] + vi2 = vvi0.val[2]; // [2.5.8.11] vi3 = vextq_f32(vi0, 
vvi1.val[0], 1); // [3.6.9.12] vi4 = vextq_f32(vi1, vvi1.val[1], 1); // [4.7.10.13] vi5 = vextq_f32(vi2, vvi1.val[2], 1); // [5.8.11.14] @@ -628,17 +626,16 @@ void Conv2dNeonK7x7S3(const float *input, vst1q_f32(out_ptr0_base + out_offset, vo0); filter_ptr0 -= 49; } // w - } // h + } // h #else - Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, - in_width, 7, 7, out_height, out_width, - out_ptr0_base, 3); + Conv2dCPUKHxKWCalc(in_ptr_base, filter_ptr0, in_width, 7, 7, + out_height, out_width, out_ptr0_base, 3); #endif } // c - } // mm - } // if - } // m - } // b + } // mm + } // if + } // m + } // b } } // namespace kernels diff --git a/mace/kernels/arm/conv_winograd.cc b/mace/kernels/arm/conv_winograd.cc index 6a3b520b..b074ced7 100644 --- a/mace/kernels/arm/conv_winograd.cc +++ b/mace/kernels/arm/conv_winograd.cc @@ -17,8 +17,8 @@ #include "mace/kernels/arm/conv_winograd.h" #include "mace/kernels/gemm.h" -#include "mace/utils/utils.h" #include "mace/utils/logging.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { @@ -44,14 +44,13 @@ void TransformInput4x4(const float *input, for (index_t h = 0; h < in_height - 2; h += 2) { for (index_t w = 0; w < in_width - 2; w += 2) { float d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12, d13, d14, - d15; + d15; float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, - s15; + s15; // load tile data - const float *input_ptr = - input + n * input_batch_size + c * in_height_width + h * in_width - + w; + const float *input_ptr = input + n * input_batch_size + + c * in_height_width + h * in_width + w; d0 = input_ptr[0]; d1 = input_ptr[1]; d2 = input_ptr[2]; @@ -92,7 +91,7 @@ void TransformInput4x4(const float *input, // store output float *output_ptr = - output + n * output_batch_size + c * tile_count + tile_index; + output + n * output_batch_size + c * tile_count + tile_index; output_ptr[0] = s0; output_ptr[1 * stride] = s1; output_ptr[2 * stride] = s2; @@ -166,9 +165,8 @@ void 
TransformInput8x8(const float *input, float s[8][8]; for (index_t h = 0; h < in_height - 2; h += 6) { for (index_t w = 0; w < in_width - 2; w += 6) { - const float *input_ptr = - input + n * input_batch_size + c * in_height_width + h * in_width - + w; + const float *input_ptr = input + n * input_batch_size + + c * in_height_width + h * in_width + w; for (int i = 0; i < 8; ++i) { float d0, d1, d2, d3, d4, d5, d6, d7; @@ -203,7 +201,7 @@ void TransformInput8x8(const float *input, } float *output_ptr = - output + n * output_batch_size + c * tile_count + tile_index; + output + n * output_batch_size + c * tile_count + tile_index; for (int i = 0; i < 8; ++i) { float d0, d1, d2, d3, d4, d5, d6, d7; d0 = s[0][i]; @@ -258,27 +256,18 @@ void BatchGemm(const float *input, const index_t out_stride = out_channels * tile_count; if (batch == 1) { - Gemm(filter, - input, - in_tile_area, - out_channels, - in_channels, - tile_count, + Gemm(filter, input, in_tile_area, out_channels, in_channels, tile_count, output); } else { #pragma omp parallel for collapse(2) for (int b = 0; b < batch; ++b) { for (int i = 0; i < in_tile_area; ++i) { - const float - *in_ptr = input + b * in_batch_size + i * in_stride; + const float *in_ptr = input + b * in_batch_size + i * in_stride; const float *filter_ptr = filter + i * filter_stride; float *out_ptr = output + b * out_batch_size + i * out_stride; - Gemm(filter_ptr, - in_ptr, - 1, - out_channels, /* rows */ - in_channels, /* K */ - tile_count, /* cols */ + Gemm(filter_ptr, in_ptr, 1, out_channels, /* rows */ + in_channels, /* K */ + tile_count, /* cols */ out_ptr); } } @@ -305,12 +294,12 @@ void TransformOutput4x4(const float *input, for (index_t h = 0; h < out_height; h += 2) { for (index_t w = 0; w < out_width; w += 2) { float d0, d1, d2, d3, d4, d5, d6, d7, d8, d9, d10, d11, d12, d13, d14, - d15; + d15; float s0, s1, s2, s3, s4, s5, s6, s7; float v0, v1, v2, v3; const float *input_ptr = - input + n * input_batch_size + m * tile_count + 
tile_offset; + input + n * input_batch_size + m * tile_count + tile_offset; d0 = input_ptr[0]; d1 = input_ptr[1 * stride]; d2 = input_ptr[2 * stride]; @@ -345,9 +334,8 @@ void TransformOutput4x4(const float *input, v2 = s2 - s4 - s6; v3 = s3 - s5 - s7; - float *output_ptr = - output + n * output_batch_size + m * out_image_size + h * out_width - + w; + float *output_ptr = output + n * output_batch_size + + m * out_image_size + h * out_width + w; output_ptr[0] = v0; output_ptr[1] = v1; output_ptr[out_width] = v2; @@ -403,7 +391,7 @@ void TransformOutput8x8(const float *input, for (index_t h = 0; h < out_height; h += 6) { for (index_t w = 0; w < out_width; w += 6) { const float *input_ptr = - input + n * input_batch_size + m * tile_count + tile_offset; + input + n * input_batch_size + m * tile_count + tile_offset; for (int i = 0; i < 8; ++i) { float d0, d1, d2, d3, d4, d5, d6, d7; @@ -433,9 +421,8 @@ void TransformOutput8x8(const float *input, input_ptr += 8 * stride; } - float *output_ptr = - output + n * output_batch_size + m * out_image_size + h * out_width - + w; + float *output_ptr = output + n * output_batch_size + + m * out_image_size + h * out_width + w; for (int i = 0; i < 6; ++i) { float d0, d1, d2, d3, d4, d5, d6, d7; @@ -471,7 +458,6 @@ void TransformOutput8x8(const float *input, } } // namespace - // OCHW => TOC // no need to optimize, it will exist in converter void TransformFilter4x4(const float *filter, @@ -485,7 +471,7 @@ void TransformFilter4x4(const float *filter, for (index_t c = 0; c < in_channels; ++c) { float g0, g1, g2, g3, g4, g5, g6, g7, g8; float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, - s15; + s15; // load filter index_t filter_offset = (m * in_channels + c) * 9; @@ -573,16 +559,14 @@ void TransformFilter8x8(const float *filter, float *output) { const index_t stride = out_channels * in_channels; - const float G[8][3] = { - {1.0f, 0.0f, 0.0f}, - {-2.0f / 9, -2.0f / 9, -2.0f / 9}, - {-2.0f / 9, 2.0f / 9, -2.0f / 9}, - 
{1.0f / 90, 1.0f / 45, 2.0f / 45}, - {1.0f / 90, -1.0f / 45, 2.0f / 45}, - {1.0f / 45, 1.0f / 90, 1.0f / 180}, - {1.0f / 45, -1.0f / 90, 1.0f / 180}, - {0.0f, 0.0f, 1.0f} - }; + const float G[8][3] = {{1.0f, 0.0f, 0.0f}, + {-2.0f / 9, -2.0f / 9, -2.0f / 9}, + {-2.0f / 9, 2.0f / 9, -2.0f / 9}, + {1.0f / 90, 1.0f / 45, 2.0f / 45}, + {1.0f / 90, -1.0f / 45, 2.0f / 45}, + {1.0f / 45, 1.0f / 90, 1.0f / 180}, + {1.0f / 45, -1.0f / 90, 1.0f / 180}, + {0.0f, 0.0f, 1.0f}}; #pragma omp parallel for collapse(2) for (index_t m = 0; m < out_channels; ++m) { @@ -612,7 +596,7 @@ void TransformFilter8x8(const float *filter, for (int i = 0; i < 8; ++i) { for (int j = 0; j < 8; ++j) { output[output_offset + (i * 8 + j) * stride] = - G[i][0] * s[0][j] + G[i][1] * s[1][j] + G[i][2] * s[2][j]; + G[i][0] * s[0][j] + G[i][1] * s[1][j] + G[i][2] * s[2][j]; } } } @@ -633,62 +617,38 @@ void WinoGradConv3x3s1(const float *input, index_t out_height = in_height - 2; index_t out_width = in_width - 2; index_t tile_height_count = - RoundUpDiv(out_height, static_cast(out_tile_size)); + RoundUpDiv(out_height, static_cast(out_tile_size)); index_t tile_width_count = - RoundUpDiv(out_width, static_cast(out_tile_size)); + RoundUpDiv(out_width, static_cast(out_tile_size)); index_t tile_count = tile_height_count * tile_width_count; switch (out_tile_size) { case 2: - TransformInput4x4(input, - batch, - in_height, - in_width, - in_channels, - tile_count, - transformed_input); + TransformInput4x4(input, batch, in_height, in_width, in_channels, + tile_count, transformed_input); break; case 6: - TransformInput8x8(input, - batch, - in_height, - in_width, - in_channels, - tile_count, - transformed_input); + TransformInput8x8(input, batch, in_height, in_width, in_channels, + tile_count, transformed_input); break; - default:MACE_NOT_IMPLEMENTED; + default: + MACE_NOT_IMPLEMENTED; } - BatchGemm(transformed_input, - transformed_filter, - batch, - in_channels, - out_channels, - tile_count, - out_tile_size, - 
transformed_output); + BatchGemm(transformed_input, transformed_filter, batch, in_channels, + out_channels, tile_count, out_tile_size, transformed_output); switch (out_tile_size) { case 2: - TransformOutput4x4(transformed_output, - batch, - out_height, - out_width, - out_channels, - tile_count, - output); + TransformOutput4x4(transformed_output, batch, out_height, out_width, + out_channels, tile_count, output); break; case 6: - TransformOutput8x8(transformed_output, - batch, - out_height, - out_width, - out_channels, - tile_count, - output); + TransformOutput8x8(transformed_output, batch, out_height, out_width, + out_channels, tile_count, output); break; - default:MACE_NOT_IMPLEMENTED; + default: + MACE_NOT_IMPLEMENTED; } } @@ -704,52 +664,39 @@ void WinoGradConv3x3s1(const float *input, index_t out_height = in_height - 2; index_t out_width = in_width - 2; index_t tile_height_count = - RoundUpDiv(out_height, static_cast(out_tile_size)); + RoundUpDiv(out_height, static_cast(out_tile_size)); index_t tile_width_count = - RoundUpDiv(out_width, static_cast(out_tile_size)); + RoundUpDiv(out_width, static_cast(out_tile_size)); index_t tile_count = tile_height_count * tile_width_count; index_t in_tile_area = (out_tile_size + 2) * (out_tile_size + 2); index_t transformed_input_size = - in_tile_area * batch * in_channels * tile_count; + in_tile_area * batch * in_channels * tile_count; index_t transformed_filter_size = in_tile_area * out_channels * in_channels; - index_t - transformed_output_size = in_tile_area * batch * out_channels * tile_count; + index_t transformed_output_size = + in_tile_area * batch * out_channels * tile_count; - float *transformed_input = new float[transformed_input_size]; // TNCB + float *transformed_input = new float[transformed_input_size]; // TNCB float *transformed_filter = new float[transformed_filter_size]; // TOC float *transformed_output = new float[transformed_output_size]; switch (out_tile_size) { case 2: - TransformFilter4x4(filter, - 
in_channels, - out_channels, - transformed_filter); + TransformFilter4x4(filter, in_channels, out_channels, transformed_filter); break; case 6: - TransformFilter8x8(filter, - in_channels, - out_channels, - transformed_filter); + TransformFilter8x8(filter, in_channels, out_channels, transformed_filter); break; - default:MACE_NOT_IMPLEMENTED; + default: + MACE_NOT_IMPLEMENTED; } - WinoGradConv3x3s1(input, - transformed_filter, - batch, - in_height, - in_width, - in_channels, - out_channels, - out_tile_size, - transformed_input, - transformed_output, - output); - - delete[]transformed_input; - delete[]transformed_filter; - delete[]transformed_output; + WinoGradConv3x3s1(input, transformed_filter, batch, in_height, in_width, + in_channels, out_channels, out_tile_size, transformed_input, + transformed_output, output); + + delete[] transformed_input; + delete[] transformed_filter; + delete[] transformed_output; } void ConvRef3x3s1(const float *input, @@ -769,7 +716,7 @@ void ConvRef3x3s1(const float *input, for (index_t h = 0; h < out_height; ++h) { for (index_t w = 0; w < out_width; ++w) { index_t out_offset = - ((b * out_channels + m) * out_height + h) * out_width + w; + ((b * out_channels + m) * out_height + h) * out_width + w; output[out_offset] = 0; for (index_t c = 0; c < in_channels; ++c) { for (index_t kh = 0; kh < 3; ++kh) { @@ -777,11 +724,10 @@ void ConvRef3x3s1(const float *input, index_t ih = h + kh; index_t iw = w + kw; index_t in_offset = - ((b * in_channels + c) * in_height + ih) * in_width + iw; - index_t - filter_offset = (((m * in_channels) + c) * 3 + kh) * 3 + kw; - output[out_offset] += - input[in_offset] * filter[filter_offset]; + ((b * in_channels + c) * in_height + ih) * in_width + iw; + index_t filter_offset = + (((m * in_channels) + c) * 3 + kh) * 3 + kw; + output[out_offset] += input[in_offset] * filter[filter_offset]; } } } diff --git a/mace/kernels/arm/conv_winograd_test.cc b/mace/kernels/arm/conv_winograd_test.cc index ee3223b5..166b67a5 
100644 --- a/mace/kernels/arm/conv_winograd_test.cc +++ b/mace/kernels/arm/conv_winograd_test.cc @@ -13,13 +13,13 @@ // limitations under the License. #include -#include #include #include +#include -#include "mace/kernels/arm/conv_winograd.h" -#include "mace/core/types.h" #include "mace/core/tensor.h" +#include "mace/core/types.h" +#include "mace/kernels/arm/conv_winograd.h" namespace mace { namespace kernels { @@ -55,32 +55,18 @@ TEST(ConvWinogradTest, winograd) { std::random_device rd; std::mt19937 gen(rd()); std::normal_distribution nd(0, 1); - std::generate(input_data, input_data + input_size, - [&gen, &nd] { - return std::max(-1.0f, std::min(1.0f, nd(gen))); - }); - std::generate(filter_data, filter_data + filter_size, - [&gen, &nd] { - return std::max(-1.0f, std::min(1.0f, nd(gen))); - }); + std::generate(input_data, input_data + input_size, [&gen, &nd] { + return std::max(-1.0f, std::min(1.0f, nd(gen))); + }); + std::generate(filter_data, filter_data + filter_size, [&gen, &nd] { + return std::max(-1.0f, std::min(1.0f, nd(gen))); + }); - kernels::ConvRef3x3s1(input_data, - filter_data, - batch, - in_height, - in_width, - in_channels, - out_channels, - output_data_ref); + kernels::ConvRef3x3s1(input_data, filter_data, batch, in_height, in_width, + in_channels, out_channels, output_data_ref); - kernels::WinoGradConv3x3s1(input_data, - filter_data, - batch, - in_height, - in_width, - in_channels, - out_channels, - 6, + kernels::WinoGradConv3x3s1(input_data, filter_data, batch, in_height, + in_width, in_channels, out_channels, 6, output_data); // test diff --git a/mace/kernels/arm/depthwise_conv2d_neon.h b/mace/kernels/arm/depthwise_conv2d_neon.h index 119867bf..ec3fb360 100644 --- a/mace/kernels/arm/depthwise_conv2d_neon.h +++ b/mace/kernels/arm/depthwise_conv2d_neon.h @@ -32,15 +32,15 @@ void DepthwiseConv2dNeonK3x3S1(const float *input, float *output); void DepthwiseConv2dNeonK3x3S2(const float *input, - const float *filter, - const index_t *in_shape, - const 
index_t *out_shape, - const int *pad_hw, - const index_t valid_h_start, - const index_t valid_h_stop, - const index_t valid_w_start, - const index_t valid_w_stop, - float *output); + const float *filter, + const index_t *in_shape, + const index_t *out_shape, + const int *pad_hw, + const index_t valid_h_start, + const index_t valid_h_stop, + const index_t valid_w_start, + const index_t valid_w_stop, + float *output); } // namespace kernels } // namespace mace diff --git a/mace/kernels/arm/depthwise_conv2d_neon_3x3.cc b/mace/kernels/arm/depthwise_conv2d_neon_3x3.cc index 443e57b7..3a4491fb 100644 --- a/mace/kernels/arm/depthwise_conv2d_neon_3x3.cc +++ b/mace/kernels/arm/depthwise_conv2d_neon_3x3.cc @@ -16,8 +16,8 @@ #include #endif -#include "mace/kernels/arm/depthwise_conv2d_neon.h" #include "mace/core/macros.h" +#include "mace/kernels/arm/depthwise_conv2d_neon.h" namespace mace { namespace kernels { @@ -52,9 +52,9 @@ void DepthwiseConv2dPixel(const float *in_base, // Ho = 2, Wo = 4, Co = 1 void DepthwiseConv2dNeonK3x3S1(const float *input, const float *filter, - const index_t* in_shape, - const index_t* out_shape, - const int* pad_hw, + const index_t *in_shape, + const index_t *out_shape, + const int *pad_hw, const index_t valid_h_start, const index_t valid_h_stop, const index_t valid_w_start, @@ -88,18 +88,9 @@ void DepthwiseConv2dNeonK3x3S1(const float *input, // top for (h = 0; h < valid_h_start; ++h) { for (w = 0; w < out_shape[3]; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h - pad_top, - w - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h, w, h - pad_top, + w - pad_left, out_width, in_height, in_width, 3, + 3, out_base); } } @@ -113,30 +104,12 @@ void DepthwiseConv2dNeonK3x3S1(const float *input, for (h = valid_h_start; h + 1 < valid_h_stop; h += 2) { // left for (w = 0; w < valid_w_start; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h - pad_top, - w 
- pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); - DepthwiseConv2dPixel(in_base, - filter_ptr, - h + 1, - w, - h + 1 - pad_top, - w - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h, w, h - pad_top, + w - pad_left, out_width, in_height, in_width, 3, + 3, out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h + 1, w, h + 1 - pad_top, + w - pad_left, out_width, in_height, in_width, 3, + 3, out_base); } for (w = valid_w_start; w + 3 < valid_w_stop; w += 4) { @@ -227,47 +200,20 @@ void DepthwiseConv2dNeonK3x3S1(const float *input, // right for (; w < out_width; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h - pad_top, - w - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); - DepthwiseConv2dPixel(in_base, - filter_ptr, - h + 1, - w, - h + 1 - pad_top, - w - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h, w, h - pad_top, + w - pad_left, out_width, in_height, in_width, 3, + 3, out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h + 1, w, h + 1 - pad_top, + w - pad_left, out_width, in_height, in_width, 3, + 3, out_base); } } // h #else for (index_t ih = valid_h_start; ih < valid_h_stop; ++ih) { for (index_t iw = 0; iw < out_shape[3]; ++iw) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - ih, - iw, - ih - pad_top, - iw - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, ih, iw, ih - pad_top, + iw - pad_left, out_width, in_height, in_width, 3, + 3, out_base); } } #endif @@ -275,29 +221,20 @@ void DepthwiseConv2dNeonK3x3S1(const float *input, // bottom for (; h < out_shape[2]; ++h) { for (w = 0; w < out_shape[3]; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h - pad_top, - w - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, 
filter_ptr, h, w, h - pad_top, + w - pad_left, out_width, in_height, in_width, 3, + 3, out_base); } } } // m - } // b + } // b } void DepthwiseConv2dNeonK3x3S2(const float *input, const float *filter, - const index_t* in_shape, - const index_t* out_shape, - const int* pad_hw, + const index_t *in_shape, + const index_t *out_shape, + const int *pad_hw, const index_t valid_h_start, const index_t valid_h_stop, const index_t valid_w_start, @@ -330,18 +267,9 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, // top for (h = 0; h < valid_h_start; ++h) { for (w = 0; w < out_width; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h * 2 - pad_top, - w * 2 - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h, w, h * 2 - pad_top, + w * 2 - pad_left, out_width, in_height, in_width, + 3, 3, out_base); } } @@ -355,18 +283,9 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, for (h = valid_h_start; h < valid_h_stop; ++h) { // left for (w = 0; w < valid_w_start; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h * 2 - pad_top, - w * 2 - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h, w, h * 2 - pad_top, + w * 2 - pad_left, out_width, in_height, in_width, + 3, 3, out_base); } for (w = valid_w_start; w + 3 < valid_w_stop; w += 4) { @@ -397,8 +316,8 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, index_t out_offset = h * out_width + w; vo = vld1q_f32(out_base + out_offset); - vi00 = vi0.val[0]; // [0.2.4.6] - vi01 = vi0.val[1]; // [1.3.5.7] + vi00 = vi0.val[0]; // [0.2.4.6] + vi01 = vi0.val[1]; // [1.3.5.7] vi02 = vextq_f32(vi00, vi0n, 1); // [2.4.6.8] vi10 = vi1.val[0]; vi11 = vi1.val[1]; @@ -435,35 +354,17 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, // right for (; w < out_width; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h * 2 - pad_top, - w * 2 - pad_left, - out_width, - 
in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h, w, h * 2 - pad_top, + w * 2 - pad_left, out_width, in_height, in_width, + 3, 3, out_base); } } // h #else for (index_t ih = valid_h_start; ih < valid_h_stop; ++ih) { for (index_t iw = 0; iw < out_width; ++iw) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - ih, - iw, - ih * 2 - pad_top, - iw * 2 - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, ih, iw, ih * 2 - pad_top, + iw * 2 - pad_left, out_width, in_height, + in_width, 3, 3, out_base); } } #endif @@ -471,22 +372,13 @@ void DepthwiseConv2dNeonK3x3S2(const float *input, // bottom for (; h < out_shape[2]; ++h) { for (w = 0; w < out_shape[3]; ++w) { - DepthwiseConv2dPixel(in_base, - filter_ptr, - h, - w, - h * 2 - pad_top, - w * 2 - pad_left, - out_width, - in_height, - in_width, - 3, - 3, - out_base); + DepthwiseConv2dPixel(in_base, filter_ptr, h, w, h * 2 - pad_top, + w * 2 - pad_left, out_width, in_height, in_width, + 3, 3, out_base); } } } // m - } // b + } // b } } // namespace kernels diff --git a/mace/kernels/channel_shuffle.h b/mace/kernels/channel_shuffle.h index 5310377b..920e1e1a 100644 --- a/mace/kernels/channel_shuffle.h +++ b/mace/kernels/channel_shuffle.h @@ -32,7 +32,7 @@ struct ChannelShuffleFunctor { Tensor *output, StatsFuture *future) { MACE_UNUSED(future); - MACE_FAILURE_RETURN(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); Tensor::MappingGuard logits_guard(input); Tensor::MappingGuard output_guard(output); diff --git a/mace/kernels/concat.h b/mace/kernels/concat.h index 6425f9f7..1728ca08 100644 --- a/mace/kernels/concat.h +++ b/mace/kernels/concat.h @@ -68,7 +68,7 @@ struct ConcatFunctor : ConcatFunctorBase { outer_sizes[i] = input->size() / inner_size; output_shape[axis_] += input->dim(axis_); } - MACE_FAILURE_RETURN(output->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape)); 
T *output_ptr = output->mutable_data(); diff --git a/mace/kernels/conv_2d.h b/mace/kernels/conv_2d.h index 99973a45..ecbd6608 100644 --- a/mace/kernels/conv_2d.h +++ b/mace/kernels/conv_2d.h @@ -296,7 +296,7 @@ struct Conv2dFunctor : Conv2dFunctorBase { RoundType::FLOOR, output_shape.data()); } - MACE_FAILURE_RETURN(output->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape)); index_t batch = output->dim(0); index_t channels = output->dim(1); @@ -497,7 +497,7 @@ struct Conv2dFunctor : Conv2dFunctorBase { if (is_filter_transformed_) { transformed_filter_ptr = filter_data; } else { - MACE_FAILURE_RETURN(transformed_filter_.Resize( + MACE_RETURN_IF_ERROR(transformed_filter_.Resize( transformed_filter_shape)); switch (winograd_out_tile_size) { case 2: @@ -644,7 +644,7 @@ struct Conv2dFunctor : Conv2dFunctorBase { const Tensor *pad_input_ptr = input; if (extra_input_height != input_height || extra_input_width != input_width) { - MACE_FAILURE_RETURN(ConstructNCHWInputWithSpecificPadding(input, + MACE_RETURN_IF_ERROR(ConstructNCHWInputWithSpecificPadding(input, pad_top, pad_bottom, pad_left, diff --git a/mace/kernels/conv_pool_2d_util.cc b/mace/kernels/conv_pool_2d_util.cc index 0d385401..2af765e3 100644 --- a/mace/kernels/conv_pool_2d_util.cc +++ b/mace/kernels/conv_pool_2d_util.cc @@ -306,7 +306,7 @@ MaceStatus ConstructNCHWInputWithPadding(const Tensor *input_tensor, const int padded_top = paddings[0] / 2; const int padded_left = paddings[1] / 2; - MACE_FAILURE_RETURN(output_tensor->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output_tensor->Resize(output_shape)); Tensor::MappingGuard padded_output_mapper(output_tensor); float *output_data = output_tensor->mutable_data(); @@ -378,7 +378,7 @@ MaceStatus ConstructNCHWInputWithSpecificPadding(const Tensor *input_tensor, const int pad_width = pad_left + pad_right; std::vector output_shape( {batch, channels, height + pad_height, width + pad_width}); - 
MACE_FAILURE_RETURN(output_tensor->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output_tensor->Resize(output_shape)); output_tensor->Clear(); Tensor::MappingGuard padded_output_mapper(output_tensor); float *output_data = output_tensor->mutable_data(); @@ -428,7 +428,7 @@ MaceStatus ConstructNHWCInputWithPadding(const Tensor *input_tensor, const int padded_top = paddings[0] / 2; const int padded_left = paddings[1] / 2; - MACE_FAILURE_RETURN(output_tensor->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output_tensor->Resize(output_shape)); Tensor::MappingGuard padded_output_mapper(output_tensor); float *output_data = output_tensor->mutable_data(); diff --git a/mace/kernels/deconv_2d.h b/mace/kernels/deconv_2d.h index 7ccef5e9..e3080e6f 100644 --- a/mace/kernels/deconv_2d.h +++ b/mace/kernels/deconv_2d.h @@ -250,7 +250,7 @@ struct Deconv2dFunctor : Deconv2dFunctorBase { strides_, padding_type_, output_shape.data(), paddings_.data(), true); - MACE_FAILURE_RETURN(output->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape)); } else { output_shape_.clear(); output_shape_ = std::vector(4, 0); @@ -259,7 +259,7 @@ struct Deconv2dFunctor : Deconv2dFunctorBase { strides_, output_shape_.data(), paddings_.data(), true); - MACE_FAILURE_RETURN(output->Resize(output_shape_)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape_)); } index_t kernel_h = filter->dim(2); index_t kernel_w = filter->dim(3); diff --git a/mace/kernels/depth_to_space.h b/mace/kernels/depth_to_space.h index cdd4a91f..c0e0f267 100644 --- a/mace/kernels/depth_to_space.h +++ b/mace/kernels/depth_to_space.h @@ -55,7 +55,7 @@ struct DepthToSpaceOpFunctor { std::vector output_shape = {batch_size, output_depth, output_height, output_width}; - MACE_FAILURE_RETURN(output->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape)); Tensor::MappingGuard logits_guard(input); Tensor::MappingGuard output_guard(output); diff --git a/mace/kernels/depthwise_conv2d.h 
b/mace/kernels/depthwise_conv2d.h index c864ceca..dd63be6f 100644 --- a/mace/kernels/depthwise_conv2d.h +++ b/mace/kernels/depthwise_conv2d.h @@ -161,7 +161,7 @@ struct DepthwiseConv2dFunctor RoundType::FLOOR, output_shape.data()); } - MACE_FAILURE_RETURN(output->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape)); output->Clear(); index_t batch = output->dim(0); diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index d5d0e77e..44a60e38 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -494,7 +494,7 @@ struct EltwiseFunctor: EltwiseFunctorBase { } } } - MACE_FAILURE_RETURN(output->ResizeLike(input0)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input0)); Tensor::MappingGuard input0_guard(input0); Tensor::MappingGuard output_guard(output); diff --git a/mace/kernels/fully_connected.h b/mace/kernels/fully_connected.h index 005dd150..a6fbebd6 100644 --- a/mace/kernels/fully_connected.h +++ b/mace/kernels/fully_connected.h @@ -57,7 +57,7 @@ struct FullyConnectedFunctor: FullyConnectedBase { StatsFuture *future) { MACE_UNUSED(future); std::vector output_shape = {input->dim(0), weight->dim(0), 1, 1}; - MACE_FAILURE_RETURN(output->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape)); const index_t N = output->dim(0); const index_t input_size = weight->dim(1) * weight->dim(2) * weight->dim(3); const index_t output_size = weight->dim(0); diff --git a/mace/kernels/matmul.h b/mace/kernels/matmul.h index 303c34c9..dee53c4a 100644 --- a/mace/kernels/matmul.h +++ b/mace/kernels/matmul.h @@ -44,7 +44,7 @@ struct MatMulFunctor { StatsFuture *future) { MACE_UNUSED(future); std::vector c_shape = {A->dim(0), A->dim(1), B->dim(2), 1}; - MACE_FAILURE_RETURN(C->Resize(c_shape)); + MACE_RETURN_IF_ERROR(C->Resize(c_shape)); Tensor::MappingGuard guarda(A); Tensor::MappingGuard guardb(B); diff --git a/mace/kernels/opencl/activation.cc b/mace/kernels/opencl/activation.cc index 3288127a..edb67873 100644 --- 
a/mace/kernels/opencl/activation.cc +++ b/mace/kernels/opencl/activation.cc @@ -21,12 +21,12 @@ namespace mace { namespace kernels { -template -MaceStatus ActivationFunctor::operator()(const Tensor *input, - const Tensor *alpha, - Tensor *output, - StatsFuture *future) { +template +MaceStatus ActivationFunctor::operator()( + const Tensor *input, + const Tensor *alpha, + Tensor *output, + StatsFuture *future) { const index_t batch = input->dim(0); const index_t height = input->dim(1); const index_t width = input->dim(2); @@ -47,7 +47,7 @@ MaceStatus ActivationFunctor( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -56,22 +56,28 @@ MaceStatus ActivationFunctorBuildKernel("activation", kernel_name, built_options); @@ -121,9 +127,7 @@ MaceStatus ActivationFunctor; -template -struct ActivationFunctor; +template struct ActivationFunctor; +template struct ActivationFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/addn.cc b/mace/kernels/opencl/addn.cc index 5325f9cd..d7270a6e 100644 --- a/mace/kernels/opencl/addn.cc +++ b/mace/kernels/opencl/addn.cc @@ -59,7 +59,7 @@ MaceStatus AddNFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -71,7 +71,7 @@ MaceStatus AddNFunctor::operator()( kernel_ = runtime->BuildKernel("addn", kernel_name, built_options); kwg_size_ = - static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); + static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); } std::vector output_shape = input_tensors[0]->shape(); @@ -87,13 +87,13 @@ MaceStatus 
AddNFunctor::operator()( std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output_tensor->ResizeImage(output_shape, - output_image_shape)); + MACE_RETURN_IF_ERROR( + output_tensor->ResizeImage(output_shape, output_image_shape)); uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); diff --git a/mace/kernels/opencl/batch_norm.cc b/mace/kernels/opencl/batch_norm.cc index 01bb3399..76f246df 100644 --- a/mace/kernels/opencl/batch_norm.cc +++ b/mace/kernels/opencl/batch_norm.cc @@ -23,14 +23,15 @@ namespace mace { namespace kernels { template -MaceStatus BatchNormFunctor::operator()(const Tensor *input, - const Tensor *scale, - const Tensor *offset, - const Tensor *mean, - const Tensor *var, - const float epsilon, - Tensor *output, - StatsFuture *future) { +MaceStatus BatchNormFunctor::operator()( + const Tensor *input, + const Tensor *scale, + const Tensor *offset, + const Tensor *mean, + const Tensor *var, + const float epsilon, + Tensor *output, + StatsFuture *future) { MACE_CHECK(folded_constant_ || (mean != nullptr && var != nullptr)); const index_t batch = input->dim(0); @@ -57,7 +58,7 @@ MaceStatus BatchNormFunctor::operator()(const Tensor *input, built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -96,7 +97,7 @@ MaceStatus BatchNormFunctor::operator()(const Tensor *input, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + 
*(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); diff --git a/mace/kernels/opencl/bias_add.cc b/mace/kernels/opencl/bias_add.cc index 136cd114..63fd1033 100644 --- a/mace/kernels/opencl/bias_add.cc +++ b/mace/kernels/opencl/bias_add.cc @@ -23,9 +23,9 @@ namespace kernels { template MaceStatus BiasAddFunctor::operator()(const Tensor *input, - const Tensor *bias, - Tensor *output, - StatsFuture *future) { + const Tensor *bias, + Tensor *output, + StatsFuture *future) { const index_t batch = input->dim(0); const index_t height = input->dim(1); const index_t width = input->dim(2); @@ -50,7 +50,7 @@ MaceStatus BiasAddFunctor::operator()(const Tensor *input, built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -67,7 +67,7 @@ MaceStatus BiasAddFunctor::operator()(const Tensor *input, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -91,8 +91,7 @@ MaceStatus BiasAddFunctor::operator()(const Tensor *input, } else { std::vector roundup_gws(lws.size()); for (size_t i = 0; i < lws.size(); ++i) { - if (lws[i] != 0) - roundup_gws[i] = RoundUp(gws[i], lws[i]); + if (lws[i] != 0) roundup_gws[i] = RoundUp(gws[i], lws[i]); } error = runtime->command_queue().enqueueNDRangeKernel( diff --git a/mace/kernels/opencl/buffer_to_image.cc b/mace/kernels/opencl/buffer_to_image.cc index 7a23ad89..c21d67a1 100644 --- a/mace/kernels/opencl/buffer_to_image.cc +++ b/mace/kernels/opencl/buffer_to_image.cc @@ -25,14 +25,13 @@ MaceStatus 
BufferToImageFunctor::operator()( const BufferType type, Tensor *image, StatsFuture *future) { - std::vector image_shape; CalImage2DShape(buffer->shape(), type, &image_shape); if (type == WINOGRAD_FILTER) { std::vector new_shape = CalWinogradShape(buffer->shape(), type); - MACE_FAILURE_RETURN(image->ResizeImage(new_shape, image_shape)); + MACE_RETURN_IF_ERROR(image->ResizeImage(new_shape, image_shape)); } else { - MACE_FAILURE_RETURN(image->ResizeImage(buffer->shape(), image_shape)); + MACE_RETURN_IF_ERROR(image->ResizeImage(buffer->shape(), image_shape)); } uint32_t gws[2] = {static_cast(image_shape[0]), @@ -94,7 +93,7 @@ MaceStatus BufferToImageFunctor::operator()( if (!kernel_error_) { kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -107,7 +106,7 @@ MaceStatus BufferToImageFunctor::operator()( uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { b2f_kernel.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { b2f_kernel.setArg(idx++, gws[0]); @@ -120,8 +119,7 @@ MaceStatus BufferToImageFunctor::operator()( static_cast(buffer->buffer_offset() / GetEnumTypeSize(buffer->dtype()))); if (type == CONV2D_FILTER) { - const index_t inner_size = - buffer->dim(1) * buffer->dim(2) * buffer->dim(3); + const index_t inner_size = buffer->dim(1) * buffer->dim(2) * buffer->dim(3); b2f_kernel.setArg(idx++, static_cast(buffer->dim(0))); b2f_kernel.setArg(idx++, static_cast(buffer->dim(2))); b2f_kernel.setArg(idx++, static_cast(buffer->dim(3))); diff --git a/mace/kernels/opencl/channel_shuffle.cc b/mace/kernels/opencl/channel_shuffle.cc index d6715e1f..63e47267 100644 --- a/mace/kernels/opencl/channel_shuffle.cc +++ b/mace/kernels/opencl/channel_shuffle.cc 
@@ -16,18 +16,16 @@ #include "mace/core/runtime/opencl/cl2_header.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/opencl/helper.h" -#include "mace/utils/utils.h" #include "mace/utils/tuner.h" +#include "mace/utils/utils.h" namespace mace { namespace kernels { template MaceStatus ChannelShuffleFunctor::operator()( - const Tensor *input, - Tensor *output, - StatsFuture *future) { - MACE_FAILURE_RETURN(output->ResizeLike(input)); + const Tensor *input, Tensor *output, StatsFuture *future) { + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); const index_t batch = input->dim(0); const index_t height = input->dim(1); @@ -36,8 +34,7 @@ MaceStatus ChannelShuffleFunctor::operator()( const index_t channels_per_group = channels / groups_; MACE_CHECK(channels_per_group % 4 == 0, "channels per group must be multiple of 4"); - MACE_CHECK(groups_ % 4 == 0, - "groups must be multiple of 4"); + MACE_CHECK(groups_ % 4 == 0, "groups must be multiple of 4"); const index_t group_channel_blocks = RoundUpDiv4(channels_per_group); const uint32_t gws[3] = {static_cast(group_channel_blocks), @@ -57,7 +54,7 @@ MaceStatus ChannelShuffleFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -65,8 +62,8 @@ MaceStatus ChannelShuffleFunctor::operator()( if (runtime->IsNonUniformWorkgroupsSupported()) { built_options.emplace("-DNON_UNIFORM_WORK_GROUP"); } - kernel_ = runtime->BuildKernel("channel_shuffle", kernel_name, - built_options); + kernel_ = + runtime->BuildKernel("channel_shuffle", kernel_name, built_options); kwg_size_ = static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); @@ -76,7 +73,7 @@ MaceStatus ChannelShuffleFunctor::operator()( uint32_t idx = 0; if 
(runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -93,8 +90,8 @@ MaceStatus ChannelShuffleFunctor::operator()( const std::vector lws = Default3DLocalWS(gws, kwg_size_); std::string tuning_key = - Concat("channel_shuffle_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("channel_shuffle_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(kernel_, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -107,9 +104,7 @@ MaceStatus ChannelShuffleFunctor::operator()( return MACE_SUCCESS; } -template -struct ChannelShuffleFunctor; -template -struct ChannelShuffleFunctor; +template struct ChannelShuffleFunctor; +template struct ChannelShuffleFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 32343c11..4abcbcfe 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -22,11 +22,9 @@ namespace mace { namespace kernels { namespace { -std::vector LocalWS(const uint32_t *gws, - const uint32_t kwg_size) { +std::vector LocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t base = cache_size / kBaseGPUMemCacheSize; lws[1] = std::min(gws[1], kwg_size); lws[0] = std::min(base, kwg_size / lws[1]); @@ -37,16 +35,15 @@ std::vector LocalWS(const uint32_t *gws, } // namespace - -static void Concat2(cl::Kernel *kernel, - const Tensor *input0, - const Tensor *input1, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - 
std::unique_ptr *kernel_error) { +static MaceStatus Concat2(cl::Kernel *kernel, + const Tensor *input0, + const Tensor *input1, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -67,8 +64,8 @@ static void Concat2(cl::Kernel *kernel, if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( - new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + new Buffer(GetDeviceAllocator(DeviceType::GPU)))); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -95,7 +92,7 @@ static void Concat2(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, gws[0]); @@ -115,8 +112,8 @@ static void Concat2(cl::Kernel *kernel, const std::vector lws = LocalWS(gws, *kwg_size); std::string tuning_key = - Concat("concat_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("concat_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(*kernel, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -125,15 +122,17 @@ static void Concat2(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } + + return MACE_SUCCESS; } -static void ConcatN(cl::Kernel *kernel, - const std::vector &input_list, - const DataType dt, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr 
*kernel_error) { +static MaceStatus ConcatN(cl::Kernel *kernel, + const std::vector &input_list, + const DataType dt, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -150,7 +149,7 @@ static void ConcatN(cl::Kernel *kernel, built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -179,7 +178,7 @@ static void ConcatN(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, gws[0]); @@ -218,8 +217,8 @@ static void ConcatN(cl::Kernel *kernel, if (runtime->is_profiling_enabled()) { CallStats tmp_stats; runtime->GetCallStats(event, &tmp_stats); - call_stats.start_micros = std::min(tmp_stats.start_micros, - call_stats.start_micros); + call_stats.start_micros = + std::min(tmp_stats.start_micros, call_stats.start_micros); call_stats.end_micros += tmp_stats.end_micros - tmp_stats.start_micros; } } @@ -232,6 +231,8 @@ static void ConcatN(cl::Kernel *kernel, } }; } + + return MACE_SUCCESS; } template @@ -266,17 +267,17 @@ MaceStatus ConcatFunctor::operator()( "Dimensions of inputs should be divisible by 4 when inputs_count > 2."); std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, image_shape)); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, image_shape)); switch (inputs_count) { case 2: - Concat2(&kernel_, input_list[0], 
input_list[1], DataTypeToEnum::value, - &input_shape_, output, future, &kwg_size_, &kernel_error_); - break; + return Concat2(&kernel_, input_list[0], input_list[1], + DataTypeToEnum::value, &input_shape_, output, future, + &kwg_size_, &kernel_error_); default: if (divisible_four) { - ConcatN(&kernel_, input_list, DataTypeToEnum::value, output, future, - &kwg_size_, &kernel_error_); + return ConcatN(&kernel_, input_list, DataTypeToEnum::value, output, + future, &kwg_size_, &kernel_error_); } else { MACE_NOT_IMPLEMENTED; } diff --git a/mace/kernels/opencl/conv_2d.cc b/mace/kernels/opencl/conv_2d.cc index ce15dad0..d7706cb5 100644 --- a/mace/kernels/opencl/conv_2d.cc +++ b/mace/kernels/opencl/conv_2d.cc @@ -18,61 +18,61 @@ namespace mace { namespace kernels { -extern void Conv2dOpenclK1x1(cl::Kernel *kernel, - const Tensor *input, - const Tensor *filter, - const Tensor *bias, - const int stride, - const int *padding, - const int *dilations, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error); +extern MaceStatus Conv2dOpenclK1x1(cl::Kernel *kernel, + const Tensor *input, + const Tensor *filter, + const Tensor *bias, + const int stride, + const int *padding, + const int *dilations, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error); -extern void Conv2dOpenclK3x3(cl::Kernel *kernel, - const Tensor *input, - const Tensor *filter, - const Tensor *bias, - const int stride, - const int *padding, - const int *dilations, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error); +extern MaceStatus 
Conv2dOpenclK3x3(cl::Kernel *kernel, + const Tensor *input, + const Tensor *filter, + const Tensor *bias, + const int stride, + const int *padding, + const int *dilations, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error); -extern void Conv2dOpencl(cl::Kernel *kernel, - const Tensor *input, - const Tensor *filter, - const Tensor *bias, - const int stride, - const int *padding, - const int *dilations, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error); +extern MaceStatus Conv2dOpencl(cl::Kernel *kernel, + const Tensor *input, + const Tensor *filter, + const Tensor *bias, + const int stride, + const int *padding, + const int *dilations, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error); template MaceStatus Conv2dFunctor::operator()(const Tensor *input, - const Tensor *filter, - const Tensor *bias, - Tensor *output, - StatsFuture *future) { - typedef void (*Conv2dOpenclFunction)( + const Tensor *filter, + const Tensor *bias, + Tensor *output, + StatsFuture *future) { + typedef MaceStatus (*Conv2dOpenclFunction)( cl::Kernel * kernel, const Tensor *input, const Tensor *filter, const Tensor *bias, const int stride, const int *padding, const int *dilations, const ActivationType activation, @@ -111,23 +111,21 @@ MaceStatus Conv2dFunctor::operator()(const Tensor *input, std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, output_image_shape)); + 
MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); if (kernel_h == kernel_w && kernel_h <= 5 && selector[kernel_h - 1] != nullptr) { auto conv2d_func = selector[kernel_h - 1]; - conv2d_func(&kernel_, input, filter, bias, strides_[0], paddings.data(), - dilations_, activation_, relux_max_limit_, - DataTypeToEnum::value, &input_shape_, output, future, - &kwg_size_, &kernel_error_); + return conv2d_func( + &kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, + activation_, relux_max_limit_, DataTypeToEnum::value, &input_shape_, + output, future, &kwg_size_, &kernel_error_); } else { - Conv2dOpencl(&kernel_, input, filter, bias, strides_[0], paddings.data(), - dilations_, activation_, relux_max_limit_, - DataTypeToEnum::value, &input_shape_, output, future, - &kwg_size_, &kernel_error_); + return Conv2dOpencl( + &kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, + activation_, relux_max_limit_, DataTypeToEnum::value, &input_shape_, + output, future, &kwg_size_, &kernel_error_); } - - return MACE_SUCCESS; } template struct Conv2dFunctor; diff --git a/mace/kernels/opencl/conv_2d_1x1.cc b/mace/kernels/opencl/conv_2d_1x1.cc index 5b79ea66..3f9596df 100644 --- a/mace/kernels/opencl/conv_2d_1x1.cc +++ b/mace/kernels/opencl/conv_2d_1x1.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/conv_2d.h" #include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/kernels/conv_2d.h" #include "mace/kernels/opencl/helper.h" #include "mace/utils/tuner.h" @@ -25,11 +25,9 @@ namespace { const uint32_t kernel_cache_size = (4 + 4 + 4) * 4 * 4; // TODO(liuqi): Fix the specific value. 
const uint32_t lws_limit = 128; -std::vector LocalWS(const uint32_t *gws, - const uint32_t kwg_size) { +std::vector LocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t compute_units = OpenCLRuntime::Global()->device_compute_units(); uint32_t base = cache_size / kBaseGPUMemCacheSize; lws[1] = std::min(gws[1], kwg_size); @@ -46,8 +44,7 @@ std::vector LocalWS(const uint32_t *gws, lws[0] = std::min(lws[0], kwg_size / lws[1]); const uint32_t lws_size = lws[0] * lws[1]; lws[2] = std::min( - (cache_size / kernel_cache_size / lws_size / compute_units) * 8, - gws[2]); + (cache_size / kernel_cache_size / lws_size / compute_units) * 8, gws[2]); if (lws[2] == 0) { lws[2] = std::min(gws[2], base); } @@ -57,21 +54,21 @@ std::vector LocalWS(const uint32_t *gws, } // namespace -extern void Conv2dOpenclK1x1(cl::Kernel *kernel, - const Tensor *input, - const Tensor *filter, - const Tensor *bias, - const int stride, - const int *padding, - const int *dilations, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error) { +extern MaceStatus Conv2dOpenclK1x1(cl::Kernel *kernel, + const Tensor *input, + const Tensor *filter, + const Tensor *bias, + const int stride, + const int *padding, + const int *dilations, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error) { MACE_UNUSED(padding); MACE_UNUSED(dilations); const index_t batch = output->dim(0); @@ -101,7 +98,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = 
std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -145,7 +142,7 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, gws[0]); @@ -172,8 +169,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, std::vector lws = LocalWS(gws, *kwg_size); std::string tuning_key = - Concat("conv2d_1x1_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("conv2d_1x1_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(*kernel, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -182,6 +179,8 @@ extern void Conv2dOpenclK1x1(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } + + return MACE_SUCCESS; } } // namespace kernels diff --git a/mace/kernels/opencl/conv_2d_3x3.cc b/mace/kernels/opencl/conv_2d_3x3.cc index 5386c417..0da37fc1 100644 --- a/mace/kernels/opencl/conv_2d_3x3.cc +++ b/mace/kernels/opencl/conv_2d_3x3.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "mace/kernels/conv_2d.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/activation.h" +#include "mace/kernels/conv_2d.h" #include "mace/kernels/opencl/helper.h" #include "mace/utils/tuner.h" #include "mace/utils/utils.h" @@ -24,22 +24,20 @@ namespace kernels { namespace { // (inputs + weights + outputs) * array_size * sizeof(float) const uint32_t kernel_cache_size = (5 + 4 + 5) * 4 * 4; -std::vector LocalWS(const uint32_t *gws, - const uint32_t kwg_size) { +std::vector LocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t compute_units = std::max( OpenCLRuntime::Global()->device_compute_units() / 2, 1); - const uint32_t base = std::min(cache_size / kBaseGPUMemCacheSize, - 4); + const uint32_t base = + std::min(cache_size / kBaseGPUMemCacheSize, 4); lws[1] = std::min(gws[1], kwg_size); - lws[0] = std::min(std::min(gws[0], base), - kwg_size / lws[1]); + lws[0] = + std::min(std::min(gws[0], base), kwg_size / lws[1]); const uint32_t lws_size = lws[0] * lws[1]; lws[2] = std::min( - RoundUp(cache_size / kernel_cache_size / - lws_size / compute_units, base), + RoundUp( + cache_size / kernel_cache_size / lws_size / compute_units, base), gws[2]); if (lws[2] == 0) { lws[2] = std::min(gws[2], base); @@ -50,21 +48,21 @@ std::vector LocalWS(const uint32_t *gws, } // namespace -extern void Conv2dOpenclK3x3(cl::Kernel *kernel, - const Tensor *input, - const Tensor *filter, - const Tensor *bias, - const int stride, - const int *padding, - const int *dilations, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error) { +extern MaceStatus Conv2dOpenclK3x3(cl::Kernel *kernel, + const Tensor 
*input, + const Tensor *filter, + const Tensor *bias, + const int stride, + const int *padding, + const int *dilations, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -87,7 +85,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -129,7 +127,7 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, gws[0]); @@ -159,8 +157,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, std::vector lws = LocalWS(gws, *kwg_size); std::string tuning_key = - Concat("conv2d_3x3_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("conv2d_3x3_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(*kernel, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -169,6 +167,8 @@ extern void Conv2dOpenclK3x3(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } + + return MACE_SUCCESS; } } // namespace kernels diff --git a/mace/kernels/opencl/conv_2d_general.cc b/mace/kernels/opencl/conv_2d_general.cc index e44d8981..127df7e9 100644 --- 
a/mace/kernels/opencl/conv_2d_general.cc +++ b/mace/kernels/opencl/conv_2d_general.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "mace/kernels/conv_2d.h" #include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/kernels/activation.h" +#include "mace/kernels/conv_2d.h" #include "mace/kernels/opencl/helper.h" #include "mace/utils/tuner.h" #include "mace/utils/utils.h" @@ -30,8 +30,7 @@ std::vector LocalWS(const uint32_t *gws, const uint32_t kernel_size, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t compute_units = OpenCLRuntime::Global()->device_compute_units(); uint32_t base = cache_size / kBaseGPUMemCacheSize; lws[1] = std::min(gws[1], kwg_size); @@ -41,10 +40,10 @@ std::vector LocalWS(const uint32_t *gws, } lws[0] = std::min(lws[0], kwg_size / lws[1]); const uint32_t lws_size = lws[0] * lws[1]; - lws[2] = std::min( - (cache_size / kernel_cache_size / kernel_size / lws_size / compute_units) - * 8, - gws[2]); + lws[2] = std::min((cache_size / kernel_cache_size / kernel_size / + lws_size / compute_units) * + 8, + gws[2]); if (lws[2] == 0) { if (gws[2] < lws_limit) { lws[2] = gws[2]; @@ -58,21 +57,21 @@ std::vector LocalWS(const uint32_t *gws, } // namespace -extern void Conv2dOpencl(cl::Kernel *kernel, - const Tensor *input, - const Tensor *filter, - const Tensor *bias, - const int stride, - const int *padding, - const int *dilations, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error) { +extern MaceStatus Conv2dOpencl(cl::Kernel *kernel, + const Tensor *input, + const Tensor *filter, + const Tensor *bias, + const int stride, + const 
int *padding, + const int *dilations, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -95,7 +94,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel, built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -137,7 +136,7 @@ extern void Conv2dOpencl(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, gws[0]); @@ -168,11 +167,10 @@ extern void Conv2dOpencl(cl::Kernel *kernel, } std::string tuning_key = - Concat("conv2d_general_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3), - filter->dim(2), filter->dim(3)); + Concat("conv2d_general_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3), filter->dim(2), filter->dim(3)); std::vector lws = - LocalWS(gws, filter->dim(2) * filter->dim(3), *kwg_size); + LocalWS(gws, filter->dim(2) * filter->dim(3), *kwg_size); TuningOrRun3DKernel(*kernel, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -181,6 +179,8 @@ extern void Conv2dOpencl(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } + + return MACE_SUCCESS; } } // namespace kernels diff --git a/mace/kernels/opencl/deconv_2d_opencl.cc 
b/mace/kernels/opencl/deconv_2d_opencl.cc index f45c2824..486bf064 100644 --- a/mace/kernels/opencl/deconv_2d_opencl.cc +++ b/mace/kernels/opencl/deconv_2d_opencl.cc @@ -20,20 +20,20 @@ namespace kernels { namespace { -void Deconv2dOpencl(cl::Kernel *kernel, - const Tensor *input, - const Tensor *filter, - const Tensor *bias, - const int stride, - const int *paddings, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error) { +MaceStatus Deconv2dOpencl(cl::Kernel *kernel, + const Tensor *input, + const Tensor *filter, + const Tensor *bias, + const int stride, + const int *paddings, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -46,10 +46,10 @@ void Deconv2dOpencl(cl::Kernel *kernel, #define MACE_WIDTH_BLK 5 const index_t n_strides = (width + stride - 1) / stride; const index_t width_blocks = - ((n_strides + MACE_WIDTH_BLK -1)/ MACE_WIDTH_BLK) * stride; + ((n_strides + MACE_WIDTH_BLK - 1) / MACE_WIDTH_BLK) * stride; const float stride_r = 1.f / static_cast(stride); - const int padding_h = (paddings[0]+1) >> 1; - const int padding_w = (paddings[0]+1) >> 1; + const int padding_h = (paddings[0] + 1) >> 1; + const int padding_w = (paddings[0] + 1) >> 1; const int align_h = stride - 1 - padding_h; const int align_w = stride - 1 - padding_w; @@ -67,7 +67,7 @@ void Deconv2dOpencl(cl::Kernel *kernel, built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); 
(*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -77,16 +77,22 @@ void Deconv2dOpencl(cl::Kernel *kernel, } built_options.emplace(bias != nullptr ? "-DBIAS" : ""); switch (activation) { - case NOOP:break; - case RELU:built_options.emplace("-DUSE_RELU"); + case NOOP: break; - case RELUX:built_options.emplace("-DUSE_RELUX"); + case RELU: + built_options.emplace("-DUSE_RELU"); break; - case TANH:built_options.emplace("-DUSE_TANH"); + case RELUX: + built_options.emplace("-DUSE_RELUX"); break; - case SIGMOID:built_options.emplace("-DUSE_SIGMOID"); + case TANH: + built_options.emplace("-DUSE_TANH"); break; - default:LOG(FATAL) << "Unknown activation type: " << activation; + case SIGMOID: + built_options.emplace("-DUSE_SIGMOID"); + break; + default: + LOG(FATAL) << "Unknown activation type: " << activation; } *kernel = runtime->BuildKernel("deconv_2d", kernel_name, built_options); @@ -150,16 +156,19 @@ void Deconv2dOpencl(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } + + return MACE_SUCCESS; } } // namespace template -MaceStatus Deconv2dFunctor::operator()(const Tensor *input, - const Tensor *filter, - const Tensor *bias, - Tensor *output, - StatsFuture *future) { +MaceStatus Deconv2dFunctor::operator()( + const Tensor *input, + const Tensor *filter, + const Tensor *bias, + Tensor *output, + StatsFuture *future) { MACE_CHECK_NOTNULL(input); MACE_CHECK_NOTNULL(filter); MACE_CHECK_NOTNULL(output); @@ -167,34 +176,25 @@ MaceStatus Deconv2dFunctor::operator()(const Tensor *input, if (output_shape_.size() == 4) { paddings_.clear(); paddings_ = std::vector(2, 0); - CalcDeconvPaddingAndInputSize( - input->shape().data(), - filter->shape().data(), - strides_, padding_type_, - output_shape_.data(), - paddings_.data()); + CalcDeconvPaddingAndInputSize(input->shape().data(), filter->shape().data(), + strides_, padding_type_, output_shape_.data(), + 
paddings_.data()); } else { output_shape_.clear(); output_shape_ = std::vector(4, 0); - CalcDeconvOutputSize(input->shape().data(), - filter->shape().data(), - strides_, - output_shape_.data(), - paddings_.data()); + CalcDeconvOutputSize(input->shape().data(), filter->shape().data(), + strides_, output_shape_.data(), paddings_.data()); } std::vector output_image_shape; CalImage2DShape(output_shape_, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape_, output_image_shape)); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape_, output_image_shape)); - Deconv2dOpencl(&kernel_, input, filter, bias, - strides_[0], paddings_.data(), - activation_, relux_max_limit_, - DataTypeToEnum::value, &input_shape_, - output, future, &kwg_size_, &kernel_error_); - - return MACE_SUCCESS; + return Deconv2dOpencl(&kernel_, input, filter, bias, strides_[0], + paddings_.data(), activation_, relux_max_limit_, + DataTypeToEnum::value, &input_shape_, output, future, + &kwg_size_, &kernel_error_); } template struct Deconv2dFunctor; diff --git a/mace/kernels/opencl/depth_to_space.cc b/mace/kernels/opencl/depth_to_space.cc index ab713161..bf339f40 100644 --- a/mace/kernels/opencl/depth_to_space.cc +++ b/mace/kernels/opencl/depth_to_space.cc @@ -40,7 +40,7 @@ MaceStatus DepthToSpaceOpFunctor::operator()( output_width = input_width * block_size_; output_depth = input_depth / (block_size_ * block_size_); MACE_CHECK(output_depth % 4 == 0, "output channel not support:") - << output_depth; + << output_depth; kernel_name = "depth_to_space"; gws[0] = static_cast(RoundUpDiv4(output_depth)); @@ -53,7 +53,7 @@ MaceStatus DepthToSpaceOpFunctor::operator()( output_width = input_width / block_size_; output_depth = input_depth * block_size_ * block_size_; MACE_CHECK(input_depth % 4 == 0, "input channel not support:") - << input_depth; + << input_depth; kernel_name = "space_to_depth"; gws[0] = static_cast(RoundUpDiv4(input_depth)); @@ -70,7 +70,7 @@ 
MaceStatus DepthToSpaceOpFunctor::operator()( std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, image_shape)); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, image_shape)); auto runtime = OpenCLRuntime::Global(); @@ -87,7 +87,7 @@ MaceStatus DepthToSpaceOpFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -95,9 +95,8 @@ MaceStatus DepthToSpaceOpFunctor::operator()( if (runtime->IsNonUniformWorkgroupsSupported()) { built_options.emplace("-DNON_UNIFORM_WORK_GROUP"); } - kernel_ = - runtime->BuildKernel("depth_to_space", - obfuscated_kernel_name, built_options); + kernel_ = runtime->BuildKernel("depth_to_space", obfuscated_kernel_name, + built_options); kwg_size_ = static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); @@ -107,7 +106,7 @@ MaceStatus DepthToSpaceOpFunctor::operator()( uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); diff --git a/mace/kernels/opencl/depthwise_conv.cc b/mace/kernels/opencl/depthwise_conv.cc index 2711ee97..7ca2d5d7 100644 --- a/mace/kernels/opencl/depthwise_conv.cc +++ b/mace/kernels/opencl/depthwise_conv.cc @@ -24,8 +24,7 @@ namespace kernels { namespace { // (inputs + weights + outputs) * array_size * sizeof(float) const uint32_t kernel_cache_size = (4 + 4 + 1) * 4 * 4; -std::vector LocalWS(const uint32_t *gws, - const uint32_t kwg_size) { +std::vector LocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); 
uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t min_lws0 = cache_size / kBaseGPUMemCacheSize; @@ -40,9 +39,8 @@ std::vector LocalWS(const uint32_t *gws, } } const uint32_t lws_size = lws[0] * lws[1]; - lws[2] = std::min( - (cache_size / kernel_cache_size / lws_size) * 4, - gws[2]); + lws[2] = std::min((cache_size / kernel_cache_size / lws_size) * 4, + gws[2]); if (lws[2] == 0) { lws[2] = gws[2]; } @@ -52,21 +50,21 @@ std::vector LocalWS(const uint32_t *gws, } // namespace -static void DepthwiseConv2d(cl::Kernel *kernel, - const Tensor *input, // NHWC - const Tensor *filter, // HWIM - const Tensor *bias, - const int stride, - const int *paddings, - const int *dilations, - const ActivationType activation, - const float relux_max_limit, - const DataType dt, - std::vector *prev_input_shape, - Tensor *output, - StatsFuture *future, - uint32_t *kwg_size, - std::unique_ptr *kernel_error) { +static MaceStatus DepthwiseConv2d(cl::Kernel *kernel, + const Tensor *input, // NHWC + const Tensor *filter, // HWIM + const Tensor *bias, + const int stride, + const int *paddings, + const int *dilations, + const ActivationType activation, + const float relux_max_limit, + const DataType dt, + std::vector *prev_input_shape, + Tensor *output, + StatsFuture *future, + uint32_t *kwg_size, + std::unique_ptr *kernel_error) { const index_t batch = output->dim(0); const index_t height = output->dim(1); const index_t width = output->dim(2); @@ -98,7 +96,7 @@ static void DepthwiseConv2d(cl::Kernel *kernel, built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -149,7 +147,7 @@ static void DepthwiseConv2d(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { 
kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, gws[0]); @@ -181,8 +179,8 @@ static void DepthwiseConv2d(cl::Kernel *kernel, } const std::vector lws = LocalWS(gws, *kwg_size); - std::string tuning_key = Concat("depthwise_conv2d_ocl_kernel", - gws[0], gws[1], gws[2], multiplier); + std::string tuning_key = + Concat("depthwise_conv2d_ocl_kernel", gws[0], gws[1], gws[2], multiplier); TuningOrRun3DKernel(*kernel, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -191,6 +189,8 @@ static void DepthwiseConv2d(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } + + return MACE_SUCCESS; } template @@ -200,7 +200,6 @@ MaceStatus DepthwiseConv2dFunctor::operator()( const Tensor *bias, Tensor *output, StatsFuture *future) { - index_t kernel_h = filter->dim(2); index_t kernel_w = filter->dim(3); if (strides_[0] != strides_[1]) { @@ -237,14 +236,12 @@ MaceStatus DepthwiseConv2dFunctor::operator()( std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, output_image_shape)); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); - DepthwiseConv2d(&kernel_, input, filter, bias, strides_[0], paddings.data(), - dilations_, activation_, relux_max_limit_, - DataTypeToEnum::value, &input_shape_, output, future, - &kwg_size_, &kernel_error_); - - return MACE_SUCCESS; + return DepthwiseConv2d( + &kernel_, input, filter, bias, strides_[0], paddings.data(), dilations_, + activation_, relux_max_limit_, DataTypeToEnum::value, &input_shape_, + output, future, &kwg_size_, &kernel_error_); } template struct DepthwiseConv2dFunctor; diff --git a/mace/kernels/opencl/eltwise.cc b/mace/kernels/opencl/eltwise.cc index 
4cedb051..c0a74c42 100644 --- a/mace/kernels/opencl/eltwise.cc +++ b/mace/kernels/opencl/eltwise.cc @@ -22,16 +22,15 @@ namespace kernels { template MaceStatus EltwiseFunctor::operator()(const Tensor *input0, - const Tensor *input1, - Tensor *output, - StatsFuture *future) { + const Tensor *input1, + Tensor *output, + StatsFuture *future) { MACE_UNUSED(future); bool swapped = false; if (input1 != nullptr) { - MACE_CHECK(input0->dim_size() == input1->dim_size() - || input0->dim_size() == 1 - || input1->dim_size() == 1) - << "Inputs of Eltwise op must be same shape"; + MACE_CHECK(input0->dim_size() == input1->dim_size() || + input0->dim_size() == 1 || input1->dim_size() == 1) + << "Inputs of Eltwise op must be same shape"; if (input0->size() != input1->size()) { if (input0->size() < input1->size()) { std::swap(input0, input1); @@ -39,28 +38,26 @@ MaceStatus EltwiseFunctor::operator()(const Tensor *input0, } if (input1->dim_size() == 1) { MACE_CHECK(input0->dim(3) == input1->dim(0)) - << "Element-Wise op only support channel dimension broadcast"; + << "Element-Wise op only support channel dimension broadcast"; } else { MACE_CHECK((input0->dim(0) == input1->dim(0) || input1->dim(0) == 1) && - input0->dim(3) == input1->dim(3) && - input1->dim(1) == 1 && - input1->dim(2) == 1) - << "Element-Wise op only support channel dimension broadcast"; + input0->dim(3) == input1->dim(3) && input1->dim(1) == 1 && + input1->dim(2) == 1) + << "Element-Wise op only support channel dimension broadcast"; } } } - std::vector output_shape(4); + std::vector output_shape(4); output_shape[0] = input0->dim(0); output_shape[1] = input0->dim(1); output_shape[2] = input0->dim(2); output_shape[3] = input0->dim(3); std::vector output_image_shape; - CalImage2DShape(output_shape, - BufferType::IN_OUT_CHANNEL, + CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, output_image_shape)); + 
MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); const index_t batch = output->dim(0); const index_t height = output->dim(1); @@ -98,7 +95,7 @@ MaceStatus EltwiseFunctor::operator()(const Tensor *input0, built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -115,7 +112,7 @@ MaceStatus EltwiseFunctor::operator()(const Tensor *input0, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -142,8 +139,8 @@ MaceStatus EltwiseFunctor::operator()(const Tensor *input0, const std::vector lws = Default3DLocalWS(gws, kwg_size_); std::string tuning_key = - Concat("eltwise_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("eltwise_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(kernel_, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { kernel_error_->Map(nullptr); diff --git a/mace/kernels/opencl/fully_connected.cc b/mace/kernels/opencl/fully_connected.cc index 6ebfdef0..a6ca36bf 100644 --- a/mace/kernels/opencl/fully_connected.cc +++ b/mace/kernels/opencl/fully_connected.cc @@ -20,18 +20,18 @@ namespace kernels { namespace { template -void FCWXKernel(cl::Kernel *kernel, - const Tensor *input, - const Tensor *weight, - const Tensor *bias, - std::vector *prev_input_shape, - Tensor *output, - const ActivationType activation, - std::vector *gws, - std::vector *lws, - const float relux_max_limit, - StatsFuture *future, - std::unique_ptr *kernel_error) { +MaceStatus 
FCWXKernel(cl::Kernel *kernel, + const Tensor *input, + const Tensor *weight, + const Tensor *bias, + std::vector *prev_input_shape, + Tensor *output, + const ActivationType activation, + std::vector *gws, + std::vector *lws, + const float relux_max_limit, + StatsFuture *future, + std::unique_ptr *kernel_error) { MACE_CHECK_NOTNULL(gws); MACE_CHECK_NOTNULL(lws); auto runtime = OpenCLRuntime::Global(); @@ -75,7 +75,7 @@ void FCWXKernel(cl::Kernel *kernel, built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -115,7 +115,7 @@ void FCWXKernel(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, (*gws)[0]); @@ -170,21 +170,23 @@ void FCWXKernel(cl::Kernel *kernel, } }; } + + return MACE_SUCCESS; } template -void FCWTXKernel(cl::Kernel *kernel, - const Tensor *input, - const Tensor *weight, - const Tensor *bias, - std::vector *prev_input_shape, - Tensor *output, - const ActivationType activation, - std::vector *gws, - std::vector *lws, - const float relux_max_limit, - StatsFuture *future, - std::unique_ptr *kernel_error) { +MaceStatus FCWTXKernel(cl::Kernel *kernel, + const Tensor *input, + const Tensor *weight, + const Tensor *bias, + std::vector *prev_input_shape, + Tensor *output, + const ActivationType activation, + std::vector *gws, + std::vector *lws, + const float relux_max_limit, + StatsFuture *future, + std::unique_ptr *kernel_error) { MACE_CHECK_NOTNULL(gws); MACE_CHECK_NOTNULL(lws); auto runtime = OpenCLRuntime::Global(); @@ -202,7 +204,7 @@ void FCWTXKernel(cl::Kernel *kernel, 
built_options.emplace("-DOUT_OF_RANGE_CHECK"); *kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - (*kernel_error)->Allocate(1); + MACE_RETURN_IF_ERROR((*kernel_error)->Allocate(1)); (*kernel_error)->Map(nullptr); *((*kernel_error)->mutable_data()) = 0; (*kernel_error)->UnMap(); @@ -233,7 +235,7 @@ void FCWTXKernel(cl::Kernel *kernel, uint32_t kwg_size = static_cast(runtime->GetKernelMaxWorkGroupSize(*kernel)); - *lws = {16, kwg_size/16, 0}; + *lws = {16, kwg_size / 16, 0}; } if (!IsVecEqual(*prev_input_shape, input->shape())) { const index_t batch = output->dim(0); @@ -246,7 +248,7 @@ void FCWTXKernel(cl::Kernel *kernel, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel->setArg(idx++, - *(static_cast((*kernel_error)->buffer()))); + *(static_cast((*kernel_error)->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel->setArg(idx++, (*gws)[0]); @@ -268,8 +270,8 @@ void FCWTXKernel(cl::Kernel *kernel, } std::string tuning_key = - Concat("fc_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("fc_opencl_kernel", output->dim(0), output->dim(1), output->dim(2), + output->dim(3)); TuningOrRun2DKernel(*kernel, tuning_key, gws->data(), *lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -278,6 +280,8 @@ void FCWTXKernel(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } + + return MACE_SUCCESS; } } // namespace @@ -292,13 +296,11 @@ MaceStatus FullyConnectedFunctor::operator()( std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, output_image_shape)); - - FCWXKernel(&kernel_, input, weight, bias, &input_shape_, output, - activation_, &gws_, &lws_, relux_max_limit_, future, - &kernel_error_); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, 
output_image_shape)); - return MACE_SUCCESS; + return FCWXKernel(&kernel_, input, weight, bias, &input_shape_, output, + activation_, &gws_, &lws_, relux_max_limit_, future, + &kernel_error_); } template struct FullyConnectedFunctor; diff --git a/mace/kernels/opencl/helper.cc b/mace/kernels/opencl/helper.cc index f6c3d83c..c14aac1e 100644 --- a/mace/kernels/opencl/helper.cc +++ b/mace/kernels/opencl/helper.cc @@ -209,12 +209,11 @@ std::string DtToUpstreamCLCMDDt(const DataType dt) { std::vector Default3DLocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t base = cache_size / kBaseGPUMemCacheSize; lws[1] = std::min(gws[1], kwg_size); - lws[2] = std::min(std::min(gws[2], base), - kwg_size / lws[1]); + lws[2] = + std::min(std::min(gws[2], base), kwg_size / lws[1]); const uint32_t lws_size = lws[1] * lws[2]; lws[0] = std::min(base, kwg_size / lws_size); return lws; @@ -278,7 +277,7 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, MACE_CHECK(params.size() == 4) << "Tuning parameters of 3D kernel must be 4D"; cl_int error = CL_SUCCESS; - std::vector internal_gws(gws, gws+3); + std::vector internal_gws(gws, gws + 3); if (!runtime->IsNonUniformWorkgroupsSupported()) { for (size_t i = 0; i < 3; ++i) { internal_gws[i] = RoundUp(gws[i], params[i]); @@ -287,12 +286,12 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, if (timer == nullptr) { uint32_t block_size = params[3] == 0 ? 
internal_gws[2] : params[3]; - const uint32_t num_blocks = RoundUpDiv(internal_gws[2], - block_size); + const uint32_t num_blocks = + RoundUpDiv(internal_gws[2], block_size); for (uint32_t i = 0; i < num_blocks; ++i) { uint32_t gws2 = block_size; - if (runtime->IsNonUniformWorkgroupsSupported() - && (i == num_blocks - 1)) { + if (runtime->IsNonUniformWorkgroupsSupported() && + (i == num_blocks - 1)) { gws2 = (internal_gws[2] - (i * block_size)); } error = runtime->command_queue().enqueueNDRangeKernel( @@ -324,8 +323,8 @@ void TuningOrRun3DKernel(const cl::Kernel &kernel, num_blocks = RoundUpDiv(internal_gws[2], block_size); for (uint32_t i = 0; i < num_blocks; ++i) { uint32_t gws2 = block_size; - if (runtime->IsNonUniformWorkgroupsSupported() - && (i == num_blocks - 1)) { + if (runtime->IsNonUniformWorkgroupsSupported() && + (i == num_blocks - 1)) { gws2 = (internal_gws[2] - (i * block_size)); } error = runtime->command_queue().enqueueNDRangeKernel( @@ -365,17 +364,11 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, static_cast(runtime->GetKernelMaxWorkGroupSize(kernel)); std::vector> results; std::vector> candidates = { - {kwg_size / 2, 2, 0}, - {kwg_size / 4, 4, 0}, - {kwg_size / 8, 8, 0}, - {kwg_size / 16, 16, 0}, - {kwg_size / 32, 32, 0}, - {kwg_size / 64, 64, 0}, - {kwg_size / 128, 128, 0}, - {kwg_size / 256, 256, 0}, - {kwg_size, 1, 0}, - {1, kwg_size, 0} - }; + {kwg_size / 2, 2, 0}, {kwg_size / 4, 4, 0}, + {kwg_size / 8, 8, 0}, {kwg_size / 16, 16, 0}, + {kwg_size / 32, 32, 0}, {kwg_size / 64, 64, 0}, + {kwg_size / 128, 128, 0}, {kwg_size / 256, 256, 0}, + {kwg_size, 1, 0}, {1, kwg_size, 0}}; for (auto &ele : candidates) { const uint32_t tmp = ele[0] * ele[1] * ele[2]; if (0 < tmp && tmp <= kwg_size) { @@ -390,7 +383,7 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, MACE_CHECK(params.size() == 3) << "Tuning parameters of 2D kernel must be 3d"; cl_int error = CL_SUCCESS; - std::vector internal_gws(gws, gws+2); + std::vector internal_gws(gws, gws + 
2); if (!runtime->IsNonUniformWorkgroupsSupported()) { for (size_t i = 0; i < 2; ++i) { internal_gws[i] = RoundUp(gws[i], params[i]); @@ -399,12 +392,12 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, if (timer == nullptr) { uint32_t block_size = params[2] == 0 ? internal_gws[1] : params[2]; - const uint32_t num_blocks = RoundUpDiv(internal_gws[1], - block_size); + const uint32_t num_blocks = + RoundUpDiv(internal_gws[1], block_size); for (uint32_t i = 0; i < num_blocks; ++i) { uint32_t gws1 = block_size; - if (runtime->IsNonUniformWorkgroupsSupported() - && (i == num_blocks - 1)) { + if (runtime->IsNonUniformWorkgroupsSupported() && + (i == num_blocks - 1)) { gws1 = (internal_gws[1] - (i * block_size)); } error = runtime->command_queue().enqueueNDRangeKernel( @@ -435,8 +428,8 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, num_blocks = RoundUpDiv(internal_gws[1], block_size); for (uint32_t i = 0; i < num_blocks; ++i) { uint32_t gws1 = block_size; - if (runtime->IsNonUniformWorkgroupsSupported() - && (i == num_blocks - 1)) { + if (runtime->IsNonUniformWorkgroupsSupported() && + (i == num_blocks - 1)) { gws1 = (internal_gws[1] - (i * block_size)); } error = runtime->command_queue().enqueueNDRangeKernel( @@ -463,6 +456,5 @@ void TuningOrRun2DKernel(const cl::Kernel &kernel, } } - } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/helper.h b/mace/kernels/opencl/helper.h index ae3f06cf..ca9eef17 100644 --- a/mace/kernels/opencl/helper.h +++ b/mace/kernels/opencl/helper.h @@ -88,10 +88,9 @@ inline bool LimitKernelTime() { } template -bool IsVecEqual(const std::vector &input0, - const std::vector &input1) { +bool IsVecEqual(const std::vector &input0, const std::vector &input1) { return ((input0.size() == input1.size()) && - (std::equal(input0.begin(), input0.end(), input1.begin()))); + (std::equal(input0.begin(), input0.end(), input1.begin()))); } template diff --git a/mace/kernels/opencl/image_to_buffer.cc 
b/mace/kernels/opencl/image_to_buffer.cc index 8b83b88e..132b146c 100644 --- a/mace/kernels/opencl/image_to_buffer.cc +++ b/mace/kernels/opencl/image_to_buffer.cc @@ -25,10 +25,9 @@ MaceStatus ImageToBufferFunctor::operator()( const BufferType type, Tensor *buffer, StatsFuture *future) { - std::vector image_shape; CalImage2DShape(image->shape(), type, &image_shape); - MACE_FAILURE_RETURN(buffer->Resize(image->shape())); + MACE_RETURN_IF_ERROR(buffer->Resize(image->shape())); uint32_t gws[2] = {static_cast(image_shape[0]), static_cast(image_shape[1])}; @@ -87,7 +86,7 @@ MaceStatus ImageToBufferFunctor::operator()( if (!kernel_error_) { kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -100,7 +99,7 @@ MaceStatus ImageToBufferFunctor::operator()( uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { b2f_kernel.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { b2f_kernel.setArg(idx++, gws[0]); @@ -108,8 +107,7 @@ MaceStatus ImageToBufferFunctor::operator()( } b2f_kernel.setArg(idx++, *(buffer->opencl_buffer())); if (type == CONV2D_FILTER) { - const index_t inner_size = - buffer->dim(1) * buffer->dim(2) * buffer->dim(3); + const index_t inner_size = buffer->dim(1) * buffer->dim(2) * buffer->dim(3); b2f_kernel.setArg(idx++, static_cast(buffer->dim(0))); b2f_kernel.setArg(idx++, static_cast(buffer->dim(2))); b2f_kernel.setArg(idx++, static_cast(buffer->dim(3))); diff --git a/mace/kernels/opencl/matmul.cc b/mace/kernels/opencl/matmul.cc index e47698a8..eb7f0e53 100644 --- a/mace/kernels/opencl/matmul.cc +++ b/mace/kernels/opencl/matmul.cc @@ -22,14 +22,14 @@ namespace kernels { template MaceStatus MatMulFunctor::operator()(const Tensor *A, - const 
Tensor *B, - Tensor *C, - StatsFuture *future) { + const Tensor *B, + Tensor *C, + StatsFuture *future) { MACE_UNUSED(future); std::vector c_shape = {A->dim(0), A->dim(1), B->dim(2), 1}; std::vector c_image_shape; CalImage2DShape(c_shape, BufferType::IN_OUT_HEIGHT, &c_image_shape); - MACE_FAILURE_RETURN(C->ResizeImage(c_shape, c_image_shape)); + MACE_RETURN_IF_ERROR(C->ResizeImage(c_shape, c_image_shape)); const index_t batch = C->dim(0); const index_t height = C->dim(1); @@ -55,7 +55,7 @@ MaceStatus MatMulFunctor::operator()(const Tensor *A, built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -71,7 +71,7 @@ MaceStatus MatMulFunctor::operator()(const Tensor *A, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -87,9 +87,8 @@ MaceStatus MatMulFunctor::operator()(const Tensor *A, kernel_.setArg(idx++, static_cast(RoundUpDiv4(A->dim(2)))); const std::vector lws = {kwg_size_ / 64, 64, 0}; - std::string tuning_key = - Concat("matmul_opencl_kernel", C->dim(0), - C->dim(1), C->dim(2), C->dim(3)); + std::string tuning_key = Concat("matmul_opencl_kernel", C->dim(0), C->dim(1), + C->dim(2), C->dim(3)); TuningOrRun2DKernel(kernel_, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { diff --git a/mace/kernels/opencl/out_of_range_check_test.cc b/mace/kernels/opencl/out_of_range_check_test.cc index 1a6bbe6e..71907c24 100644 --- a/mace/kernels/opencl/out_of_range_check_test.cc +++ b/mace/kernels/opencl/out_of_range_check_test.cc @@ -58,7 +58,7 @@ bool BufferToImageOpImpl(Tensor *buffer, 
built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error->Allocate(1)); kernel_error->Map(nullptr); *(kernel_error->mutable_data()) = 0; kernel_error->UnMap(); @@ -70,7 +70,7 @@ bool BufferToImageOpImpl(Tensor *buffer, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { b2f_kernel.setArg(idx++, - *(static_cast(kernel_error->buffer()))); + *(static_cast(kernel_error->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { b2f_kernel.setArg(idx++, gws[0]); @@ -113,8 +113,7 @@ bool BufferToImageOpImpl(Tensor *buffer, bool is_out_of_range = false; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_error->Map(nullptr); - is_out_of_range = - *(kernel_error->mutable_data()) == 1 ? true : false; + is_out_of_range = *(kernel_error->mutable_data()) == 1 ? true : false; kernel_error->UnMap(); } return is_out_of_range; @@ -124,9 +123,7 @@ bool BufferToImageOpImpl(Tensor *buffer, class OutOfRangeCheckTest : public ::testing::Test { protected: - virtual void SetUp() { - setenv("OUT_OF_RANGE_CHECK", "1", 1); - } + virtual void SetUp() { setenv("OUT_OF_RANGE_CHECK", "1", 1); } }; TEST(OutOfRangeCheckTest, RandomTest) { @@ -137,14 +134,13 @@ TEST(OutOfRangeCheckTest, RandomTest) { std::vector buffer_shape = {batch, height, width, channels}; Workspace ws; - Tensor *buffer = ws.CreateTensor("Buffer", - GetDeviceAllocator(DeviceType::GPU), - DataTypeToEnum::v()); + Tensor *buffer = + ws.CreateTensor("Buffer", GetDeviceAllocator(DeviceType::GPU), + DataTypeToEnum::v()); buffer->Resize(buffer_shape); std::vector image_shape; - Tensor *image = ws.CreateTensor("Image", - GetDeviceAllocator(DeviceType::GPU), + Tensor *image = ws.CreateTensor("Image", GetDeviceAllocator(DeviceType::GPU), DataTypeToEnum::v()); CalImage2DShape(buffer->shape(), IN_OUT_CHANNEL, &image_shape); image->ResizeImage(buffer->shape(), image_shape); diff 
--git a/mace/kernels/opencl/pad.cc b/mace/kernels/opencl/pad.cc index fe2a51ef..963f25e7 100644 --- a/mace/kernels/opencl/pad.cc +++ b/mace/kernels/opencl/pad.cc @@ -20,26 +20,25 @@ namespace mace { namespace kernels { -template -MaceStatus PadFunctor::operator()( - const Tensor *input, - Tensor *output, - StatsFuture *future) { - MACE_CHECK( - this->paddings_.size() == static_cast((input->dim_size() * 2))); - MACE_CHECK((this->paddings_[0] == 0) && (this->paddings_[1] == 0) - && (this->paddings_[6] == 0) && (this->paddings_[7] == 0)) - << "Mace only support height/width dimension now"; +template +MaceStatus PadFunctor::operator()(const Tensor *input, + Tensor *output, + StatsFuture *future) { + MACE_CHECK(this->paddings_.size() == + static_cast((input->dim_size() * 2))); + MACE_CHECK((this->paddings_[0] == 0) && (this->paddings_[1] == 0) && + (this->paddings_[6] == 0) && (this->paddings_[7] == 0)) + << "Mace only support height/width dimension now"; auto input_shape = input->shape(); - std::vector - output_shape = {input_shape[0] + this->paddings_[0] + this->paddings_[1], - input_shape[1] + this->paddings_[2] + this->paddings_[3], - input_shape[2] + this->paddings_[4] + this->paddings_[5], - input_shape[3] + this->paddings_[6] + this->paddings_[7]}; + std::vector output_shape = { + input_shape[0] + this->paddings_[0] + this->paddings_[1], + input_shape[1] + this->paddings_[2] + this->paddings_[3], + input_shape[2] + this->paddings_[4] + this->paddings_[5], + input_shape[3] + this->paddings_[6] + this->paddings_[7]}; std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, image_shape)); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, image_shape)); const index_t batch = output->dim(0); const index_t height = output->dim(1); @@ -61,7 +60,7 @@ MaceStatus PadFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = 
std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -103,9 +102,8 @@ MaceStatus PadFunctor::operator()( } const std::vector lws = Default3DLocalWS(gws, kwg_size_); - std::string tuning_key = - Concat("pad", output->dim(0), output->dim(1), output->dim(2), - output->dim(3)); + std::string tuning_key = Concat("pad", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(kernel_, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -118,10 +116,8 @@ MaceStatus PadFunctor::operator()( return MACE_SUCCESS; } -template -struct PadFunctor; -template -struct PadFunctor; +template struct PadFunctor; +template struct PadFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/pooling.cc b/mace/kernels/opencl/pooling.cc index b208c529..b408a9cd 100644 --- a/mace/kernels/opencl/pooling.cc +++ b/mace/kernels/opencl/pooling.cc @@ -23,15 +23,13 @@ namespace kernels { namespace { -std::vector LocalWS(const uint32_t *gws, - const uint32_t kwg_size) { +std::vector LocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t base = cache_size / kBaseGPUMemCacheSize; lws[1] = std::min(gws[1], kwg_size); - lws[2] = std::min(std::min(gws[2], base), - kwg_size / lws[1]); + lws[2] = + std::min(std::min(gws[2], base), kwg_size / lws[1]); const uint32_t lws_size = lws[1] * lws[2]; lws[0] = gws[0] / 4; if (lws[0] == 0) { @@ -45,8 +43,8 @@ std::vector LocalWS(const uint32_t *gws, template MaceStatus PoolingFunctor::operator()(const Tensor *input, - Tensor *output, - StatsFuture *future) { + Tensor *output, + StatsFuture 
*future) { MACE_CHECK(dilations_[0] == 1 && dilations_[1] == 1) << "Pooling opencl kernel not support dilation yet"; @@ -73,7 +71,7 @@ MaceStatus PoolingFunctor::operator()(const Tensor *input, built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -108,7 +106,7 @@ MaceStatus PoolingFunctor::operator()(const Tensor *input, std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, output_image_shape)); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); index_t batch = output->dim(0); index_t out_height = output->dim(1); @@ -125,7 +123,7 @@ MaceStatus PoolingFunctor::operator()(const Tensor *input, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -159,8 +157,8 @@ MaceStatus PoolingFunctor::operator()(const Tensor *input, const std::vector lws = LocalWS(gws.data(), kwg_size_); std::string tuning_key = - Concat("pooling_opencl_kernel_", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("pooling_opencl_kernel_", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(kernel_, tuning_key, gws.data(), lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { diff --git a/mace/kernels/opencl/resize_bilinear.cc b/mace/kernels/opencl/resize_bilinear.cc index 5fba4af2..e935e905 100644 --- a/mace/kernels/opencl/resize_bilinear.cc +++ b/mace/kernels/opencl/resize_bilinear.cc @@ -23,11 +23,9 @@ namespace mace { namespace 
kernels { namespace { -std::vector LocalWS(const uint32_t *gws, - const uint32_t kwg_size) { +std::vector LocalWS(const uint32_t *gws, const uint32_t kwg_size) { std::vector lws(4, 0); - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t base = cache_size / kBaseGPUMemCacheSize; lws[1] = std::min(gws[1], kwg_size); if (lws[1] >= base) { @@ -79,7 +77,7 @@ MaceStatus ResizeBilinearFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -100,7 +98,7 @@ MaceStatus ResizeBilinearFunctor::operator()( std::vector output_image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); - MACE_FAILURE_RETURN(output->ResizeImage(output_shape, output_image_shape)); + MACE_RETURN_IF_ERROR(output->ResizeImage(output_shape, output_image_shape)); float height_scale = CalculateResizeScale(in_height, out_height, align_corners_); @@ -110,7 +108,7 @@ MaceStatus ResizeBilinearFunctor::operator()( uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -130,8 +128,8 @@ MaceStatus ResizeBilinearFunctor::operator()( const std::vector lws = LocalWS(gws, kwg_size_); std::string tuning_key = - Concat("resize_bilinear_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("resize_bilinear_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(kernel_, tuning_key, gws, lws, future); if 
(runtime->IsOutOfRangeCheckEnabled()) { diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index b77a0bdb..64d69693 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -20,7 +20,7 @@ namespace mace { namespace kernels { -template +template MaceStatus SliceFunctor::operator()( const Tensor *input, const std::vector &output_list, @@ -29,14 +29,15 @@ MaceStatus SliceFunctor::operator()( const size_t outputs_count = output_list.size(); const index_t output_channels = input_channels / outputs_count; MACE_CHECK(output_channels % 4 == 0) - << "output channels of slice op must be divisible by 4"; - std::vector output_shape({input->dim(0), input->dim(1), - input->dim(2), output_channels}); + << "output channels of slice op must be divisible by 4"; + std::vector output_shape( + {input->dim(0), input->dim(1), input->dim(2), output_channels}); std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); - for (size_t i= 0; i < outputs_count; ++i) { - MACE_FAILURE_RETURN(output_list[i]->ResizeImage(output_shape, image_shape)); + for (size_t i = 0; i < outputs_count; ++i) { + MACE_RETURN_IF_ERROR( + output_list[i]->ResizeImage(output_shape, image_shape)); } auto runtime = OpenCLRuntime::Global(); @@ -46,13 +47,13 @@ MaceStatus SliceFunctor::operator()( std::string kernel_name = MACE_OBFUSCATE_SYMBOL("slice"); built_options.emplace("-Dslice=" + kernel_name); built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum::value)); - built_options.emplace("-DCMD_DATA_TYPE=" - + DtToCLCMDDt(DataTypeToEnum::value)); + built_options.emplace("-DCMD_DATA_TYPE=" + + DtToCLCMDDt(DataTypeToEnum::value)); if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); 
*(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -68,8 +69,7 @@ MaceStatus SliceFunctor::operator()( const index_t channel_blk = RoundUpDiv4(output_channels); const uint32_t gws[3] = { - static_cast(channel_blk), - static_cast(input->dim(2)), + static_cast(channel_blk), static_cast(input->dim(2)), static_cast(input->dim(0) * input->dim(1)), }; @@ -80,7 +80,7 @@ MaceStatus SliceFunctor::operator()( uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -117,8 +117,8 @@ MaceStatus SliceFunctor::operator()( if (runtime->is_profiling_enabled()) { CallStats tmp_stats; runtime->GetCallStats(event, &tmp_stats); - call_stats.start_micros = std::min(tmp_stats.start_micros, - call_stats.start_micros); + call_stats.start_micros = + std::min(tmp_stats.start_micros, call_stats.start_micros); call_stats.end_micros += tmp_stats.end_micros - tmp_stats.start_micros; } } @@ -135,10 +135,8 @@ MaceStatus SliceFunctor::operator()( return MACE_SUCCESS; } -template -struct SliceFunctor; -template -struct SliceFunctor; +template struct SliceFunctor; +template struct SliceFunctor; } // namespace kernels } // namespace mace diff --git a/mace/kernels/opencl/softmax.cc b/mace/kernels/opencl/softmax.cc index b1748ee3..cfaee93a 100644 --- a/mace/kernels/opencl/softmax.cc +++ b/mace/kernels/opencl/softmax.cc @@ -24,10 +24,8 @@ namespace kernels { namespace { -std::vector LocalWS(const uint32_t *gws, - const uint32_t kwg_size) { - uint64_t cache_size = - OpenCLRuntime::Global()->device_global_mem_cache_size(); +std::vector LocalWS(const uint32_t *gws, const uint32_t kwg_size) { + uint64_t cache_size = OpenCLRuntime::Global()->device_global_mem_cache_size(); uint32_t base = cache_size / kBaseGPUMemCacheSize; std::vector lws(4, 0); lws[1] = std::min(gws[1], kwg_size); @@ -45,8 +43,8 
@@ std::vector LocalWS(const uint32_t *gws, template MaceStatus SoftmaxFunctor::operator()(const Tensor *logits, - Tensor *output, - StatsFuture *future) { + Tensor *output, + StatsFuture *future) { const index_t batch = logits->dim(0); const index_t height = logits->dim(1); const index_t width = logits->dim(2); @@ -71,7 +69,7 @@ MaceStatus SoftmaxFunctor::operator()(const Tensor *logits, built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -88,7 +86,7 @@ MaceStatus SoftmaxFunctor::operator()(const Tensor *logits, uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -105,8 +103,8 @@ MaceStatus SoftmaxFunctor::operator()(const Tensor *logits, std::vector lws = LocalWS(gws, kwg_size_); std::string tuning_key = - Concat("softmax_opencl_kernel", output->dim(0), - output->dim(1), output->dim(2), output->dim(3)); + Concat("softmax_opencl_kernel", output->dim(0), output->dim(1), + output->dim(2), output->dim(3)); TuningOrRun3DKernel(kernel_, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { diff --git a/mace/kernels/opencl/space_to_batch.cc b/mace/kernels/opencl/space_to_batch.cc index 456434b7..b558ba7d 100644 --- a/mace/kernels/opencl/space_to_batch.cc +++ b/mace/kernels/opencl/space_to_batch.cc @@ -26,17 +26,13 @@ namespace kernels { template MaceStatus SpaceToBatchFunctor::operator()( - Tensor *space_tensor, - Tensor *batch_tensor, - StatsFuture *future) { + Tensor *space_tensor, Tensor *batch_tensor, StatsFuture *future) { std::vector output_shape(4, 0); if (b2s_) { - 
CalculateBatchToSpaceOutputShape(batch_tensor, - DataFormat::NHWC, + CalculateBatchToSpaceOutputShape(batch_tensor, DataFormat::NHWC, output_shape.data()); } else { - CalculateSpaceToBatchOutputShape(space_tensor, - DataFormat::NHWC, + CalculateSpaceToBatchOutputShape(space_tensor, DataFormat::NHWC, output_shape.data()); } @@ -45,12 +41,12 @@ MaceStatus SpaceToBatchFunctor::operator()( CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &output_image_shape); if (b2s_) { - MACE_FAILURE_RETURN(space_tensor->ResizeImage(output_shape, - output_image_shape)); + MACE_RETURN_IF_ERROR( + space_tensor->ResizeImage(output_shape, output_image_shape)); kernel_name = "batch_to_space"; } else { - MACE_FAILURE_RETURN(batch_tensor->ResizeImage(output_shape, - output_image_shape)); + MACE_RETURN_IF_ERROR( + batch_tensor->ResizeImage(output_shape, output_image_shape)); kernel_name = "space_to_batch"; } const uint32_t chan_blk = RoundUpDiv4(batch_tensor->dim(3)); @@ -73,7 +69,7 @@ MaceStatus SpaceToBatchFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -81,9 +77,8 @@ MaceStatus SpaceToBatchFunctor::operator()( if (runtime->IsNonUniformWorkgroupsSupported()) { built_options.emplace("-DNON_UNIFORM_WORK_GROUP"); } - kernel_ = - runtime->BuildKernel("space_to_batch", - obfuscated_kernel_name, built_options); + kernel_ = runtime->BuildKernel("space_to_batch", obfuscated_kernel_name, + built_options); kwg_size_ = static_cast(runtime->GetKernelMaxWorkGroupSize(kernel_)); @@ -92,7 +87,7 @@ MaceStatus SpaceToBatchFunctor::operator()( uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } 
if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc index 70e4dcc5..91f0ad07 100644 --- a/mace/kernels/opencl/winograd_transform.cc +++ b/mace/kernels/opencl/winograd_transform.cc @@ -24,7 +24,6 @@ namespace kernels { template MaceStatus WinogradTransformFunctor::operator()( const Tensor *input_tensor, Tensor *output_tensor, StatsFuture *future) { - auto runtime = OpenCLRuntime::Global(); if (kernel_.get() == nullptr) { @@ -40,7 +39,7 @@ MaceStatus WinogradTransformFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -78,12 +77,12 @@ MaceStatus WinogradTransformFunctor::operator()( output_shape = {16, input_tensor->dim(3), out_width, 1}; std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_HEIGHT, &image_shape); - MACE_FAILURE_RETURN(output_tensor->ResizeImage(output_shape, image_shape)); + MACE_RETURN_IF_ERROR(output_tensor->ResizeImage(output_shape, image_shape)); uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); @@ -103,10 +102,9 @@ MaceStatus WinogradTransformFunctor::operator()( } const std::vector lws = {kwg_size_ / 8, 8, 0}; - std::string tuning_key = - Concat("winograd_transform_kernel", output_tensor->dim(0), - output_tensor->dim(1), output_tensor->dim(2), - output_tensor->dim(3)); + std::string tuning_key = Concat("winograd_transform_kernel", + output_tensor->dim(0), output_tensor->dim(1), + output_tensor->dim(2), 
output_tensor->dim(3)); TuningOrRun2DKernel(kernel_, tuning_key, gws, lws, future); if (runtime->IsOutOfRangeCheckEnabled()) { @@ -125,7 +123,6 @@ MaceStatus WinogradInverseTransformFunctor::operator()( const Tensor *bias, Tensor *output_tensor, StatsFuture *future) { - auto runtime = OpenCLRuntime::Global(); if (kernel_.get() == nullptr) { @@ -142,7 +139,7 @@ MaceStatus WinogradInverseTransformFunctor::operator()( built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( new Buffer(GetDeviceAllocator(DeviceType::GPU)))); - kernel_error_->Allocate(1); + MACE_RETURN_IF_ERROR(kernel_error_->Allocate(1)); kernel_error_->Map(nullptr); *(kernel_error_->mutable_data()) = 0; kernel_error_->UnMap(); @@ -188,14 +185,14 @@ MaceStatus WinogradInverseTransformFunctor::operator()( input_tensor->dim(1)}; std::vector image_shape; CalImage2DShape(output_shape, BufferType::IN_OUT_CHANNEL, &image_shape); - MACE_FAILURE_RETURN(output_tensor->ResizeImage(output_shape, image_shape)); + MACE_RETURN_IF_ERROR(output_tensor->ResizeImage(output_shape, image_shape)); const uint32_t round_h = (height_ + 1) / 2; const uint32_t round_w = (width_ + 1) / 2; uint32_t idx = 0; if (runtime->IsOutOfRangeCheckEnabled()) { kernel_.setArg(idx++, - *(static_cast(kernel_error_->buffer()))); + *(static_cast(kernel_error_->buffer()))); } if (!runtime->IsNonUniformWorkgroupsSupported()) { kernel_.setArg(idx++, gws[0]); diff --git a/mace/kernels/pad.h b/mace/kernels/pad.h index 105cf242..de851bb7 100644 --- a/mace/kernels/pad.h +++ b/mace/kernels/pad.h @@ -51,7 +51,7 @@ struct PadFunctor : public PadFunctorBase { MACE_CHECK( this->paddings_.size() == static_cast(input->dim_size()) * 2); auto input_shape = input->shape(); - MACE_FAILURE_RETURN(output->Resize({input_shape[0] + this->paddings_[0] + MACE_RETURN_IF_ERROR(output->Resize({input_shape[0] + this->paddings_[0] + this->paddings_[1], input_shape[1] + this->paddings_[2] + this->paddings_[3], diff --git 
a/mace/kernels/pooling.h b/mace/kernels/pooling.h index 2632966f..936b5e36 100644 --- a/mace/kernels/pooling.h +++ b/mace/kernels/pooling.h @@ -190,7 +190,7 @@ struct PoolingFunctor: PoolingFunctorBase { RoundType::CEIL, output_shape.data()); } - MACE_FAILURE_RETURN(output_tensor->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output_tensor->Resize(output_shape)); Tensor::MappingGuard input_guard(input_tensor); Tensor::MappingGuard output_guard(output_tensor); diff --git a/mace/kernels/proposal.h b/mace/kernels/proposal.h index c61a031b..a1277f4c 100644 --- a/mace/kernels/proposal.h +++ b/mace/kernels/proposal.h @@ -267,7 +267,7 @@ struct ProposalFunctor { // Our RPN implementation only supports a single input image, so all // batch inds are 0 size = static_cast(nms_result.size()); - MACE_FAILURE_RETURN(output->Resize({size, 1, 1, 5})); + MACE_RETURN_IF_ERROR(output->Resize({size, 1, 1, 5})); auto output_ptr = output->mutable_data(); #pragma omp parallel for for (int i = 0; i < size; ++i) { diff --git a/mace/kernels/psroi_align.h b/mace/kernels/psroi_align.h index 1830ff5d..757bec3c 100644 --- a/mace/kernels/psroi_align.h +++ b/mace/kernels/psroi_align.h @@ -50,7 +50,7 @@ struct PSROIAlignFunctor { const index_t num_rois = rois->dim(0); const index_t batch_size = input->dim(0); - MACE_FAILURE_RETURN(output->Resize({num_rois, pooled_height, pooled_width, + MACE_RETURN_IF_ERROR(output->Resize({num_rois, pooled_height, pooled_width, output_dim_})); T *output_ptr = output->mutable_data(); diff --git a/mace/kernels/resize_bilinear.h b/mace/kernels/resize_bilinear.h index 2c7ff3ef..cb41ef45 100644 --- a/mace/kernels/resize_bilinear.h +++ b/mace/kernels/resize_bilinear.h @@ -150,7 +150,7 @@ struct ResizeBilinearFunctor index_t out_width = out_width_; MACE_CHECK(out_height > 0 && out_width > 0); std::vector out_shape{batch, channels, out_height, out_width}; - MACE_FAILURE_RETURN(output->Resize(out_shape)); + MACE_RETURN_IF_ERROR(output->Resize(out_shape)); 
Tensor::MappingGuard input_mapper(input); Tensor::MappingGuard output_mapper(output); diff --git a/mace/kernels/slice.h b/mace/kernels/slice.h index 02396ce3..7ab311b0 100644 --- a/mace/kernels/slice.h +++ b/mace/kernels/slice.h @@ -61,7 +61,7 @@ struct SliceFunctor : SliceFunctorBase { 1, std::multiplies()); for (size_t i= 0; i < outputs_count; ++i) { - MACE_FAILURE_RETURN(output_list[i]->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output_list[i]->Resize(output_shape)); output_ptrs[i] = output_list[i]->mutable_data(); } const T *input_ptr = input->data(); diff --git a/mace/kernels/space_to_batch.h b/mace/kernels/space_to_batch.h index 204fe44b..786e270a 100644 --- a/mace/kernels/space_to_batch.h +++ b/mace/kernels/space_to_batch.h @@ -150,12 +150,12 @@ struct SpaceToBatchFunctor : SpaceToBatchFunctorBase { CalculateBatchToSpaceOutputShape(batch_tensor, DataFormat::NCHW, output_shape.data()); - MACE_FAILURE_RETURN(space_tensor->Resize(output_shape)); + MACE_RETURN_IF_ERROR(space_tensor->Resize(output_shape)); } else { CalculateSpaceToBatchOutputShape(space_tensor, DataFormat::NCHW, output_shape.data()); - MACE_FAILURE_RETURN(batch_tensor->Resize(output_shape)); + MACE_RETURN_IF_ERROR(batch_tensor->Resize(output_shape)); } Tensor::MappingGuard input_guard(space_tensor); diff --git a/mace/ops/BUILD b/mace/ops/BUILD index c925ff13..09b5fa3b 100644 --- a/mace/ops/BUILD +++ b/mace/ops/BUILD @@ -15,7 +15,6 @@ cc_library( hdrs = [ "ops_test_util.h", ], - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"], deps = [ "//mace/core", "@gtest//:gtest", @@ -36,18 +35,23 @@ cc_library( [ "buffer_to_image.cc", "image_to_buffer.cc", - ]), + ], + ), hdrs = glob( ["*.h"], exclude = ["ops_test_util.h"], ), - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + - if_openmp_enabled(["-fopenmp"]) + - if_neon_enabled(["-DMACE_ENABLE_NEON"]) + - if_android_armv7(["-mfpu=neon"]) + - if_android_armv7(["-mfloat-abi=softfp"]) + - 
if_android(["-DMACE_ENABLE_OPENCL"]) + - if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ + "-DMACE_ENABLE_NEON", + ]) + if_android_armv7([ + "-mfpu=neon", + ]) + if_android_armv7([ + "-mfloat-abi=softfp", + ]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), deps = [ "//mace/kernels", ], @@ -60,13 +64,17 @@ cc_test( srcs = glob( ["*_test.cc"], ), - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + - if_openmp_enabled(["-fopenmp"]) + - if_neon_enabled(["-DMACE_ENABLE_NEON"]) + - if_android_armv7(["-mfpu=neon"]) + - if_android_armv7(["-mfloat-abi=softfp"]) + - if_android(["-DMACE_ENABLE_OPENCL"]) + - if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ + "-DMACE_ENABLE_NEON", + ]) + if_android_armv7([ + "-mfpu=neon", + ]) + if_android_armv7([ + "-mfloat-abi=softfp", + ]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ @@ -80,13 +88,17 @@ cc_test( name = "ops_benchmark", testonly = 1, srcs = glob(["*_benchmark.cc"]), - copts = ["-Werror", "-Wextra", "-Wno-missing-field-initializers"] + - if_openmp_enabled(["-fopenmp"]) + - if_neon_enabled(["-DMACE_ENABLE_NEON"]) + - if_android_armv7(["-mfpu=neon"]) + - if_android_armv7(["-mfloat-abi=softfp"]) + - if_android(["-DMACE_ENABLE_OPENCL"]) + - if_hexagon_enabled(["-DMACE_ENABLE_HEXAGON"]), + copts = if_openmp_enabled(["-fopenmp"]) + if_neon_enabled([ + "-DMACE_ENABLE_NEON", + ]) + if_android_armv7([ + "-mfpu=neon", + ]) + if_android_armv7([ + "-mfloat-abi=softfp", + ]) + if_android([ + "-DMACE_ENABLE_OPENCL", + ]) + if_hexagon_enabled([ + "-DMACE_ENABLE_HEXAGON", + ]), linkopts = ["-fopenmp"], linkstatic = 1, deps = [ diff --git a/mace/ops/activation.h b/mace/ops/activation.h index e9578e51..8938ea74 100644 --- a/mace/ops/activation.h +++ 
b/mace/ops/activation.h @@ -31,15 +31,15 @@ class ActivationOp : public Operator { functor_(kernels::StringToActivationType( OperatorBase::GetOptionalArg("activation", "NOOP")), - static_cast(OperatorBase::GetOptionalArg( - "max_limit", 0.0f))) {} + static_cast( + OperatorBase::GetOptionalArg("max_limit", 0.0f))) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(0); const Tensor *alpha_tensor = this->InputSize() >= 2 ? this->Input(1) : nullptr; Tensor *output_tensor = this->Output(0); - MACE_FAILURE_RETURN(output_tensor->ResizeLike(input_tensor)); + MACE_RETURN_IF_ERROR(output_tensor->ResizeLike(input_tensor)); return functor_(input_tensor, alpha_tensor, output_tensor, future); } diff --git a/mace/ops/activation_test.cc b/mace/ops/activation_test.cc index a3b2ab19..cc40ac9d 100644 --- a/mace/ops/activation_test.cc +++ b/mace/ops/activation_test.cc @@ -120,7 +120,6 @@ TEST_F(ActivationOpTest, OPENCLUnalignedSimpleRelu) { TestUnalignedSimpleRelu(); } - namespace { template void TestSimpleRelux() { @@ -169,9 +168,7 @@ void TestSimpleRelux() { TEST_F(ActivationOpTest, CPUSimple) { TestSimpleRelux(); } -TEST_F(ActivationOpTest, OPENCLSimple) { - TestSimpleRelux(); -} +TEST_F(ActivationOpTest, OPENCLSimple) { TestSimpleRelux(); } namespace { template @@ -278,9 +275,7 @@ void TestSimplePrelu() { } } // namespace -TEST_F(ActivationOpTest, CPUSimplePrelu) { - TestSimplePrelu(); -} +TEST_F(ActivationOpTest, CPUSimplePrelu) { TestSimplePrelu(); } TEST_F(ActivationOpTest, OPENCLSimplePrelu) { TestSimplePrelu(); diff --git a/mace/ops/addn_test.cc b/mace/ops/addn_test.cc index 5b7c9d98..2f5aa28a 100644 --- a/mace/ops/addn_test.cc +++ b/mace/ops/addn_test.cc @@ -97,8 +97,8 @@ void SimpleAdd3() { net.RunOp(D); } - auto expected = CreateTensor({1, 2, 3, 1}, - {-0.000713, 8, 12, 16, 20, 24}); + auto expected = + CreateTensor({1, 2, 3, 1}, {-0.000713, 8, 12, 16, 20, 24}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-4, 1e-3); } @@ 
-160,8 +160,8 @@ void RandomTest() { ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-2, 1e-2); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, + 1e-2); } } } // namespace diff --git a/mace/ops/batch_norm.h b/mace/ops/batch_norm.h index 5963ee4b..9d983f10 100644 --- a/mace/ops/batch_norm.h +++ b/mace/ops/batch_norm.h @@ -51,7 +51,7 @@ class BatchNormOp : public Operator { var->dim_size()); Tensor *output = this->Output(OUTPUT); - MACE_FAILURE_RETURN(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); return functor_(input, scale, offset, mean, var, epsilon_, output, future); } diff --git a/mace/ops/batch_norm_test.cc b/mace/ops/batch_norm_test.cc index 05cc2ab8..b72ec73a 100644 --- a/mace/ops/batch_norm_test.cc +++ b/mace/ops/batch_norm_test.cc @@ -22,7 +22,7 @@ namespace test { class BatchNormOpTest : public OpsTestBase {}; namespace { -template +template void Simple() { OpsTestNet net; @@ -37,14 +37,14 @@ void Simple() { if (D == DeviceType::CPU) { net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .AddFloatArg("epsilon", 1e-3) - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -62,14 +62,14 @@ void Simple() { kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Input("MeanImage") - .Input("VarImage") - .AddFloatArg("epsilon", 1e-3) - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + 
.Input("MeanImage") + .Input("VarImage") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -79,10 +79,9 @@ void Simple() { } // Check - auto expected = - CreateTensor({1, 6, 2, 1}, {-3.8543, -3.8543, -1.5125, -1.5125, - 0.8291, 0.8291, 3.1708, 3.1708, - 5.5125, 5.5125, 7.8543, 7.8543}); + auto expected = CreateTensor( + {1, 6, 2, 1}, {-3.8543, -3.8543, -1.5125, -1.5125, 0.8291, 0.8291, 3.1708, + 3.1708, 5.5125, 5.5125, 7.8543, 7.8543}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-4); } @@ -103,35 +102,31 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .AddFloatArg("epsilon", 1e-3) - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -140,25 +135,25 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); 
BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Input("MeanImage") - .Input("VarImage") - .AddFloatArg("epsilon", 1e-3) - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Input("MeanImage") + .Input("VarImage") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Tuning setenv("MACE_TUNING", "1", 1); @@ -170,7 +165,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -186,34 +181,30 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .AddFloatArg("epsilon", 1e-1) - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .AddFloatArg("epsilon", 1e-1) + 
.Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -222,26 +213,26 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Input("MeanImage") - .Input("VarImage") - .AddFloatArg("epsilon", 1e-1) - .Output("OutputImage") - .AddIntArg("T", static_cast(DataType::DT_HALF)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Input("MeanImage") + .Input("VarImage") + .AddFloatArg("epsilon", 1e-1) + .Output("OutputImage") + .AddIntArg("T", static_cast(DataType::DT_HALF)) + .Finalize(net.NewOperatorDef()); // Tuning setenv("MACE_TUNING", "1", 1); @@ -253,7 +244,7 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-1, 1e-2); } @@ -269,34 +260,30 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); 
net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .AddFloatArg("epsilon", 1e-3) - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -305,25 +292,25 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Input("MeanImage") - .Input("VarImage") - .AddFloatArg("epsilon", 1e-3) - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Input("MeanImage") + .Input("VarImage") + .AddFloatArg("epsilon", 1e-3) + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // tuning setenv("MACE_TUNING", "1", 1); @@ -335,7 
+322,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -351,34 +338,30 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); net.AddRandomInput("Mean", {channels}); net.AddRandomInput("Var", {channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Input("Mean") - .Input("Var") - .AddFloatArg("epsilon", 1e-1) - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Input("Mean") + .Input("Var") + .AddFloatArg("epsilon", 1e-1) + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -387,26 +370,26 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Mean", "MeanImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Var", "VarImage", - kernels::BufferType::ARGUMENT); + 
kernels::BufferType::ARGUMENT); OpDefBuilder("BatchNorm", "BatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Input("MeanImage") - .Input("VarImage") - .AddFloatArg("epsilon", 1e-1) - .Output("OutputImage") - .AddIntArg("T", static_cast(DataType::DT_HALF)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Input("MeanImage") + .Input("VarImage") + .AddFloatArg("epsilon", 1e-1) + .Output("OutputImage") + .AddIntArg("T", static_cast(DataType::DT_HALF)) + .Finalize(net.NewOperatorDef()); // tuning setenv("MACE_TUNING", "1", 1); @@ -418,7 +401,7 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-1, 1e-2); } diff --git a/mace/ops/batch_to_space.h b/mace/ops/batch_to_space.h index 4f688dc4..91c4a9ba 100644 --- a/mace/ops/batch_to_space.h +++ b/mace/ops/batch_to_space.h @@ -36,8 +36,7 @@ class BatchToSpaceNDOp : public Operator { MaceStatus Run(StatsFuture *future) override { const Tensor *batch_tensor = this->Input(INPUT); Tensor *space_tensor = this->Output(OUTPUT); - return functor_(space_tensor, const_cast(batch_tensor), - future); + return functor_(space_tensor, const_cast(batch_tensor), future); } private: diff --git a/mace/ops/bias_add.h b/mace/ops/bias_add.h index 1f242253..cc9c4bd9 100644 --- a/mace/ops/bias_add.h +++ b/mace/ops/bias_add.h @@ -37,7 +37,7 @@ class BiasAddOp : public Operator { bias->dim_size()); Tensor *output = this->Output(OUTPUT); - MACE_FAILURE_RETURN(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); return functor_(input, bias, output, future); } diff --git a/mace/ops/bias_add_test.cc b/mace/ops/bias_add_test.cc index 2c4a5773..c030b8f0 100644 --- a/mace/ops/bias_add_test.cc +++ b/mace/ops/bias_add_test.cc @@ -32,21 
+32,17 @@ void BiasAddSimple() { net.AddInputFromArray("Bias", {1}, {0.5f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("BiasAdd", "BiasAddTest") - .Input("InputNCHW") - .Input("Bias") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Bias") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -79,9 +75,7 @@ void BiasAddSimple() { TEST_F(BiasAddOpTest, BiasAddSimpleCPU) { BiasAddSimple(); } -TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) { - BiasAddSimple(); -} +TEST_F(BiasAddOpTest, BiasAddSimpleOPENCL) { BiasAddSimple(); } TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { // generate random input @@ -94,13 +88,11 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Bias", {channels}, true); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph @@ -113,9 +105,7 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -124,9 +114,9 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); 
OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -139,7 +129,7 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5); } @@ -154,13 +144,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Bias", {channels}, true); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph @@ -173,9 +161,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check Tensor expected; @@ -183,9 +169,9 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("BiasAdd", "BiasAddTest") .Input("InputImage") @@ -198,7 +184,7 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) { net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5); } diff --git a/mace/ops/buffer_to_image_test.cc b/mace/ops/buffer_to_image_test.cc index a01f802e..94b28b0b 100644 --- a/mace/ops/buffer_to_image_test.cc +++ b/mace/ops/buffer_to_image_test.cc @@ -71,27 +71,27 @@ TEST(BufferToImageTest, ArgLarge) { TEST(BufferToImageTest, InputSmallSingleChannel) { TestBidirectionTransform(kernels::IN_OUT_CHANNEL, - {1, 2, 
3, 1}); + {1, 2, 3, 1}); } TEST(BufferToImageTest, InputSmallMultipleChannel) { TestBidirectionTransform(kernels::IN_OUT_CHANNEL, - {1, 2, 3, 3}); + {1, 2, 3, 3}); } TEST(BufferToImageTest, InputSmallMultipleBatchAndChannel) { TestBidirectionTransform(kernels::IN_OUT_CHANNEL, - {3, 2, 3, 3}); + {3, 2, 3, 3}); } TEST(BufferToImageTest, InputMedium) { TestBidirectionTransform(kernels::IN_OUT_CHANNEL, - {3, 13, 17, 128}); + {3, 13, 17, 128}); } TEST(BufferToImageTest, InputLarge) { TestBidirectionTransform(kernels::IN_OUT_CHANNEL, - {3, 64, 64, 256}); + {3, 64, 64, 256}); } TEST(BufferToImageTest, Filter1x1Small) { @@ -233,8 +233,8 @@ TEST(BufferToImageTest, ArgStringHalfToHalfSmall) { const unsigned char input_data[] = { 0xCD, 0x3C, 0x33, 0x40, }; - TestStringHalfBidirectionTransform( - kernels::ARGUMENT, {2}, input_data); + TestStringHalfBidirectionTransform(kernels::ARGUMENT, + {2}, input_data); } } // namespace test diff --git a/mace/ops/channel_shuffle_test.cc b/mace/ops/channel_shuffle_test.cc index 7c17c5a0..0b674dab 100644 --- a/mace/ops/channel_shuffle_test.cc +++ b/mace/ops/channel_shuffle_test.cc @@ -29,23 +29,19 @@ TEST_F(ChannelShuffleOpTest, C8G4_CPU) { "Input", {1, 1, 2, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntArg("group", 4) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntArg("group", 4) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -65,7 +61,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 
31}); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") .Input("InputImage") @@ -78,7 +74,7 @@ TEST_F(ChannelShuffleOpTest, C16G4_OPENCL) { // Transfer output ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); // Check auto expected = CreateTensor( diff --git a/mace/ops/concat_test.cc b/mace/ops/concat_test.cc index 78d14394..9076aa27 100644 --- a/mace/ops/concat_test.cc +++ b/mace/ops/concat_test.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include #include +#include #include "gmock/gmock.h" -#include "mace/ops/ops_test_util.h" #include "mace/ops/concat.h" +#include "mace/ops/ops_test_util.h" namespace mace { namespace ops { @@ -163,7 +163,7 @@ void OpenclRandomTest(const std::vector> &shapes, int concat_axis_size = 0; // Construct graph std::vector> inputs(num_inputs, std::vector()); - std::vector input_ptrs(num_inputs); + std::vector input_ptrs(num_inputs); OpsTestNet net; for (int i = 0; i < num_inputs; ++i) { const std::string input_name = MakeString("Input", i); @@ -171,10 +171,10 @@ void OpenclRandomTest(const std::vector> &shapes, concat_axis_size += shapes[i][axis]; GenerateRandomRealTypeData(shapes[i], &inputs[i]); input_ptrs[i] = inputs[i].data(); - net.AddInputFromArray(input_name, - shapes[i], inputs[i]); + net.AddInputFromArray(input_name, shapes[i], + inputs[i]); BufferToImage(&net, input_name, image_name, - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); } auto builder = OpDefBuilder("Concat", "ConcatTest"); @@ -191,7 +191,7 @@ void OpenclRandomTest(const std::vector> &shapes, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); // Check auto 
output = net.GetOutput("Output"); diff --git a/mace/ops/conv_2d_test.cc b/mace/ops/conv_2d_test.cc index be0e38cc..022703bb 100644 --- a/mace/ops/conv_2d_test.cc +++ b/mace/ops/conv_2d_test.cc @@ -25,40 +25,36 @@ namespace test { class Conv2dOpTest : public OpsTestBase {}; namespace { -template +template void TestNHWCSimple3x3VALID() { OpsTestNet net; // Add input data net.AddInputFromArray( - "Input", {1, 3, 3, 2}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + "Input", {1, 3, 3, 2}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); net.AddInputFromArray( - "Filter", {1, 2, 3, 3}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + "Filter", {1, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); net.AddInputFromArray("Bias", {1}, {0.1f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -67,15 +63,15 @@ void TestNHWCSimple3x3VALID() { BufferToImage(&net, 
"Bias", "BiasImage", kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); net.RunOp(D); @@ -91,41 +87,37 @@ void TestNHWCSimple3x3VALID() { ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } -template +template void TestNHWCSimple3x3SAME() { OpsTestNet net; // Add input data net.AddInputFromArray( - "Input", {1, 3, 3, 2}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + "Input", {1, 3, 3, 2}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); net.AddInputFromArray( - "Filter", {1, 2, 3, 3}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + "Filter", {1, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); net.AddInputFromArray("Bias", {1}, {0.1f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {1, 
1}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -134,15 +126,15 @@ void TestNHWCSimple3x3SAME() { BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -155,8 +147,8 @@ void TestNHWCSimple3x3SAME() { } auto expected = CreateTensor( - {1, 3, 3, 1}, - {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f}); + {1, 3, 3, 1}, + {8.1f, 12.1f, 8.1f, 12.1f, 18.1f, 12.1f, 8.1f, 12.1f, 8.1f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } @@ -173,40 +165,36 @@ TEST_F(Conv2dOpTest, OPENCLSimple) { } namespace { -template +template void TestNHWCSimple3x3WithoutBias() { OpsTestNet net; // Add input data net.AddInputFromArray( - "Input", {1, 3, 3, 2}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + "Input", {1, 3, 3, 2}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); net.AddInputFromArray( - "Filter", {1, 2, 3, 3}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 
1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); + "Filter", {1, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputNCHW") - .Input("Filter") - .Output("OutputNCHW") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Output("OutputNCHW") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -214,14 +202,14 @@ void TestNHWCSimple3x3WithoutBias() { kernels::BufferType::CONV2D_FILTER); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputImage") - .Input("FilterImage") - .Output("OutputImage") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Output("OutputImage") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); // Transfer output @@ -247,45 +235,40 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) { } namespace { -template +template void TestNHWCCombined3x3() { // 
Construct graph OpsTestNet net; // Add input data net.AddInputFromArray( - "Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + "Input", {1, 5, 5, 2}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); net.AddInputFromArray( - "Filter", {2, 2, 3, 3}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, - 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); + "Filter", {2, 2, 3, 3}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, + 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, + 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}); net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2DTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", 
kernels::BufferType::IN_OUT_CHANNEL); @@ -295,15 +278,15 @@ void TestNHWCCombined3x3() { kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2DTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -315,8 +298,8 @@ void TestNHWCCombined3x3() { // Check auto expected = CreateTensor( - {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f, - 9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}); + {1, 3, 3, 2}, {8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 18.1f, + 9.2f, 12.1f, 6.2f, 8.1f, 4.2f, 12.1f, 6.2f, 8.1f, 4.2f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } } // namespace @@ -330,14 +313,13 @@ TEST_F(Conv2dOpTest, OPENCLStride2) { } namespace { -template +template void TestFusedNHWCSimple3x3VALID() { OpsTestNet net; // Add input data net.AddInputFromArray( "Input", {1, 3, 3, 2}, - {-1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1}); + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}); net.AddInputFromArray( "Filter", {1, 2, 3, 3}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, @@ -345,9 +327,7 @@ void TestFusedNHWCSimple3x3VALID() { net.AddInputFromArray("Bias", {1}, {-0.1f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2dTest") .Input("InputNCHW") @@ -362,10 
+342,8 @@ void TestFusedNHWCSimple3x3VALID() { .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -398,25 +376,21 @@ void TestFusedNHWCSimple3x3VALID() { auto expected = CreateTensor({1, 1, 1, 1}, {0.0f}); ExpectTensorNear(*expected, *net.GetOutput("Output")); } -template +template void TestFusedNHWCSimple3x3WithoutBias() { OpsTestNet net; // Add input data net.AddInputFromArray( "Input", {1, 3, 3, 2}, - {-1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1}); + {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}); net.AddInputFromArray( "Filter", {1, 2, 3, 3}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2DTest") .Input("InputNCHW") @@ -431,10 +405,8 @@ void TestFusedNHWCSimple3x3WithoutBias() { // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -478,48 +450,43 @@ TEST_F(Conv2dOpTest, FusedOPENCLSimple) { TestFusedNHWCSimple3x3WithoutBias(); } - namespace { -template +template void TestConv1x1() { // Construct graph OpsTestNet net; // Add input data net.AddInputFromArray( - "Input", {1, 3, 10, 5}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + "Input", {1, 3, 10, 5}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); net.AddInputFromArray( - "Filter", {2, 5, 1, 1}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}); + "Filter", {2, 5, 1, 1}, + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 2.0f, 2.0f, 2.0f, 2.0f, 2.0f}); net.AddInputFromArray("Bias", {2}, {0.1f, 0.2f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2DTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -529,14 +496,14 @@ void TestConv1x1() { kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", 
"Conv2DTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -548,13 +515,13 @@ void TestConv1x1() { // Check auto expected = CreateTensor( - {1, 3, 10, 2}, - {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, - 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, - 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, - 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, - 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, - 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f}); + {1, 3, 10, 2}, + {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, + 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } @@ -565,7 +532,7 @@ TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1(); } namespace { -template +template void TestComplexConvNxNS12(const std::vector &shape, const int stride) { testing::internal::LogToStderr(); @@ -584,33 +551,28 @@ void TestComplexConvNxNS12(const std::vector &shape, // Add input data net.AddRandomInput("Input", {batch, height, width, input_channels}); net.AddRandomInput( - "Filter", {output_channels, input_channels, 
kernel_h, kernel_w}); + "Filter", {output_channels, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", type) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); - + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); // Check Tensor expected; @@ -625,22 +587,22 @@ void TestComplexConvNxNS12(const std::vector &shape, kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", type) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run on device net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, - *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4); + 
ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-4, + 1e-4); }; for (int kernel_size : {1, 3, 5, 7}) { @@ -666,7 +628,7 @@ TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) { } namespace { -template +template void TestHalfComplexConvNxNS12(const std::vector &input_shape, const std::vector &filter_shape, const std::vector &dilations) { @@ -690,40 +652,36 @@ void TestHalfComplexConvNxNS12(const std::vector &input_shape, &float_input_data); std::vector float_filter_data; GenerateRandomRealTypeData( - {kernel_h, kernel_w, output_channels, input_channels}, - &float_filter_data); + {kernel_h, kernel_w, output_channels, input_channels}, + &float_filter_data); std::vector float_bias_data; GenerateRandomRealTypeData({output_channels}, &float_bias_data); // Add input data net.AddInputFromArray( - "Input", {batch, height, width, input_channels}, float_input_data); + "Input", {batch, height, width, input_channels}, float_input_data); net.AddInputFromArray( - "Filter", {output_channels, input_channels, kernel_h, kernel_w}, - float_filter_data); + "Filter", {output_channels, input_channels, kernel_h, kernel_w}, + float_filter_data); net.AddInputFromArray("Bias", {output_channels}, float_bias_data); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", padding) - .AddIntsArg("dilations", {dilations[0], dilations[1]}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", padding) + .AddIntsArg("dilations", {dilations[0], dilations[1]}) + .Finalize(net.NewOperatorDef()); // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + 
net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); // Check Tensor expected; @@ -738,23 +696,23 @@ void TestHalfComplexConvNxNS12(const std::vector &input_shape, kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", padding) - .AddIntsArg("dilations", {dilations[0], dilations[1]}) - .AddIntArg("T", static_cast(DataType::DT_HALF)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", padding) + .AddIntsArg("dilations", {dilations[0], dilations[1]}) + .AddIntArg("T", static_cast(DataType::DT_HALF)) + .Finalize(net.NewOperatorDef()); // Run on device net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, - *net.GetOutput("OPENCLOutput"), 1e-2, 1e-1); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, + 1e-1); }; func(1, 1, VALID); @@ -767,20 +725,16 @@ void TestHalfComplexConvNxNS12(const std::vector &input_shape, } // namespace TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x1S12) { - TestHalfComplexConvNxNS12({32, 32}, {1, 1, 32, 64}, - {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {1, 1, 32, 64}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) { - TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64}, - {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {3, 3, 32, 64}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv5x5S12) { - TestHalfComplexConvNxNS12({32, 32}, {5, 5, 3, 64}, - {1, 1}); - TestHalfComplexConvNxNS12({32, 32}, {5, 5, 3, 63}, - {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {5, 5, 3, 64}, {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {5, 5, 3, 63}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x7S1) { @@ 
-800,55 +754,45 @@ TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x1S1) { } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv7x7S12) { - TestHalfComplexConvNxNS12({32, 32}, {7, 7, 3, 64}, - {1, 1}); - TestHalfComplexConvNxNS12({32, 32}, {7, 7, 3, 63}, - {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {7, 7, 3, 64}, {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {7, 7, 3, 63}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv15x1S12) { - TestHalfComplexConvNxNS12({32, 32}, {15, 1, 256, 2}, - {1, 1}); - TestHalfComplexConvNxNS12({64, 64}, {15, 1, 64, 2}, - {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {15, 1, 256, 2}, {1, 1}); + TestHalfComplexConvNxNS12({64, 64}, {15, 1, 64, 2}, {1, 1}); TestHalfComplexConvNxNS12({256, 256}, {15, 1, 32, 2}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv1x15S12) { - TestHalfComplexConvNxNS12({32, 32}, {1, 15, 256, 2}, - {1, 1}); + TestHalfComplexConvNxNS12({32, 32}, {1, 15, 256, 2}, {1, 1}); TestHalfComplexConvNxNS12({256, 256}, {1, 15, 32, 2}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv1x1S12) { - TestHalfComplexConvNxNS12({107, 113}, {1, 1, 5, 7}, - {1, 1}); + TestHalfComplexConvNxNS12({107, 113}, {1, 1, 5, 7}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) { - TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7}, - {1, 1}); + TestHalfComplexConvNxNS12({107, 113}, {3, 3, 5, 7}, {1, 1}); } TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) { - TestHalfComplexConvNxNS12({64, 64}, {5, 5, 16, 16}, - {2, 2}); + TestHalfComplexConvNxNS12({64, 64}, {5, 5, 16, 16}, {2, 2}); } TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation2) { - TestHalfComplexConvNxNS12({64, 64}, {7, 7, 16, 16}, - {2, 2}); + TestHalfComplexConvNxNS12({64, 64}, {7, 7, 16, 16}, {2, 2}); } TEST_F(Conv2dOpTest, OPENCLHalfConv7x7Dilation4) { - TestHalfComplexConvNxNS12({63, 67}, {7, 7, 16, 16}, - {4, 4}); + TestHalfComplexConvNxNS12({63, 67}, {7, 7, 16, 16}, {4, 4}); } namespace { -template +template void TestDilationConvNxN(const std::vector &shape, 
const int dilation_rate) { testing::internal::LogToStderr(); @@ -868,33 +812,28 @@ void TestDilationConvNxN(const std::vector &shape, // Add input data net.AddRandomInput("Input", {batch, height, width, input_channels}); net.AddRandomInput( - "Filter", {output_channels, input_channels, kernel_h, kernel_w}); + "Filter", {output_channels, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", type) - .AddIntsArg("dilations", {dilation_rate, dilation_rate}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilation_rate, dilation_rate}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); - + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); // Check Tensor expected; @@ -909,22 +848,22 @@ void TestDilationConvNxN(const std::vector &shape, kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", type) - .AddIntsArg("dilations", {dilation_rate, dilation_rate}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, 
stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {dilation_rate, dilation_rate}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run on device net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-4, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-4, + 1e-4); }; for (int kernel_size : {3}) { @@ -949,7 +888,7 @@ TEST_F(Conv2dOpTest, OPENCLUnalignedDilation4) { } namespace { -template +template void TestGeneralHalfAtrousConv(const std::vector &image_shape, const std::vector &filter_shape, const std::vector &dilations) { @@ -975,9 +914,7 @@ void TestGeneralHalfAtrousConv(const std::vector &image_shape, "Filter", {output_channels, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph OpDefBuilder("Conv2D", "Conv2dTest") @@ -993,10 +930,8 @@ void TestGeneralHalfAtrousConv(const std::vector &image_shape, // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); // Check Tensor expected; expected.Copy(*net.GetOutput("Output")); @@ -1024,8 +959,8 @@ void TestGeneralHalfAtrousConv(const std::vector &image_shape, ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-2, 1e-1); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, + 1e-1); }; func(1, 1, VALID); @@ -1034,17 +969,16 @@ void TestGeneralHalfAtrousConv(const std::vector &image_shape, } // namespace TEST_F(Conv2dOpTest, OPENCLHalf7X7AtrousConvD2) { - TestGeneralHalfAtrousConv({32, 32}, {7, 7, 16, 3}, - {2, 2}); + 
TestGeneralHalfAtrousConv({32, 32}, {7, 7, 16, 3}, {2, 2}); } TEST_F(Conv2dOpTest, OPENCLHalf15X15AtrousConvD4) { TestGeneralHalfAtrousConv({63, 71}, {15, 15, 16, 16}, - {2, 2}); + {2, 2}); } namespace { -template +template void TestArbitraryPadConvNxN(const std::vector &shape, const std::vector &paddings) { testing::internal::LogToStderr(); @@ -1063,31 +997,27 @@ void TestArbitraryPadConvNxN(const std::vector &shape, // Add input data net.AddRandomInput("Input", {batch, height, width, input_channels}); net.AddRandomInput( - "Filter", {output_channels, input_channels, kernel_h, kernel_w}); + "Filter", {output_channels, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Construct graph OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntsArg("padding_values", paddings) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntsArg("padding_values", paddings) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); // Check Tensor expected; @@ -1102,21 +1032,21 @@ void TestArbitraryPadConvNxN(const std::vector &shape, kernels::BufferType::ARGUMENT); OpDefBuilder("Conv2D", "Conv2dTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntsArg("padding_values", paddings) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - 
.Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntsArg("padding_values", paddings) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run on device net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-4, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-4, + 1e-4); }; for (int kernel_size : {3, 5, 7}) { @@ -1132,8 +1062,7 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad1) { } TEST_F(Conv2dOpTest, OPENCLAlignedPad2) { - TestArbitraryPadConvNxN({128, 128, 16, 16}, - {2, 2}); + TestArbitraryPadConvNxN({128, 128, 16, 16}, {2, 2}); } TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) { diff --git a/mace/ops/conv_pool_2d_base.h b/mace/ops/conv_pool_2d_base.h index c446782c..9c4860df 100644 --- a/mace/ops/conv_pool_2d_base.h +++ b/mace/ops/conv_pool_2d_base.h @@ -32,8 +32,7 @@ class ConvPool2dOpBase : public Operator { padding_type_(static_cast(OperatorBase::GetOptionalArg( "padding", static_cast(SAME)))), paddings_(OperatorBase::GetRepeatedArgs("padding_values")), - dilations_( - OperatorBase::GetRepeatedArgs("dilations", {1, 1})) {} + dilations_(OperatorBase::GetRepeatedArgs("dilations", {1, 1})) {} protected: std::vector strides_; diff --git a/mace/ops/core_test.cc b/mace/ops/core_test.cc index 1874a178..d471a0f8 100644 --- a/mace/ops/core_test.cc +++ b/mace/ops/core_test.cc @@ -31,9 +31,8 @@ TEST(CoreTest, INIT_MODE) { .AddIntArg("mode", static_cast(NetMode::INIT)) .Finalize(&op_defs[op_defs.size() - 1]); - Tensor *input = - ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::GPU), - DataTypeToEnum::v()); + Tensor *input = ws.CreateTensor("Input", GetDeviceAllocator(DeviceType::GPU), + DataTypeToEnum::v()); input->Resize({1, 3, 3, 3}); { Tensor::MappingGuard 
input_mapper(input); diff --git a/mace/ops/deconv_2d_test.cc b/mace/ops/deconv_2d_test.cc index 9fe8432c..ad2400cc 100644 --- a/mace/ops/deconv_2d_test.cc +++ b/mace/ops/deconv_2d_test.cc @@ -25,7 +25,7 @@ namespace test { class Deconv2dOpTest : public OpsTestBase {}; namespace { -template +template void RunTestSimple(const std::vector &input_shape, const std::vector &input_data, const int stride, @@ -40,10 +40,7 @@ void RunTestSimple(const std::vector &input_shape, // Add input data net.AddInputFromArray("Input", input_shape, input_data); net.AddInputFromArray("Filter", filter_shape, filter_data); - net.TransformDataFormat("Filter", - HWOI, - "FilterOIHW", - OIHW); + net.TransformDataFormat("Filter", HWOI, "FilterOIHW", OIHW); if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", @@ -66,9 +63,7 @@ void RunTestSimple(const std::vector &input_shape, ImageToBuffer(&net, "OutputImage", "Output", kernels::BufferType::IN_OUT_CHANNEL); } else { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Deconv2D", "Deconv2dTest") .Input("InputNCHW") @@ -81,317 +76,165 @@ void RunTestSimple(const std::vector &input_shape, .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } auto expected = CreateTensor(expected_shape, expected_data); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0.0001); } -template +template void TestNHWCSimple3x3SAME_S1() { - RunTestSimple({1, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1}, - 1, - Padding::SAME, - {0, 0}, - {1, 3, 3, 3}, - {3, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 3, 3, 3}, - {4, 4, 4, 6, 6, 6, 4, 4, 4, - 6, 6, 6, 9, 9, 9, 6, 6, 6, - 4, 4, 4, 6, 6, 6, 4, 4, 4}); - RunTestSimple({1, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1}, - 1, - Padding::VALID, - 
{2, 2}, - {0}, - {3, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 3, 3, 3}, - {4, 4, 4, 6, 6, 6, 4, 4, 4, - 6, 6, 6, 9, 9, 9, 6, 6, 6, - 4, 4, 4, 6, 6, 6, 4, 4, 4}); - RunTestSimple({1, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9}, - 1, - Padding::SAME, - {0, 0}, - {1, 3, 3, 3}, - {3, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27}, - {1, 3, 3, 3}, - {54, 66, 78, 126, 147, 168, 130, 146, 162, - 198, 225, 252, 405, 450, 495, 366, 399, 432, - 354, 378, 402, 630, 669, 708, 502, 530, 558}); - RunTestSimple({1, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9}, - 1, - Padding::SAME, - {2, 2}, - {0}, - {3, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27}, - {1, 3, 3, 3}, - {54, 66, 78, 126, 147, 168, 130, 146, 162, - 198, 225, 252, 405, 450, 495, 366, 399, 432, - 354, 378, 402, 630, 669, 708, 502, 530, 558}); + RunTestSimple({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 1, Padding::SAME, + {0, 0}, {1, 3, 3, 3}, {3, 3, 3, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 3, 3, 3}, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9, + 9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4}); + RunTestSimple({1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 1, Padding::VALID, + {2, 2}, {0}, {3, 3, 3, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 3, 3, 3}, {4, 4, 4, 6, 6, 6, 4, 4, 4, 6, 6, 6, 9, 9, + 9, 6, 6, 6, 4, 4, 4, 6, 6, 6, 4, 4, 4}); + RunTestSimple({1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::SAME, + {0, 0}, {1, 3, 3, 3}, {3, 3, 3, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + {1, 3, 3, 3}, {54, 66, 78, 126, 147, 168, 130, 146, 162, + 198, 225, 252, 405, 450, 495, 366, 399, 432, + 354, 378, 402, 630, 669, 708, 502, 530, 558}); + RunTestSimple( + {1, 3, 3, 
1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::SAME, {2, 2}, {0}, + {3, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + {1, 3, 3, 3}, + {54, 66, 78, 126, 147, 168, 130, 146, 162, 198, 225, 252, 405, 450, + 495, 366, 399, 432, 354, 378, 402, 630, 669, 708, 502, 530, 558}); } -template +template void TestNHWCSimple3x3SAME_S2() { - RunTestSimple({1, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1}, - 2, - Padding::SAME, - {0, 0}, - {1, 6, 6, 3}, - {3, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 6, 6, 3}, - {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}); - RunTestSimple({1, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1}, - 2, - Padding::SAME, - {2, 2}, - {0}, - {3, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 5, 5, 3}, - {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, - 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, - 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}); - RunTestSimple({1, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9}, - 2, - Padding::SAME, - {0, 0}, - {1, 6, 6, 3}, - {3, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27}, - {1, 6, 6, 3}, - {1, 2, 3, 4, 5, 6, 9, 12, 15, - 8, 10, 12, 17, 22, 27, 12, 15, 18, - 10, 11, 12, 13, 14, 15, 36, 39, 42, - 26, 28, 30, 62, 67, 72, 39, 42, 45, - 23, 28, 33, 38, 43, 48, 96, 108, 120, - 64, 71, 78, 148, 164, 180, 90, 99, 108, - 40, 44, 48, 52, 56, 60, 114, 123, 132, - 65, 70, 75, 140, 151, 162, 78, 84, 90, - 83, 94, 105, 116, 
127, 138, 252, 276, 300, - 142, 155, 168, 304, 332, 360, 168, 183, 198, - 70, 77, 84, 91, 98, 105, 192, 207, 222, - 104, 112, 120, 218, 235, 252, 117, 126, 135}); - RunTestSimple({1, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9}, - 2, - Padding::SAME, - {2, 2}, - {0}, - {3, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27}, - {1, 5, 5, 3}, - {13, 14, 15, 36, 39, 42, - 26, 28, 30, 62, 67, 72, 39, 42, 45, - 38, 43, 48, 96, 108, 120, - 64, 71, 78, 148, 164, 180, 90, 99, 108, - 52, 56, 60, 114, 123, 132, - 65, 70, 75, 140, 151, 162, 78, 84, 90, - 116, 127, 138, 252, 276, 300, - 142, 155, 168, 304, 332, 360, 168, 183, 198, - 91, 98, 105, 192, 207, 222, - 104, 112, 120, 218, 235, 252, 117, 126, 135}); + RunTestSimple( + {1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::SAME, {0, 0}, + {1, 6, 6, 3}, {3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 6, 6, 3}, + {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 4, 4, + 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, + 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}); + RunTestSimple( + {1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::SAME, {2, 2}, {0}, + {3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 5, 5, 3}, {1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 4, + 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, + 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, + 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1}); + RunTestSimple( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 2, Padding::SAME, {0, 0}, + {1, 6, 6, 3}, {3, 3, 3, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + {1, 6, 6, 
3}, + {1, 2, 3, 4, 5, 6, 9, 12, 15, 8, 10, 12, 17, 22, + 27, 12, 15, 18, 10, 11, 12, 13, 14, 15, 36, 39, 42, 26, + 28, 30, 62, 67, 72, 39, 42, 45, 23, 28, 33, 38, 43, 48, + 96, 108, 120, 64, 71, 78, 148, 164, 180, 90, 99, 108, 40, 44, + 48, 52, 56, 60, 114, 123, 132, 65, 70, 75, 140, 151, 162, 78, + 84, 90, 83, 94, 105, 116, 127, 138, 252, 276, 300, 142, 155, 168, + 304, 332, 360, 168, 183, 198, 70, 77, 84, 91, 98, 105, 192, 207, + 222, 104, 112, 120, 218, 235, 252, 117, 126, 135}); + RunTestSimple( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 2, Padding::SAME, {2, 2}, {0}, + {3, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + {1, 5, 5, 3}, + {13, 14, 15, 36, 39, 42, 26, 28, 30, 62, 67, 72, 39, + 42, 45, 38, 43, 48, 96, 108, 120, 64, 71, 78, 148, 164, + 180, 90, 99, 108, 52, 56, 60, 114, 123, 132, 65, 70, 75, + 140, 151, 162, 78, 84, 90, 116, 127, 138, 252, 276, 300, 142, + 155, 168, 304, 332, 360, 168, 183, 198, 91, 98, 105, 192, 207, + 222, 104, 112, 120, 218, 235, 252, 117, 126, 135}); } -template +template void TestNHWCSimple3x3SAME_S2_1() { - RunTestSimple({1, 3, 3, 1}, - {12, 18, 12, 18, 27, 18, 12, 18, 12}, - 2, - Padding::SAME, - {0, 0}, - {1, 5, 5, 3}, - {3, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 5, 5, 3}, - {12, 12, 12, 30, 30, 30, 18, 18, 18, - 30, 30, 30, 12, 12, 12, - 30, 30, 30, 75, 75, 75, 45, 45, 45, - 75, 75, 75, 30, 30, 30, - 18, 18, 18, 45, 45, 45, 27, 27, 27, - 45, 45, 45, 18, 18, 18, - 30, 30, 30, 75, 75, 75, 45, 45, 45, - 75, 75, 75, 30, 30, 30, - 12, 12, 12, 30, 30, 30, 18, 18, 18, - 30, 30, 30, 12, 12, 12}); + RunTestSimple( + {1, 3, 3, 1}, {12, 18, 12, 18, 27, 18, 12, 18, 12}, 2, Padding::SAME, + {0, 0}, {1, 5, 5, 3}, {3, 3, 3, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 5, 5, 3}, + {12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12, + 30, 30, 30, 
75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30, + 18, 18, 18, 45, 45, 45, 27, 27, 27, 45, 45, 45, 18, 18, 18, + 30, 30, 30, 75, 75, 75, 45, 45, 45, 75, 75, 75, 30, 30, 30, + 12, 12, 12, 30, 30, 30, 18, 18, 18, 30, 30, 30, 12, 12, 12}); } -template +template void TestNHWCSimple3x3VALID_S2() { - RunTestSimple({1, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1}, - 2, - Padding::VALID, - {0, 0}, - {1, 7, 7, 3}, - {3, 3, 3, 1}, - {1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1}, - {1, 7, 7, 3}, - {1, 1, 1, 1, 1, 1, 2, 2, 2, - 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 2, 2, - 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, - 4, 4, 4, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, - 4, 4, 4, 2, 2, 2, 2, 2, 2, - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, - 2, 2, 2, 1, 1, 1, 1, 1, 1}); + RunTestSimple( + {1, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1}, 2, Padding::VALID, {0, 0}, + {1, 7, 7, 3}, {3, 3, 3, 1}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 7, 7, 3}, + {1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 4, 4, 4, 2, 2, 2, 4, 4, 4, 2, 2, 2, 2, 2, 2, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1}); } -template +template void TestNHWCSimple3x3VALID_S1() { - RunTestSimple({1, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9}, - 1, - Padding::VALID, - {0, 0}, - {1, 5, 5, 3}, - {3, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27}, - {1, 5, 5, 3}, - {1, 2, 3, - 6, 9, 
12, - 18, 24, 30, - 26, 31, 36, - 21, 24, 27, - 14, 19, 24, - 54, 66, 78, - 126, 147, 168, - 130, 146, 162, - 90, 99, 108, - 66, 78, 90, - 198, 225, 252, - 405, 450, 495, - 366, 399, 432, - 234, 252, 270, - 146, 157, 168, - 354, 378, 402, - 630, 669, 708, - 502, 530, 558, - 294, 309, 324, - 133, 140, 147, - 306, 321, 336, - 522, 546, 570, - 398, 415, 432, - 225, 234, 243}); - RunTestSimple({1, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9}, - 1, - Padding::VALID, - {4, 4}, - {0}, - {3, 3, 3, 1}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, - 19, 20, 21, 22, 23, 24, 25, 26, 27}, - {1, 5, 5, 3}, - {1, 2, 3, - 6, 9, 12, - 18, 24, 30, - 26, 31, 36, - 21, 24, 27, - 14, 19, 24, - 54, 66, 78, - 126, 147, 168, - 130, 146, 162, - 90, 99, 108, - 66, 78, 90, - 198, 225, 252, - 405, 450, 495, - 366, 399, 432, - 234, 252, 270, - 146, 157, 168, - 354, 378, 402, - 630, 669, 708, - 502, 530, 558, - 294, 309, 324, - 133, 140, 147, - 306, 321, 336, - 522, 546, 570, - 398, 415, 432, - 225, 234, 243}); + RunTestSimple( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::VALID, {0, 0}, + {1, 5, 5, 3}, {3, 3, 3, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + {1, 5, 5, 3}, + {1, 2, 3, 6, 9, 12, 18, 24, 30, 26, 31, 36, 21, + 24, 27, 14, 19, 24, 54, 66, 78, 126, 147, 168, 130, 146, + 162, 90, 99, 108, 66, 78, 90, 198, 225, 252, 405, 450, 495, + 366, 399, 432, 234, 252, 270, 146, 157, 168, 354, 378, 402, 630, + 669, 708, 502, 530, 558, 294, 309, 324, 133, 140, 147, 306, 321, + 336, 522, 546, 570, 398, 415, 432, 225, 234, 243}); + RunTestSimple( + {1, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, 1, Padding::VALID, {4, 4}, {0}, + {3, 3, 3, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27}, + {1, 5, 5, 3}, + {1, 2, 3, 6, 9, 12, 18, 24, 30, 26, 31, 36, 21, + 24, 27, 14, 19, 24, 54, 66, 78, 126, 147, 168, 130, 146, + 162, 90, 99, 108, 66, 78, 90, 198, 225, 252, 
405, 450, 495, + 366, 399, 432, 234, 252, 270, 146, 157, 168, 354, 378, 402, 630, + 669, 708, 502, 530, 558, 294, 309, 324, 133, 140, 147, 306, 321, + 336, 522, 546, 570, 398, 415, 432, 225, 234, 243}); } -template +template void TestNHWCSimple2x2SAME() { - RunTestSimple({1, 2, 2, 1}, - {1, 1, 1, 1}, - 1, - Padding::SAME, - {0, 0}, - {1, 2, 2, 1}, - {3, 3, 1, 1}, + RunTestSimple({1, 2, 2, 1}, {1, 1, 1, 1}, 1, Padding::SAME, {0, 0}, + {1, 2, 2, 1}, {3, 3, 1, 1}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, - {1, 2, 2, 1}, - {4.f, 4.f, 4.f, 4.f}); + {1, 2, 2, 1}, {4.f, 4.f, 4.f, 4.f}); } -template +template void TestNHWCSimple2x2VALID() { - RunTestSimple({1, 2, 2, 1}, - {1, 1, 1, 1}, - 2, - Padding::VALID, - {0, 0}, - {1, 5, 5, 1}, - {3, 3, 1, 1}, - {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, - {1, 5, 5, 1}, - {1.f, 1.f, 2.f, 1.f, 1.f, - 1.f, 1.f, 2.f, 1.f, 1.f, - 2.f, 2.f, 4.f, 2.f, 2.f, - 1.f, 1.f, 2.f, 1.f, 1.f, - 1.f, 1.f, 2.f, 1.f, 1.f}); + RunTestSimple( + {1, 2, 2, 1}, {1, 1, 1, 1}, 2, Padding::VALID, {0, 0}, {1, 5, 5, 1}, + {3, 3, 1, 1}, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, + {1, 5, 5, 1}, + {1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f, 2.f, 2.f, 4.f, + 2.f, 2.f, 1.f, 1.f, 2.f, 1.f, 1.f, 1.f, 1.f, 2.f, 1.f, 1.f}); } } // namespace @@ -400,11 +243,11 @@ TEST_F(Deconv2dOpTest, CPUSimple3X3PaddingSame_S1) { } TEST_F(Deconv2dOpTest, CPUSimple3X3PaddingSame_S2) { -TestNHWCSimple3x3SAME_S2(); + TestNHWCSimple3x3SAME_S2(); } TEST_F(Deconv2dOpTest, CPUSimple3X3PaddingSame_S2_1) { -TestNHWCSimple3x3SAME_S2_1(); + TestNHWCSimple3x3SAME_S2_1(); } TEST_F(Deconv2dOpTest, CPUSimple2X2PaddingSame) { @@ -432,11 +275,11 @@ TEST_F(Deconv2dOpTest, OPENCLSimple3X3PaddingSame_S1) { } TEST_F(Deconv2dOpTest, OPENCLSimple3X3PaddingSame_S2) { -TestNHWCSimple3x3SAME_S2(); + TestNHWCSimple3x3SAME_S2(); } TEST_F(Deconv2dOpTest, OPENCLSimple3X3PaddingSame_S2_1) { -TestNHWCSimple3x3SAME_S2_1(); + TestNHWCSimple3x3SAME_S2_1(); } 
TEST_F(Deconv2dOpTest, OPENCLSimple2X2PaddingValid) { @@ -452,7 +295,7 @@ TEST_F(Deconv2dOpTest, OPENCLSimple3X3PaddingValid_S2) { } namespace { -template +template void TestComplexDeconvNxNS12(const int batch, const std::vector &shape, const int stride) { @@ -473,14 +316,12 @@ void TestComplexDeconvNxNS12(const int batch, net.AddRandomInput( "Filter", {output_channels, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", {output_channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); int out_h = 0; int out_w = 0; - std::vectorpaddings; + std::vector paddings; std::vector output_shape; if (padding < 0) { @@ -496,8 +337,8 @@ void TestComplexDeconvNxNS12(const int batch, output_shape.push_back(out_w); output_shape.push_back(output_channels); } else { -// out_h = (height - 1) * stride + 1 + padding - kernel_h + 1; -// out_w = (width -1) * stride + 1 + padding - kernel_w + 1; + // out_h = (height - 1) * stride + 1 + padding - kernel_h + 1; + // out_w = (width -1) * stride + 1 + padding - kernel_w + 1; paddings.push_back(padding); paddings.push_back(padding); } @@ -514,14 +355,11 @@ void TestComplexDeconvNxNS12(const int batch, .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); - // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); // Check Tensor expected; @@ -551,8 +389,8 @@ void TestComplexDeconvNxNS12(const int batch, ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, - *net.GetOutput("OPENCLOutput"), 1e-4, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-4, + 1e-4); }; for (int kernel_size : {1, 3, 5, 7}) { @@ -575,8 +413,8 @@ TEST_F(Deconv2dOpTest, OPENCLAlignedDeconvNxNS34) { } TEST_F(Deconv2dOpTest, OPENCLUnalignedDeconvNxNS12) { 
-TestComplexDeconvNxNS12(1, {17, 113, 5, 7}, 1); -TestComplexDeconvNxNS12(1, {17, 113, 5, 7}, 2); + TestComplexDeconvNxNS12(1, {17, 113, 5, 7}, 1); + TestComplexDeconvNxNS12(1, {17, 113, 5, 7}, 2); } TEST_F(Deconv2dOpTest, OPENCLUnalignedDeconvNxNS34) { diff --git a/mace/ops/depth_to_space_test.cc b/mace/ops/depth_to_space_test.cc index 692d2d62..e61590ff 100644 --- a/mace/ops/depth_to_space_test.cc +++ b/mace/ops/depth_to_space_test.cc @@ -36,9 +36,7 @@ void RunDepthToSpace(const bool d2s, const char *ops_test_name = (d2s) ? "DepthToSpaceTest" : "SpaceToDepthTest"; // Construct graph if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder(ops_name, ops_test_name) .Input("InputNCHW") @@ -47,10 +45,8 @@ void RunDepthToSpace(const bool d2s, .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else { BufferToImage(&net, "Input", "InputImage", @@ -64,118 +60,98 @@ void RunDepthToSpace(const bool d2s, net.RunOp(D); } - if (D == DeviceType::GPU) { ImageToBuffer(&net, "OutputImage", "Output", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); } auto expected = CreateTensor(expected_shape, expected_data); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } } // namespace - class SpaceToDepthOpTest : public OpsTestBase {}; TEST_F(SpaceToDepthOpTest, Input2x4x4_B2_CPU) { - RunDepthToSpace(false, {1, 2, 4, 4}, + RunDepthToSpace( + false, {1, 2, 4, 4}, {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}, - 2, - {1, 1, 2, 16}, + 2, {1, 1, 2, 16}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); } 
TEST_F(SpaceToDepthOpTest, Input2x4x4_B2_OPENCL) { - RunDepthToSpace(false, {1, 2, 4, 4}, - {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, + RunDepthToSpace( + false, {1, 2, 4, 4}, + {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}, - 2, - {1, 1, 2, 16}, + 2, {1, 1, 2, 16}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}); } - - TEST_F(SpaceToDepthOpTest, Input2x2x4_B2_CPU) { - RunDepthToSpace(false, {1, 2, 2, 4}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}, - 2, - {1, 1, 1, 16}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}); + RunDepthToSpace( + false, {1, 2, 2, 4}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, 2, {1, 1, 1, 16}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); } TEST_F(SpaceToDepthOpTest, Input4x4x1_B2_OPENCL) { - RunDepthToSpace(false, {1, 2, 2, 4}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}, - 2, - {1, 1, 1, 16}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}); + RunDepthToSpace( + false, {1, 2, 2, 4}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, 2, {1, 1, 1, 16}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); } class DepthToSpaceOpTest : public OpsTestBase {}; TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_CPU) { - RunDepthToSpace(true, {1, 1, 2, 16}, + RunDepthToSpace( + true, {1, 1, 2, 16}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, - 2, - {1, 2, 4, 4}, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, + 2, {1, 2, 4, 4}, {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, - 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}); + 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}); } 
TEST_F(DepthToSpaceOpTest, Input1x2x16_B2_OPENCL) { - RunDepthToSpace(true, {1, 1, 2, 16}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, - 2, - {1, 2, 4, 4}, - {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, - 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}); + RunDepthToSpace( + true, {1, 1, 2, 16}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, + 2, {1, 2, 4, 4}, + {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, + 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}); } TEST_F(DepthToSpaceOpTest, Input1x1x16_B2_CPU) { - RunDepthToSpace(true, {1, 1, 1, 16}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}, - 2, - {1, 2, 2, 4}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}); + RunDepthToSpace( + true, {1, 1, 1, 16}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, 2, {1, 2, 2, 4}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); } TEST_F(DepthToSpaceOpTest, Input1x1x16_B2_OPENCL) { - RunDepthToSpace(true, {1, 1, 1, 16}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}, - 2, - {1, 2, 2, 4}, - {1, 2, 3, 4, 5, 6, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16}); + RunDepthToSpace( + true, {1, 1, 1, 16}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, 2, {1, 2, 2, 4}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); } - TEST_F(DepthToSpaceOpTest, InputLarger_B2_OPENCL) { - const std::vector in = std::vector(192 * 192 *128, 1.0); + const std::vector in = std::vector(192 * 192 * 128, 1.0); - RunDepthToSpace(true, {1, 192, 192, 128}, - in, - 2, - {1, 384, 384, 32}, - in); + RunDepthToSpace(true, {1, 192, 192, 128}, in, 2, + {1, 384, 384, 32}, in); } - namespace { template -void RandomTest(const bool d2s, const int block_size, +void RandomTest(const bool d2s, + const int block_size, const std::vector &shape) { 
testing::internal::LogToStderr(); srand(time(NULL)); @@ -188,9 +164,7 @@ void RandomTest(const bool d2s, const int block_size, // Add input data net.AddRandomInput("Input", shape); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder(ops_name, ops_test_name) .Input("InputNCHW") @@ -201,12 +175,9 @@ void RandomTest(const bool d2s, const int block_size, // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); - BufferToImage(&net, "Input", "InputImg", kernels::BufferType::IN_OUT_CHANNEL); @@ -238,15 +209,15 @@ TEST_F(DepthToSpaceOpTest, OPENCLRandomFloat) { } TEST_F(DepthToSpaceOpTest, OPENCLRandomHalf) { -RandomTest(true, 2, {1, 192, 192, 128}); + RandomTest(true, 2, {1, 192, 192, 128}); } TEST_F(SpaceToDepthOpTest, OPENCLRandomFloat) { -RandomTest(false, 2, {1, 384, 384, 32}); + RandomTest(false, 2, {1, 384, 384, 32}); } TEST_F(SpaceToDepthOpTest, OPENCLRandomHalf) { -RandomTest(false, 2, {1, 384, 384, 32}); + RandomTest(false, 2, {1, 384, 384, 32}); } } // namespace test diff --git a/mace/ops/depthwise_conv2d_test.cc b/mace/ops/depthwise_conv2d_test.cc index d4f069c4..254fb4c2 100644 --- a/mace/ops/depthwise_conv2d_test.cc +++ b/mace/ops/depthwise_conv2d_test.cc @@ -22,7 +22,7 @@ namespace test { class DepthwiseConv2dOpTest : public OpsTestBase {}; namespace { -template +template void SimpleValidTest() { testing::internal::LogToStderr(); // Construct graph @@ -30,31 +30,27 @@ void SimpleValidTest() { // Add input data net.AddInputFromArray( - "Input", {1, 3, 3, 2}, - {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}); + "Input", {1, 3, 3, 2}, + {1, 2, 2, 4, 3, 6, 4, 8, 5, 10, 6, 12, 7, 14, 8, 16, 9, 18}); net.AddInputFromArray( - "Filter", {1, 2, 2, 2}, {1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 4.0f, 6.0f, 8.0f}); + "Filter", {1, 2, 2, 2}, {1.0f, 2.0f, 3.0f, 4.0f, 2.0f, 4.0f, 6.0f, 8.0f}); 
net.AddInputFromArray("Bias", {2}, {.1f, .2f}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -63,15 +59,15 @@ void SimpleValidTest() { BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); net.RunOp(D); @@ -85,8 +81,8 @@ void SimpleValidTest() { // Check auto expected = CreateTensor( - {1, 2, 2, 2}, {37.1f, 148.2f, 47.1f, 188.2f, - 67.1f, 268.2f, 77.1f, 308.2f}); + {1, 2, 2, 2}, + {37.1f, 148.2f, 47.1f, 188.2f, 67.1f, 268.2f, 77.1f, 308.2f}); if 
(DataTypeToEnum::value == DT_HALF) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-3, 1e-3); @@ -109,9 +105,13 @@ TEST_F(DepthwiseConv2dOpTest, SimpleOpenCLHalf) { } namespace { -template -void ComplexValidTest(index_t batch, index_t channel, index_t height, - index_t width, index_t kernel, index_t multiplier, +template +void ComplexValidTest(index_t batch, + index_t channel, + index_t height, + index_t width, + index_t kernel, + index_t multiplier, int stride) { testing::internal::LogToStderr(); // Construct graph @@ -125,35 +125,29 @@ void ComplexValidTest(index_t batch, index_t channel, index_t height, std::vector filter_data(kernel * kernel * channel * multiplier); GenerateRandomRealTypeData({multiplier, channel, kernel, kernel}, &filter_data); - net.AddInputFromArray("Filter", - {multiplier, channel, kernel, kernel}, - filter_data); + net.AddInputFromArray( + "Filter", {multiplier, channel, kernel, kernel}, filter_data); std::vector bias_data(channel * multiplier); GenerateRandomRealTypeData({channel * multiplier}, &bias_data); - net.AddInputFromArray("Bias", {channel * multiplier}, - bias_data); + net.AddInputFromArray("Bias", {channel * multiplier}, bias_data); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {stride, stride}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {stride, stride}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - 
net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); @@ -162,15 +156,15 @@ void ComplexValidTest(index_t batch, index_t channel, index_t height, BufferToImage(&net, "Bias", "BiasImage", kernels::BufferType::ARGUMENT); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {stride, stride}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntsArg("strides", {stride, stride}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); net.RunOp(D); @@ -217,8 +211,8 @@ void ComplexValidTest(index_t batch, index_t channel, index_t height, } } - auto expected = CreateTensor( - {1, out_height, out_width, out_channels}, expect); + auto expected = + CreateTensor({1, out_height, out_width, out_channels}, expect); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); @@ -249,7 +243,7 @@ TEST_F(DepthwiseConv2dOpTest, ComplexOpenCLHalf) { } namespace { -template +template void TestNxNS12(const index_t height, const index_t width) { testing::internal::LogToStderr(); auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, @@ -263,74 +257,66 @@ void TestNxNS12(const index_t height, const index_t width) { OpsTestNet net; // Add input data - net.AddRandomInput("Input", - {batch, height, width, - input_channels}); net.AddRandomInput( - "Filter", {multiplier, input_channels, kernel_h, kernel_w}); + 
"Input", {batch, height, width, input_channels}); + net.AddRandomInput( + "Filter", {multiplier, input_channels, kernel_h, kernel_w}); net.AddRandomInput("Bias", - {multiplier - * input_channels}); + {multiplier * input_channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") - .Input("InputNCHW") - .Input("Filter") - .Input("Bias") - .Output("OutputNCHW") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", type) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Filter") + .Input("Bias") + .Output("OutputNCHW") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); // Check Tensor expected; expected.Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Filter", "FilterImage", - kernels::BufferType::DW_CONV2D_FILTER); + kernels::BufferType::DW_CONV2D_FILTER); BufferToImage(&net, "Bias", "BiasImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("DepthwiseConv2d", "DepthwiseConv2DTest") - .Input("InputImage") - .Input("FilterImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntsArg("strides", {stride_h, stride_w}) - .AddIntArg("padding", type) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("FilterImage") + .Input("BiasImage") + 
.Output("OutputImage") + .AddIntsArg("strides", {stride_h, stride_w}) + .AddIntArg("padding", type) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); net.RunOp(DeviceType::GPU); // Transfer output - ImageToBuffer(&net, - "OutputImage", - "DeviceOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(&net, "OutputImage", "DeviceOutput", + kernels::BufferType::IN_OUT_CHANNEL); // Check if (DataTypeToEnum::value == DT_FLOAT) { - ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), - 1e-5, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), 1e-5, + 1e-4); } else { - ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), - 1e-2, 1e-2); + ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), 1e-2, + 1e-2); } }; @@ -343,9 +329,7 @@ void TestNxNS12(const index_t height, const index_t width) { } } // namespace -TEST_F(DepthwiseConv2dOpTest, OpenCLSimpleNxNS12) { - TestNxNS12(4, 4); -} +TEST_F(DepthwiseConv2dOpTest, OpenCLSimpleNxNS12) { TestNxNS12(4, 4); } TEST_F(DepthwiseConv2dOpTest, OpenCLSimpleNxNS12Half) { TestNxNS12(4, 4); diff --git a/mace/ops/eltwise.h b/mace/ops/eltwise.h index efa87dd9..66c505fa 100644 --- a/mace/ops/eltwise.h +++ b/mace/ops/eltwise.h @@ -26,15 +26,15 @@ class EltwiseOp : public Operator { public: EltwiseOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), - functor_(static_cast( - OperatorBase::GetOptionalArg( - "type", static_cast(kernels::EltwiseType::NONE))), - OperatorBase::GetRepeatedArgs("coeff"), - OperatorBase::GetOptionalArg("x", 1.0)) {} + functor_( + static_cast(OperatorBase::GetOptionalArg( + "type", static_cast(kernels::EltwiseType::NONE))), + OperatorBase::GetRepeatedArgs("coeff"), + OperatorBase::GetOptionalArg("x", 1.0)) {} MaceStatus Run(StatsFuture *future) override { - const Tensor* input0 = this->Input(0); - const Tensor* input1 = this->InputSize() == 2 ? 
this->Input(1) : nullptr; + const Tensor *input0 = this->Input(0); + const Tensor *input1 = this->InputSize() == 2 ? this->Input(1) : nullptr; Tensor *output = this->Output(OUTPUT); return functor_(input0, input1, output, future); } diff --git a/mace/ops/eltwise_test.cc b/mace/ops/eltwise_test.cc index f2d88712..e3cbb2de 100644 --- a/mace/ops/eltwise_test.cc +++ b/mace/ops/eltwise_test.cc @@ -36,10 +36,7 @@ void SimpleTensorScalar(const kernels::EltwiseType type, net.AddInputFromArray("Input", shape, input); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "TInput", - NCHW); + net.TransformDataFormat("Input", NHWC, "TInput", NCHW); OpDefBuilder("Eltwise", "EltwiseTest") .Input("TInput") .AddIntArg("type", static_cast(type)) @@ -48,13 +45,10 @@ void SimpleTensorScalar(const kernels::EltwiseType type, .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); - net.TransformDataFormat("TOutput", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); } else { BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg") .AddIntArg("type", static_cast(type)) @@ -90,10 +84,8 @@ void SimpleTensorEltwise(const kernels::EltwiseType type, net.AddInputFromArray("Input1", shape1, input1); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input0", NHWC, - "TInput0", NCHW); - net.TransformDataFormat("Input1", NHWC, - "TInput1", NCHW); + net.TransformDataFormat("Input0", NHWC, "TInput0", NCHW); + net.TransformDataFormat("Input1", NHWC, "TInput1", NCHW); OpDefBuilder("Eltwise", "EltwiseTest") .Input("TInput0") .Input("TInput1") @@ -104,13 +96,12 @@ void SimpleTensorEltwise(const kernels::EltwiseType type, // Run net.RunOp(D); - net.TransformDataFormat("TOutput", NCHW, - "Output", NHWC); + net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); } else { BufferToImage(&net, "Input0", "InputImg0", - 
kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg0") .Input("InputImg1") @@ -138,278 +129,181 @@ void SimpleTensorEltwise(const kernels::EltwiseType type, TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) { SimpleTensorScalar(kernels::EltwiseType::SUM, - {1, 1, 1, 1}, {1}, 1, - {2}); + {1, 1, 1, 1}, {1}, 1, {2}); SimpleTensorScalar(kernels::EltwiseType::SUB, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 1, - {0, 1, 2, 3, 4, 5}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 1, {0, 1, 2, 3, 4, 5}); SimpleTensorScalar(kernels::EltwiseType::PROD, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 2, - {2, 4, 6, 8, 10, 12}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 2, {2, 4, 6, 8, 10, 12}); SimpleTensorScalar(kernels::EltwiseType::DIV, - {1, 1, 2, 3}, - {2, 4, 6, 8, 10, 12}, - 2, - {1, 2, 3, 4, 5, 6}); + {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, + 2, {1, 2, 3, 4, 5, 6}); SimpleTensorScalar(kernels::EltwiseType::MIN, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 1, - {1, 1, 1, 1, 1, 1}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 1, {1, 1, 1, 1, 1, 1}); SimpleTensorScalar(kernels::EltwiseType::MAX, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 3, - {3, 3, 3, 4, 5, 6}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 3, {3, 3, 3, 4, 5, 6}); SimpleTensorScalar(kernels::EltwiseType::NEG, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 3, - {-1, -2, -3, -4, -5, -6}); - SimpleTensorScalar(kernels::EltwiseType::ABS, - {1, 1, 2, 3}, - {-1, -2, -3, -4, -5, -6}, - 3, - {1, 2, 3, 4, 5, 6}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 3, {-1, -2, -3, -4, -5, -6}); + SimpleTensorScalar( + kernels::EltwiseType::ABS, {1, 1, 2, 3}, {-1, -2, -3, -4, -5, -6}, 3, + {1, 2, 3, 4, 5, 6}); SimpleTensorScalar(kernels::EltwiseType::SQR_DIFF, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 1, - {0, 1, 4, 9, 16, 25}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 1, {0, 1, 4, 9, 16, 
25}); } TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) { SimpleTensorScalar(kernels::EltwiseType::SUM, - {1, 1, 1, 1}, {1}, 1, - {2}); + {1, 1, 1, 1}, {1}, 1, {2}); SimpleTensorScalar(kernels::EltwiseType::SUB, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 1, - {0, 1, 2, 3, 4, 5}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 1, {0, 1, 2, 3, 4, 5}); SimpleTensorScalar(kernels::EltwiseType::PROD, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 2, - {2, 4, 6, 8, 10, 12}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 2, {2, 4, 6, 8, 10, 12}); SimpleTensorScalar(kernels::EltwiseType::DIV, - {1, 1, 2, 3}, - {2, 4, 6, 8, 10, 12}, - 2, - {1, 2, 3, 4, 5, 6}); + {1, 1, 2, 3}, {2, 4, 6, 8, 10, 12}, + 2, {1, 2, 3, 4, 5, 6}); SimpleTensorScalar(kernels::EltwiseType::MIN, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 1, - {1, 1, 1, 1, 1, 1}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 1, {1, 1, 1, 1, 1, 1}); SimpleTensorScalar(kernels::EltwiseType::MAX, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 3, - {3, 3, 3, 4, 5, 6}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 3, {3, 3, 3, 4, 5, 6}); SimpleTensorScalar(kernels::EltwiseType::NEG, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 3, - {-1, -2, -3, -4, -5, -6}); - SimpleTensorScalar(kernels::EltwiseType::ABS, - {1, 1, 2, 3}, - {-1, -2, -3, -4, -5, -6}, - 3, - {1, 2, 3, 4, 5, 6}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 3, {-1, -2, -3, -4, -5, -6}); + SimpleTensorScalar( + kernels::EltwiseType::ABS, {1, 1, 2, 3}, {-1, -2, -3, -4, -5, -6}, 3, + {1, 2, 3, 4, 5, 6}); SimpleTensorScalar(kernels::EltwiseType::SQR_DIFF, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, - 1, - {0, 1, 4, 9, 16, 25}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + 1, {0, 1, 4, 9, 16, 25}); } TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { - SimpleTensorEltwise(kernels::EltwiseType::SUM, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 1, 3}, {1, 2, 3}, - {2, 4, 6, 5, 7, 9}); - SimpleTensorEltwise(kernels::EltwiseType::SUB, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); 
- SimpleTensorEltwise(kernels::EltwiseType::SUB, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {0, 0, 0, 0, 0, - -5, -5, -5, -5, -5}); - SimpleTensorEltwise(kernels::EltwiseType::PROD, - {1, 1, 1, 3}, {1, 2, 3}, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 4, 9, 4, 10, 18}); - SimpleTensorEltwise(kernels::EltwiseType::DIV, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 1, 1, 5}, {1, 1, 1, 1, 5}, - {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); - SimpleTensorEltwise(kernels::EltwiseType::DIV, - {1, 1, 1, 5}, {1, 1, 1, 2, 4}, - {1, 2, 1, 5}, - {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); - SimpleTensorEltwise(kernels::EltwiseType::MIN, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); - SimpleTensorEltwise(kernels::EltwiseType::MAX, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); - SimpleTensorEltwise(kernels::EltwiseType::SQR_DIFF, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {0, 0, 0, 0, 0, 25, 25, 25, 25, - 25}); + SimpleTensorEltwise( + kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 3}, + {1, 2, 3}, {2, 4, 6, 5, 7, 9}); + SimpleTensorEltwise( + kernels::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); + SimpleTensorEltwise( + kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}); + SimpleTensorEltwise( + kernels::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, {1, 2, 1, 3}, + {1, 2, 3, 4, 5, 6}, {1, 4, 9, 4, 10, 18}); + SimpleTensorEltwise( + kernels::EltwiseType::DIV, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 1, 1, 1, 5}, {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); + SimpleTensorEltwise( + kernels::EltwiseType::DIV, {1, 1, 1, 
5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, + {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); + SimpleTensorEltwise( + kernels::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); + SimpleTensorEltwise( + kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + SimpleTensorEltwise( + kernels::EltwiseType::SQR_DIFF, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); } TEST_F(EltwiseOpTest, GPUSimpleTensorVector) { SimpleTensorEltwise( - kernels::EltwiseType::SUM, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 1, 3}, {1, 2, 3}, - {2, 4, 6, 5, 7, 9}); + kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 1, 3}, + {1, 2, 3}, {2, 4, 6, 5, 7, 9}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); + kernels::EltwiseType::SUB, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}); + kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}); SimpleTensorEltwise( - kernels::EltwiseType::PROD, - {1, 1, 1, 3}, {1, 2, 3}, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 4, 9, 4, 10, 18}); + kernels::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, {1, 2, 1, 3}, + {1, 2, 3, 4, 5, 6}, {1, 4, 9, 4, 10, 18}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 1, 1, 5}, {1, 1, 1, 1, 5}, - {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); + kernels::EltwiseType::DIV, {1, 2, 1, 5}, 
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 1, 1, 1, 5}, {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, - {1, 1, 1, 5}, {1, 1, 1, 2, 4}, - {1, 2, 1, 5}, - {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, - {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); + kernels::EltwiseType::DIV, {1, 1, 1, 5}, {1, 1, 1, 2, 4}, {1, 2, 1, 5}, + {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); SimpleTensorEltwise( - kernels::EltwiseType::MIN, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); + kernels::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); SimpleTensorEltwise( - kernels::EltwiseType::MAX, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); SimpleTensorEltwise( - kernels::EltwiseType::SQR_DIFF, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + kernels::EltwiseType::SQR_DIFF, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); } TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) { - SimpleTensorEltwise(kernels::EltwiseType::SUM, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {2, 4, 6, 8, 10, 12}); - SimpleTensorEltwise(kernels::EltwiseType::SUM, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {0.2, 0.4, 0.6, 0.8, 1, 1.2}, - {0.1, 0.1}); - SimpleTensorEltwise(kernels::EltwiseType::SUB, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {0, 0, 0, 0, 0}); - SimpleTensorEltwise(kernels::EltwiseType::PROD, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 4, 9, 16, 25, 36}); - SimpleTensorEltwise(kernels::EltwiseType::DIV, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - 
{1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 1, 1, 1, 1}); - SimpleTensorEltwise(kernels::EltwiseType::MIN, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); - SimpleTensorEltwise(kernels::EltwiseType::MAX, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); - SimpleTensorEltwise(kernels::EltwiseType::SQR_DIFF, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, - {1, 2, 1, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, - {0, 0, 0, 0, 0, 25, 25, 25, 25, - 25}); + SimpleTensorEltwise( + kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}); + SimpleTensorEltwise( + kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {0.2, 0.4, 0.6, 0.8, 1, 1.2}, {0.1, 0.1}); + SimpleTensorEltwise( + kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, + {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0}); + SimpleTensorEltwise( + kernels::EltwiseType::PROD, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 16, 25, 36}); + SimpleTensorEltwise( + kernels::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, + {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); + SimpleTensorEltwise( + kernels::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); + SimpleTensorEltwise( + kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + SimpleTensorEltwise( + kernels::EltwiseType::SQR_DIFF, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); } TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) { SimpleTensorEltwise( - 
kernels::EltwiseType::SUM, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {2, 4, 6, 8, 10, 12}); + kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {2, 4, 6, 8, 10, 12}); SimpleTensorEltwise( - kernels::EltwiseType::SUM, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, - {0.2, 0.4, 0.6, 0.8, 1, 1.2}, - {0.1, 0.1}); + kernels::EltwiseType::SUM, {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, {0.2, 0.4, 0.6, 0.8, 1, 1.2}, {0.1, 0.1}); SimpleTensorEltwise( - kernels::EltwiseType::SUB, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {1, 1, 1, 5}, {1, 2, 3, 4, 5}, - {0, 0, 0, 0, 0}); + kernels::EltwiseType::SUB, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, + {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0}); SimpleTensorEltwise( - kernels::EltwiseType::PROD, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 4, 9, 16, 25, 36}); + kernels::EltwiseType::PROD, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 4, 9, 16, 25, 36}); SimpleTensorEltwise( - kernels::EltwiseType::DIV, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, - {1, 1, 1, 1, 1, 1}); + kernels::EltwiseType::DIV, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, {1, 2, 1, 3}, + {1, 2, 3, 4, 5, 6}, {1, 1, 1, 1, 1, 1}); SimpleTensorEltwise( - kernels::EltwiseType::MIN, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + kernels::EltwiseType::MIN, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); SimpleTensorEltwise( - kernels::EltwiseType::MAX, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + kernels::EltwiseType::MAX, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); SimpleTensorEltwise( - kernels::EltwiseType::SQR_DIFF, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, - {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 
10}, - {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); + kernels::EltwiseType::SQR_DIFF, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); } namespace { @@ -422,9 +316,7 @@ void RandomTensorScalar(const kernels::EltwiseType type, // Add input data net.AddRandomInput("Input", shape, true, true); - net.TransformDataFormat("Input", - NHWC, - "TInput", + net.TransformDataFormat("Input", NHWC, "TInput", NCHW); OpDefBuilder("Eltwise", "EltwiseTest") .Input("TInput") @@ -434,15 +326,13 @@ void RandomTensorScalar(const kernels::EltwiseType type, .Finalize(net.NewOperatorDef()); // Run net.RunOp(DeviceType::CPU); - net.TransformDataFormat("TOutput", - NCHW, - "Output", + net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); Tensor expected; expected.Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImg", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg") .AddIntArg("type", static_cast(type)) @@ -455,13 +345,12 @@ void RandomTensorScalar(const kernels::EltwiseType type, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImg", "GPUOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(expected, *net.GetOutput("GPUOutput"), 1e-5); } else { - ExpectTensorNear(expected, *net.GetOutput("GPUOutput"), 1e-2, - 1e-2); + ExpectTensorNear(expected, *net.GetOutput("GPUOutput"), 1e-2, 1e-2); } } @@ -477,10 +366,10 @@ void RandomTensorEltwise(const kernels::EltwiseType type, net.AddRandomInput("Input0", shape0, true, true); net.AddRandomInput("Input1", shape1, true, true); - net.TransformDataFormat("Input0", NHWC, - "TInput0", NCHW); - net.TransformDataFormat("Input1", NHWC, - "TInput1", NCHW); + net.TransformDataFormat("Input0", NHWC, "TInput0", + NCHW); + net.TransformDataFormat("Input1", NHWC, "TInput1", + 
NCHW); OpDefBuilder("Eltwise", "EltwiseTest") .Input("TInput0") .Input("TInput1") @@ -491,15 +380,15 @@ void RandomTensorEltwise(const kernels::EltwiseType type, // Run net.RunOp(DeviceType::CPU); - net.TransformDataFormat("TOutput", NCHW, - "Output", NHWC); + net.TransformDataFormat("TOutput", NCHW, "Output", + NHWC); Tensor expected; expected.Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input0", "InputImg0", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg0") .Input("InputImg1") @@ -513,13 +402,12 @@ void RandomTensorEltwise(const kernels::EltwiseType type, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImg", "GPUOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { ExpectTensorNear(expected, *net.GetOutput("GPUOutput"), 1e-5); } else { - ExpectTensorNear(expected, *net.GetOutput("GPUOutput"), 1e-2, - 1e-2); + ExpectTensorNear(expected, *net.GetOutput("GPUOutput"), 1e-2, 1e-2); } } } // namespace @@ -549,88 +437,87 @@ TEST_F(EltwiseOpTest, RandomTensorScalarHalf) { } TEST_F(EltwiseOpTest, RandomTensorVecFloat) { - RandomTensorEltwise(kernels::EltwiseType::SUM, - {1, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {5, 32, 32, 16}, {5, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {5, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {5, 1, 1, 16}, {5, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, - {1, 31, 37, 17}, {1, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::PROD, - {1, 1, 1, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, - {3, 1, 1, 17}, {3, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, - {1, 1, 1, 16}, {1, 32, 32, 
16}); - RandomTensorEltwise(kernels::EltwiseType::MAX, - {5, 31, 37, 17}, {5, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, - {5, 31, 37, 17}, {5, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + {1, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {5, 32, 32, 16}, + {5, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {5, 32, 32, 16}, + {1, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {5, 1, 1, 16}, + {5, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 31, 37, 17}, + {1, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 1, 1, 17}, + {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, {3, 1, 1, 17}, + {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 1, 1, 16}, + {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, {5, 31, 37, 17}, + {5, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {5, 31, 37, 17}, + {5, 1, 1, 17}); } TEST_F(EltwiseOpTest, RandomTensorVecHalf) { - RandomTensorEltwise(kernels::EltwiseType::SUM, - {1, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {3, 32, 32, 16}, {3, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {3, 32, 32, 16}, {1, 1, 1, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {3, 1, 1, 16}, {3, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, - {1, 1, 1, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, - {5, 31, 37, 17}, {5, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, - {5, 31, 37, 17}, {1, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, - {5, 1, 1, 17}, {5, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, - {1, 1, 1, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::MAX, - {3, 31, 37, 17}, {3, 1, 1, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, - {3, 31, 37, 17}, {3, 1, 1, 
17}); + RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + {1, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + {3, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + {1, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 1, 1, 16}, + {3, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 1, 1, 17}, + {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + {5, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + {1, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 1, 1, 17}, + {5, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 1, 1, 16}, + {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, {3, 31, 37, 17}, + {3, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, + {3, 1, 1, 17}); } TEST_F(EltwiseOpTest, RandomTensorTensorFloat) { - RandomTensorEltwise(kernels::EltwiseType::SUM, - {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {3, 32, 32, 16}, {3, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, - {1, 31, 37, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, - {5, 31, 37, 17}, {5, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, - {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::MAX, - {3, 31, 37, 17}, {3, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, - {3, 31, 37, 17}, {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + {3, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 31, 37, 17}, + {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + {5, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 32, 
32, 16}, + {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, {3, 31, 37, 17}, + {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, + {3, 31, 37, 17}); } TEST_F(EltwiseOpTest, RandomTensorTensorHalf) { - RandomTensorEltwise(kernels::EltwiseType::SUM, - {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::SUB, - {3, 32, 32, 16}, {3, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::PROD, - {1, 31, 37, 17}, {1, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::DIV, - {5, 31, 37, 17}, {5, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::MIN, - {1, 32, 32, 16}, {1, 32, 32, 16}); - RandomTensorEltwise(kernels::EltwiseType::MAX, - {3, 31, 37, 17}, {3, 31, 37, 17}); - RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, - {3, 31, 37, 17}, {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::SUM, {1, 32, 32, 16}, + {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, + {3, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 31, 37, 17}, + {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 31, 37, 17}, + {5, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, {1, 32, 32, 16}, + {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, {3, 31, 37, 17}, + {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}, + {3, 31, 37, 17}); } - } // namespace test } // namespace ops } // namespace mace diff --git a/mace/ops/folded_batch_norm.h b/mace/ops/folded_batch_norm.h index 03543c1c..9cd76c73 100644 --- a/mace/ops/folded_batch_norm.h +++ b/mace/ops/folded_batch_norm.h @@ -47,7 +47,7 @@ class FoldedBatchNormOp : public Operator { offset->dim_size()); Tensor *output = this->Output(OUTPUT); - MACE_FAILURE_RETURN(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); return functor_(input, scale, offset, nullptr, nullptr, 0, output, 
future); } diff --git a/mace/ops/folded_batch_norm_test.cc b/mace/ops/folded_batch_norm_test.cc index 26cad38e..3979583a 100644 --- a/mace/ops/folded_batch_norm_test.cc +++ b/mace/ops/folded_batch_norm_test.cc @@ -36,7 +36,7 @@ void CalculateScaleOffset(const std::vector &gamma, } } -template +template void Simple() { OpsTestNet net; @@ -52,11 +52,11 @@ void Simple() { if (D == DeviceType::CPU) { net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); @@ -69,11 +69,11 @@ void Simple() { kernels::BufferType::ARGUMENT); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -83,10 +83,9 @@ void Simple() { } // Check - auto expected = - CreateTensor({1, 6, 2, 1}, {-3.8543, -3.8543, -1.5125, -1.5125, - 0.8291, 0.8291, 3.1708, 3.1708, - 5.5125, 5.5125, 7.8543, 7.8543}); + auto expected = CreateTensor( + {1, 6, 2, 1}, {-3.8543, -3.8543, -1.5125, -1.5125, 0.8291, 0.8291, 3.1708, + 3.1708, 5.5125, 5.5125, 7.8543, 7.8543}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-4); } @@ -108,29 +107,25 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); - 
net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -139,25 +134,25 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Run on opencl net.RunOp(DeviceType::GPU); net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -173,29 +168,25 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + 
net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -204,26 +195,26 @@ TEST_F(FoldedBatchNormOpTest, SimpleRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Output("OutputImage") - .AddIntArg("T", static_cast(DataType::DT_HALF)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Output("OutputImage") + .AddIntArg("T", static_cast(DataType::DT_HALF)) + .Finalize(net.NewOperatorDef()); // Run on opencl net.RunOp(DeviceType::GPU); net.Sync(); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2); } @@ -239,29 +230,25 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); - net.TransformDataFormat("Input", - NHWC, - 
"InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -270,24 +257,24 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Run on opencl net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); } @@ -303,29 +290,25 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { OpsTestNet net; // Add input data - net.AddRandomInput( - "Input", {batch, height, width, channels}); + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput("Scale", {channels}); net.AddRandomInput("Offset", {channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); 
OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputNCHW") - .Input("Scale") - .Input("Offset") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Scale") + .Input("Offset") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -334,25 +317,25 @@ TEST_F(FoldedBatchNormOpTest, ComplexRandomHalfOPENCL) { // Run on opencl BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); BufferToImage(&net, "Scale", "ScaleImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); BufferToImage(&net, "Offset", "OffsetImage", - kernels::BufferType::ARGUMENT); + kernels::BufferType::ARGUMENT); OpDefBuilder("FoldedBatchNorm", "FoldedBatchNormTest") - .Input("InputImage") - .Input("ScaleImage") - .Input("OffsetImage") - .Output("OutputImage") - .AddIntArg("T", static_cast(DataType::DT_HALF)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("ScaleImage") + .Input("OffsetImage") + .Output("OutputImage") + .AddIntArg("T", static_cast(DataType::DT_HALF)) + .Finalize(net.NewOperatorDef()); // Run on opencl net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2); } diff --git a/mace/ops/fully_connected.h b/mace/ops/fully_connected.h index a4b17d4e..286b2258 100644 --- a/mace/ops/fully_connected.h +++ b/mace/ops/fully_connected.h @@ -23,15 +23,15 @@ namespace mace { namespace ops { -template +template class FullyConnectedOp : public Operator { public: FullyConnectedOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws), - functor_(kernels::StringToActivationType( - 
OperatorBase::GetOptionalArg("activation", - "NOOP")), - OperatorBase::GetOptionalArg("max_limit", 0.0f)) {} + : Operator(operator_def, ws), + functor_(kernels::StringToActivationType( + OperatorBase::GetOptionalArg("activation", + "NOOP")), + OperatorBase::GetOptionalArg("max_limit", 0.0f)) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); @@ -40,29 +40,19 @@ class FullyConnectedOp : public Operator { Tensor *output = this->Output(OUTPUT); if (D == DeviceType::CPU) { - MACE_CHECK(input->dim(1) == weight->dim(1) - && input->dim(2) == weight->dim(2) - && input->dim(3) == weight->dim(3) - && weight->dim(0) == bias->dim(0), - "The shape of Input: ", - MakeString(input->shape()), - "The shape of Weight: ", - MakeString(weight->shape()), - " and Bias ", - bias->dim(0), - " don't match."); + MACE_CHECK( + input->dim(1) == weight->dim(1) && input->dim(2) == weight->dim(2) && + input->dim(3) == weight->dim(3) && weight->dim(0) == bias->dim(0), + "The shape of Input: ", MakeString(input->shape()), + "The shape of Weight: ", MakeString(weight->shape()), " and Bias ", + bias->dim(0), " don't match."); } else { - MACE_CHECK(input->dim(1) == weight->dim(2) - && input->dim(2) == weight->dim(3) - && input->dim(3) == weight->dim(1) - && weight->dim(0) == bias->dim(0), - "The shape of Input: ", - MakeString(input->shape()), - "The shape of Weight: ", - MakeString(weight->shape()), - " and Bias ", - bias->dim(0), - " don't match."); + MACE_CHECK( + input->dim(1) == weight->dim(2) && input->dim(2) == weight->dim(3) && + input->dim(3) == weight->dim(1) && weight->dim(0) == bias->dim(0), + "The shape of Input: ", MakeString(input->shape()), + "The shape of Weight: ", MakeString(weight->shape()), " and Bias ", + bias->dim(0), " don't match."); } return functor_(input, weight, bias, output, future); diff --git a/mace/ops/fully_connected_test.cc b/mace/ops/fully_connected_test.cc index b6dc65f0..d84f0500 100644 --- 
a/mace/ops/fully_connected_test.cc +++ b/mace/ops/fully_connected_test.cc @@ -24,7 +24,7 @@ namespace test { class FullyConnectedOpTest : public OpsTestBase {}; namespace { -template +template void Simple(const std::vector &input_shape, const std::vector &input_value, const std::vector &weight_shape, @@ -42,11 +42,11 @@ void Simple(const std::vector &input_shape, if (D == DeviceType::CPU) { OpDefBuilder("FullyConnected", "FullyConnectedTest") - .Input("Input") - .Input("Weight") - .Input("Bias") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Input("Weight") + .Input("Bias") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); @@ -59,11 +59,11 @@ void Simple(const std::vector &input_shape, kernels::BufferType::ARGUMENT); OpDefBuilder("FullyConnected", "FullyConnectedTest") - .Input("InputImage") - .Input("WeightImage") - .Input("BiasImage") - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("WeightImage") + .Input("BiasImage") + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -86,14 +86,14 @@ TEST_F(FullyConnectedOpTest, SimpleCPU) { {1, 2, 3, 4, 5, 6, 7, 8}, {1}, {2}, {1, 1, 1, 1}, {206}); Simple( - {1, 1, 2, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 1, 2, 5}, - {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, - {2}, {2, 3}, {1, 1, 1, 2}, {387, 3853}); + {1, 1, 2, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 1, 2, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100}, + {2}, {2, 3}, {1, 1, 1, 2}, {387, 3853}); Simple( - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {5, 1, 2, 3}, - {1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, - 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, 4, 5, 6}, - {5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {92, 912, 94, 914, 96}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {5, 1, 2, 3}, + {1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 50, 60, 
1, 2, 3, + 4, 5, 6, 10, 20, 30, 40, 50, 60, 1, 2, 3, 4, 5, 6}, + {5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {92, 912, 94, 914, 96}); } TEST_F(FullyConnectedOpTest, SimpleCPUWithBatch) { @@ -103,26 +103,26 @@ TEST_F(FullyConnectedOpTest, SimpleCPUWithBatch) { TEST_F(FullyConnectedOpTest, SimpleOPENCL) { Simple({1, 2, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 2, 2}, - {1, 3, 5, 7, 2, 4, 6, 8}, {1}, {2}, {1, 1, 1, 1}, - {206}); + {1, 3, 5, 7, 2, 4, 6, 8}, {1}, {2}, {1, 1, 1, 1}, + {206}); Simple( - {1, 1, 2, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 5, 1, 2}, - {1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 10, 60, 20, 70, 30, 80, 40, 90, 50, 100}, - {2}, {2, 3}, {1, 1, 1, 2}, {387, 3853}); + {1, 1, 2, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 5, 1, 2}, + {1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 10, 60, 20, 70, 30, 80, 40, 90, 50, 100}, + {2}, {2, 3}, {1, 1, 1, 2}, {387, 3853}); Simple( - {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {5, 3, 1, 2}, - {1, 4, 2, 5, 3, 6, 10, 40, 20, 50, 30, 60, 1, 4, 2, 5, 3, 6, - 10, 40, 20, 50, 30, 60, 1, 4, 2, 5, 3, 6}, - {5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {92, 912, 94, 914, 96}); + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, {5, 3, 1, 2}, + {1, 4, 2, 5, 3, 6, 10, 40, 20, 50, 30, 60, 1, 4, 2, + 5, 3, 6, 10, 40, 20, 50, 30, 60, 1, 4, 2, 5, 3, 6}, + {5}, {1, 2, 3, 4, 5}, {1, 1, 1, 5}, {92, 912, 94, 914, 96}); } TEST_F(FullyConnectedOpTest, SimpleGPUWithBatch) { Simple({2, 1, 2, 2}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 1, 2}, - {1, 3, 2, 4}, {1}, {2}, {2, 1, 1, 1}, {32, 72}); + {1, 3, 2, 4}, {1}, {2}, {2, 1, 1, 1}, {32, 72}); } namespace { -template +template void Random(const index_t batch, const index_t height, const index_t width, @@ -134,22 +134,20 @@ void Random(const index_t batch, OpsTestNet net; // Add input data + net.AddRandomInput("Input", + {batch, height, width, channels}); net.AddRandomInput( - "Input", {batch, height, width, channels}); - net.AddRandomInput( - "Weight", {out_channel, channels, height, width}); + "Weight", {out_channel, channels, height, width}); 
net.AddRandomInput("Bias", {out_channel}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("FullyConnected", "FullyConnectedTest") - .Input("InputNCHW") - .Input("Weight") - .Input("Bias") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Input("Weight") + .Input("Bias") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // run cpu net.RunOp(); @@ -169,12 +167,12 @@ void Random(const index_t batch, kernels::BufferType::ARGUMENT); OpDefBuilder("FullyConnected", "FullyConnectedTest") - .Input("InputImage") - .Input("WeightImage") - .Input("BiasImage") - .Output("OutputImage") - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Input("WeightImage") + .Input("BiasImage") + .Output("OutputImage") + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(DeviceType::GPU); @@ -182,11 +180,11 @@ void Random(const index_t batch, ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DataType::DT_HALF) { - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-1, 1e-1); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-1, + 1e-1); } else { - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-2, 1e-3); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, + 1e-3); } } } // namespace diff --git a/mace/ops/local_response_norm.h b/mace/ops/local_response_norm.h index 6f2bd691..d8ad1d3e 100644 --- a/mace/ops/local_response_norm.h +++ b/mace/ops/local_response_norm.h @@ -25,8 +25,7 @@ template class LocalResponseNormOp : public Operator { public: LocalResponseNormOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws), - functor_() { + : Operator(operator_def, ws), functor_() { depth_radius_ = 
OperatorBase::GetOptionalArg("depth_radius", 5); bias_ = OperatorBase::GetOptionalArg("bias", 1.0f); alpha_ = OperatorBase::GetOptionalArg("alpha", 1.0f); @@ -40,7 +39,7 @@ class LocalResponseNormOp : public Operator { input->dim_size()); Tensor *output = this->Output(OUTPUT); - MACE_FAILURE_RETURN(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); return functor_(input, depth_radius_, bias_, alpha_, beta_, output, future); } diff --git a/mace/ops/local_response_norm_test.cc b/mace/ops/local_response_norm_test.cc index be51532c..dc12f28a 100644 --- a/mace/ops/local_response_norm_test.cc +++ b/mace/ops/local_response_norm_test.cc @@ -21,7 +21,7 @@ namespace test { class LocalResponseNormOpTest : public OpsTestBase {}; -template +template void Simple() { OpsTestNet net; @@ -33,22 +33,22 @@ void Simple() { net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("LocalResponseNorm", "LocalResponseNormTest") - .Input("InputNCHW") - .AddIntArg("depth_radius", 5) - .AddFloatArg("bias", 1.0f) - .AddFloatArg("alpha", 1.0f) - .AddFloatArg("beta", 0.5f) - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .AddIntArg("depth_radius", 5) + .AddFloatArg("bias", 1.0f) + .AddFloatArg("alpha", 1.0f) + .AddFloatArg("beta", 0.5f) + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); } // Check - auto expected = - CreateTensor({1, 1, 2, 6}, {0.28, 0.28, 0.39, 0.39, 0.51, 0.51, - 0.34, 0.34, 0.40, 0.40, 0.47, 0.47}); + auto expected = CreateTensor( + {1, 1, 2, 6}, + {0.28, 0.28, 0.39, 0.39, 0.51, 0.51, 0.34, 0.34, 0.40, 0.40, 0.47, 0.47}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 0, 1e-2); } diff --git a/mace/ops/matmul_test.cc b/mace/ops/matmul_test.cc index da949b80..8999ffac 100644 --- a/mace/ops/matmul_test.cc +++ b/mace/ops/matmul_test.cc @@ -92,8 +92,7 @@ TEST_F(MatMulOpTest, SimpleCPUWithBatch) { 
TEST_F(MatMulOpTest, SimpleOPENCL) { Simple({1, 2, 3, 1}, {1, 2, 3, 4, 5, 6}, {1, 3, 2, 1}, - {1, 2, 3, 4, 5, 6}, {1, 2, 2, 1}, - {22, 28, 49, 64}); + {1, 2, 3, 4, 5, 6}, {1, 2, 2, 1}, {22, 28, 49, 64}); Simple( {1, 5, 5, 1}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25}, @@ -127,10 +126,9 @@ void Complex(const index_t batch, .Finalize(net.NewOperatorDef()); // Add input data - net.AddRandomInput("A", - {batch, height, channels, 1}); - net.AddRandomInput( - "B", {batch, channels, out_width, 1}); + net.AddRandomInput("A", {batch, height, channels, 1}); + net.AddRandomInput("B", + {batch, channels, out_width, 1}); // run cpu net.RunOp(); @@ -141,9 +139,9 @@ void Complex(const index_t batch, // Run on opencl BufferToImage(&net, "A", "AImage", - kernels::BufferType::IN_OUT_WIDTH); + kernels::BufferType::IN_OUT_WIDTH); BufferToImage(&net, "B", "BImage", - kernels::BufferType::IN_OUT_HEIGHT); + kernels::BufferType::IN_OUT_HEIGHT); OpDefBuilder("MatMul", "MatMulTest") .Input("AImage") @@ -156,13 +154,13 @@ void Complex(const index_t batch, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", - kernels::BufferType::IN_OUT_HEIGHT); + kernels::BufferType::IN_OUT_HEIGHT); if (DataTypeToEnum::value == DataType::DT_HALF) { - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-2, 1e-1); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, + 1e-1); } else { - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-5, 1e-5); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5, + 1e-5); } } } // namespace diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index ad7d2ee6..29c8d6af 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -112,12 +112,12 @@ class OpsTestNet { public: OpsTestNet() : op_registry_(new OperatorRegistry()) {} - template + template void AddInputFromArray(const std::string &name, const std::vector &shape, const 
std::vector &data) { Tensor *input = - ws_.CreateTensor(name, GetDeviceAllocator(D), DataTypeToEnum::v()); + ws_.CreateTensor(name, GetDeviceAllocator(D), DataTypeToEnum::v()); input->Resize(shape); Tensor::MappingGuard input_mapper(input); T *input_data = input->mutable_data(); @@ -125,25 +125,25 @@ class OpsTestNet { memcpy(input_data, data.data(), data.size() * sizeof(T)); } - template + template void AddRepeatedInput(const std::string &name, const std::vector &shape, const T data) { Tensor *input = - ws_.CreateTensor(name, GetDeviceAllocator(D), DataTypeToEnum::v()); + ws_.CreateTensor(name, GetDeviceAllocator(D), DataTypeToEnum::v()); input->Resize(shape); Tensor::MappingGuard input_mapper(input); T *input_data = input->mutable_data(); std::fill(input_data, input_data + input->size(), data); } - template + template void AddRandomInput(const std::string &name, const std::vector &shape, bool positive = true, bool truncate = false) { Tensor *input = - ws_.CreateTensor(name, GetDeviceAllocator(D), DataTypeToEnum::v()); + ws_.CreateTensor(name, GetDeviceAllocator(D), DataTypeToEnum::v()); input->Resize(shape); Tensor::MappingGuard input_mapper(input); T *input_data = input->mutable_data(); @@ -153,15 +153,15 @@ class OpsTestNet { std::normal_distribution nd(0, 1); if (DataTypeToEnum::value == DT_HALF) { std::generate( - input_data, input_data + input->size(), - [&gen, &nd, positive, truncate] { - float d = nd(gen); - if (truncate) { - if (std::abs(d) > 100.f) d = 100.f; - if (std::abs(d) < 0.001f) d = 0.001f; - } - return half_float::half_cast(positive ?std::abs(d) : d); - }); + input_data, input_data + input->size(), + [&gen, &nd, positive, truncate] { + float d = nd(gen); + if (truncate) { + if (std::abs(d) > 100.f) d = 100.f; + if (std::abs(d) < 0.001f) d = 0.001f; + } + return half_float::half_cast(positive ? 
std::abs(d) : d); + }); } else { std::generate(input_data, input_data + input->size(), [&gen, &nd, positive, truncate] { @@ -170,17 +170,15 @@ class OpsTestNet { if (std::abs(d) > 100.f) d = 100.f; if (std::abs(d) < 0.001f) d = 0.001f; } - return (positive ?std::abs(d) : d); + return (positive ? std::abs(d) : d); }); } } - template - void Transpose2D(const std::string &src_name, - const std::string &dst_name) { + template + void Transpose2D(const std::string &src_name, const std::string &dst_name) { Tensor *input = ws_.GetTensor(src_name); - Tensor *output = ws_.CreateTensor(dst_name, - GetDeviceAllocator(D), + Tensor *output = ws_.CreateTensor(dst_name, GetDeviceAllocator(D), DataTypeToEnum::v()); const std::vector input_shape = input->shape(); MACE_CHECK(input_shape.size() == 2, "input shape != 2"); @@ -192,19 +190,18 @@ class OpsTestNet { for (index_t i = 0; i < input_shape[0]; ++i) { for (index_t j = 0; j < input_shape[1]; ++j) { output_data[j * input_shape[0] + i] = - input_data[i * input_shape[1] + j]; + input_data[i * input_shape[1] + j]; } } } - template + template void TransformDataFormat(const std::string &src_name, const DataFormat src_format, const std::string &dst_name, const DataFormat dst_format) { Tensor *input = ws_.GetTensor(src_name); - Tensor *output = ws_.CreateTensor(dst_name, - GetDeviceAllocator(D), + Tensor *output = ws_.CreateTensor(dst_name, GetDeviceAllocator(D), DataTypeToEnum::v()); const std::vector input_shape = input->shape(); MACE_CHECK(input_shape.size() == 4, "input shape != 4"); @@ -224,7 +221,7 @@ class OpsTestNet { for (index_t h = 0; h < height; ++h) { for (index_t w = 0; w < width; ++w) { output_data[((b * channels + c) * height + h) * width + w] = - input_data[((b * height + h) * width + w) * channels + c]; + input_data[((b * height + h) * width + w) * channels + c]; } } } @@ -244,7 +241,7 @@ class OpsTestNet { for (index_t w = 0; w < width; ++w) { for (index_t c = 0; c < channels; ++c) { output_data[((b * height + h) * 
width + w) * channels + c] = - input_data[((b * channels + c) * height + h) * width + w]; + input_data[((b * channels + c) * height + h) * width + w]; } } } @@ -264,7 +261,7 @@ class OpsTestNet { for (index_t i = 0; i < oi; ++i) { for (index_t j = 0; j < hw; ++j) { output_data[i * height * width + j] = - input_data[j * out_channels * in_channels + i]; + input_data[j * out_channels * in_channels + i]; } } } else if (src_format == OIHW && dst_format == HWOI) { @@ -282,7 +279,7 @@ class OpsTestNet { for (index_t i = 0; i < hw; ++i) { for (index_t j = 0; j < oi; ++j) { output_data[i * out_channels * in_channels + j] = - input_data[j * height * width + i]; + input_data[j * height * width + i]; } } } else if (src_format == HWIO && dst_format == OIHW) { @@ -300,7 +297,8 @@ class OpsTestNet { for (index_t c = 0; c < in_channels; ++c) { for (index_t k = 0; k < hw; ++k) { output_data[((m * in_channels) + c) * height * width + k] = - input_data[k * out_channels * in_channels + c * out_channels + m]; + input_data[k * out_channels * in_channels + c * out_channels + + m]; } } } @@ -309,12 +307,11 @@ class OpsTestNet { } } - template + template void FillNHWCInputToNCHWInput(const std::string &name_nchw, const std::string &name_nhwc) { Tensor *input = ws_.GetTensor(name_nhwc); - Tensor *output = ws_.CreateTensor(name_nchw, - GetDeviceAllocator(D), + Tensor *output = ws_.CreateTensor(name_nchw, GetDeviceAllocator(D), DataTypeToEnum::v()); const std::vector input_shape = input->shape(); index_t batch = input_shape[0]; @@ -329,7 +326,7 @@ class OpsTestNet { for (index_t h = 0; h < height; ++h) { for (index_t w = 0; w < width; ++w) { output_data[((b * channels + c) * height + h) * width + w] = - input_data[((b * height + h) * width + w) * channels + c]; + input_data[((b * height + h) * width + w) * channels + c]; } } } @@ -370,14 +367,12 @@ class OpsTestNet { // DEPRECATED(liyin): // Test and benchmark should setup model once and run multiple times. 
// Setup time should not be counted during benchmark. - MaceStatus RunOp() { - return RunOp(DeviceType::CPU); - } + MaceStatus RunOp() { return RunOp(DeviceType::CPU); } MaceStatus RunNet(const NetDef &net_def, const DeviceType device) { device_ = device; net_ = CreateNet(op_registry_, net_def, &ws_, device, NetMode::INIT); - MACE_FAILURE_RETURN(net_->Run()); + MACE_RETURN_IF_ERROR(net_->Run()); net_ = CreateNet(op_registry_, net_def, &ws_, device); return net_->Run(); } @@ -415,7 +410,7 @@ class OpsTestBase : public ::testing::Test { } }; -template +template void GenerateRandomRealTypeData(const std::vector &shape, std::vector *res, bool positive = true) { @@ -430,11 +425,10 @@ void GenerateRandomRealTypeData(const std::vector &shape, res->resize(size); if (DataTypeToEnum::value == DT_HALF) { - std::generate(res->begin(), res->end(), - [&gen, &nd, positive] { - return half_float::half_cast( - positive ? std::abs(nd(gen)) : nd(gen)); - }); + std::generate(res->begin(), res->end(), [&gen, &nd, positive] { + return half_float::half_cast(positive ? std::abs(nd(gen)) + : nd(gen)); + }); } else { std::generate(res->begin(), res->end(), [&gen, &nd, positive] { return positive ? 
std::abs(nd(gen)) : nd(gen); @@ -442,7 +436,7 @@ void GenerateRandomRealTypeData(const std::vector &shape, } } -template +template void GenerateRandomIntTypeData(const std::vector &shape, std::vector *res, const T a = 0, @@ -460,7 +454,7 @@ void GenerateRandomIntTypeData(const std::vector &shape, std::generate(res->begin(), res->end(), [&gen, &nd] { return nd(gen); }); } -template +template std::vector VectorStaticCast(const std::vector &&src) { std::vector dest; dest.reserve(src.size()); @@ -470,11 +464,11 @@ std::vector VectorStaticCast(const std::vector &&src) { return std::move(dest); } -template +template std::unique_ptr CreateTensor(const std::vector &shape, const std::vector &data) { std::unique_ptr res( - new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum::v())); + new Tensor(GetDeviceAllocator(DeviceType::CPU), DataTypeToEnum::v())); res->Resize(shape); T *input_data = res->mutable_data(); memcpy(input_data, data.data(), data.size() * sizeof(T)); @@ -504,24 +498,24 @@ inline std::string ShapeToString(const Tensor &x) { return std::string(stream.str()); } -template +template struct is_floating_point_type { static const bool value = std::is_same::value || - std::is_same::value || - std::is_same::value; + std::is_same::value || + std::is_same::value; }; -template +template inline void ExpectEqual(const T &a, const T &b) { EXPECT_EQ(a, b); } -template<> +template <> inline void ExpectEqual(const float &a, const float &b) { EXPECT_FLOAT_EQ(a, b); } -template<> +template <> inline void ExpectEqual(const double &a, const double &b) { EXPECT_DOUBLE_EQ(a, b); } @@ -531,13 +525,13 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) { << "y.shape [ " << ShapeToString(y) << "]"; } -template::value> +template ::value> struct Expector; // Partial specialization for float and double. 
-template +template struct Expector { static void Equal(const EXP_TYPE &a, const RES_TYPE &b) { ExpectEqual(a, b); } @@ -554,7 +548,8 @@ struct Expector { } } - static void Near(const Tensor &x, const Tensor &y, + static void Near(const Tensor &x, + const Tensor &y, const double rel_err, const double abs_err) { ASSERT_EQ(x.dtype(), DataTypeToEnum::v()); @@ -588,7 +583,7 @@ struct Expector { } }; -template +template struct Expector { static void Equal(const EXP_TYPE &a, const RES_TYPE &b) { ExpectEqual(a, b); } @@ -605,7 +600,8 @@ struct Expector { } } - static void Near(const Tensor &x, const Tensor &y, + static void Near(const Tensor &x, + const Tensor &y, const double rel_err, const double abs_err) { MACE_UNUSED(rel_err); @@ -614,21 +610,23 @@ struct Expector { } }; -template -void ExpectTensorNear(const Tensor &x, const Tensor &y, +template +void ExpectTensorNear(const Tensor &x, + const Tensor &y, const double rel_err = 1e-5, const double abs_err = 1e-8) { Expector::Near(x, y, rel_err, abs_err); } -template -void ExpectTensorNear(const Tensor &x, const Tensor &y, +template +void ExpectTensorNear(const Tensor &x, + const Tensor &y, const double rel_err = 1e-5, const double abs_err = 1e-8) { Expector::Near(x, y, rel_err, abs_err); } -template +template void BufferToImage(OpsTestNet *net, const std::string &input_name, const std::string &output_name, @@ -636,11 +634,11 @@ void BufferToImage(OpsTestNet *net, MACE_CHECK_NOTNULL(net); OpDefBuilder("BufferToImage", "BufferToImageTest") - .Input(input_name) - .Output(output_name) - .AddIntArg("buffer_type", type) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net->NewOperatorDef()); + .Input(input_name) + .Output(output_name) + .AddIntArg("buffer_type", type) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net->NewOperatorDef()); // Run net->RunOp(D); @@ -648,7 +646,7 @@ void BufferToImage(OpsTestNet *net, net->Sync(); } -template +template void ImageToBuffer(OpsTestNet *net, const 
std::string &input_name, const std::string &output_name, @@ -656,11 +654,11 @@ void ImageToBuffer(OpsTestNet *net, MACE_CHECK_NOTNULL(net); OpDefBuilder("ImageToBuffer", "ImageToBufferTest") - .Input(input_name) - .Output(output_name) - .AddIntArg("buffer_type", type) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net->NewOperatorDef()); + .Input(input_name) + .Output(output_name) + .AddIntArg("buffer_type", type) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net->NewOperatorDef()); // Run net->RunOp(D); diff --git a/mace/ops/pad.h b/mace/ops/pad.h index 3ab03fd6..98677109 100644 --- a/mace/ops/pad.h +++ b/mace/ops/pad.h @@ -29,8 +29,7 @@ class PadOp : public Operator { PadOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws), functor_(OperatorBase::GetRepeatedArgs("paddings"), - OperatorBase::GetOptionalArg("constant_value", 0.0)) - {} + OperatorBase::GetOptionalArg("constant_value", 0.0)) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input_tensor = this->Input(0); diff --git a/mace/ops/pad_test.cc b/mace/ops/pad_test.cc index e244acec..2f4a9721 100644 --- a/mace/ops/pad_test.cc +++ b/mace/ops/pad_test.cc @@ -45,9 +45,7 @@ void Simple() { ImageToBuffer(&net, "OutputImage", "Output", kernels::BufferType::IN_OUT_CHANNEL); } else { - net.TransformDataFormat("Input", - NHWC, - "TInput", + net.TransformDataFormat("Input", NHWC, "TInput", NCHW); OpDefBuilder("Pad", "PadTest") .Input("TInput") @@ -59,33 +57,25 @@ void Simple() { // Run net.RunOp(); - net.TransformDataFormat("TOutput", - NCHW, - "Output", + net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); } auto output = net.GetTensor("Output"); - auto expected = CreateTensor({1, 5, 6, 1}, - { - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 2, 2, 2, 1.0, 1.0, - 1.0, 2, 2, 2, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, - }); + auto expected = CreateTensor( + {1, 5, 6, 1}, { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
2, 2, 2, + 1.0, 1.0, 1.0, 2, 2, 2, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + }); ExpectTensorNear(*expected, *output, 1e-5); } } // namespace -TEST_F(PadTest, SimpleCPU) { - Simple(); -} +TEST_F(PadTest, SimpleCPU) { Simple(); } -TEST_F(PadTest, SimpleGPU) { - Simple(); -} +TEST_F(PadTest, SimpleGPU) { Simple(); } TEST_F(PadTest, ComplexCPU) { // Construct graph @@ -93,9 +83,7 @@ TEST_F(PadTest, ComplexCPU) { // Add input data net.AddRepeatedInput("Input", {1, 1, 1, 2}, 2); - net.TransformDataFormat("Input", - NHWC, - "TInput", + net.TransformDataFormat("Input", NHWC, "TInput", NCHW); OpDefBuilder("Pad", "PadTest") .Input("TInput") @@ -106,9 +94,7 @@ TEST_F(PadTest, ComplexCPU) { // Run net.RunOp(); - net.TransformDataFormat("TOutput", - NCHW, - "Output", + net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); auto output = net.GetTensor("Output"); @@ -134,9 +120,7 @@ void Complex(const std::vector &input_shape, // Add input data net.AddRandomInput("Input", input_shape); - net.TransformDataFormat("Input", - NHWC, - "TInput", + net.TransformDataFormat("Input", NHWC, "TInput", NCHW); OpDefBuilder("Pad", "PadTest") .Input("TInput") @@ -147,16 +131,14 @@ void Complex(const std::vector &input_shape, // Run net.RunOp(); - net.TransformDataFormat("TOutput", - NCHW, - "Output", + net.TransformDataFormat("TOutput", NCHW, "Output", NHWC); Tensor expected; expected.Copy(*net.GetOutput("Output")); BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pad", "PadTest") .Input("InputImage") .Output("OutputImage") @@ -168,7 +150,7 @@ void Complex(const std::vector &input_shape, net.RunOp(DeviceType::GPU); ImageToBuffer(&net, "OutputImage", "OpenCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); auto output = net.GetTensor("OpenCLOutput"); @@ -181,24 +163,23 @@ void Complex(const std::vector &input_shape, } // namespace 
TEST_F(PadTest, ComplexFloat) { - Complex({1, 32, 32, 4}, - {0, 0, 0, 0, 2, 2, 1, 1}, {0, 0, 2, 2, 1, 1, 0, 0}); - Complex({1, 31, 37, 16}, - {0, 0, 0, 0, 2, 0, 1, 0}, {0, 0, 2, 0, 1, 0, 0, 0}); - Complex({1, 128, 128, 32}, - {0, 0, 0, 0, 0, 1, 0, 2}, {0, 0, 0, 1, 0, 2, 0, 0}); + Complex({1, 32, 32, 4}, {0, 0, 0, 0, 2, 2, 1, 1}, + {0, 0, 2, 2, 1, 1, 0, 0}); + Complex({1, 31, 37, 16}, {0, 0, 0, 0, 2, 0, 1, 0}, + {0, 0, 2, 0, 1, 0, 0, 0}); + Complex({1, 128, 128, 32}, {0, 0, 0, 0, 0, 1, 0, 2}, + {0, 0, 0, 1, 0, 2, 0, 0}); } TEST_F(PadTest, ComplexHalf) { - Complex({1, 32, 32, 4}, - {0, 0, 0, 0, 2, 2, 1, 1}, {0, 0, 2, 2, 1, 1, 0, 0}); - Complex({1, 31, 37, 16}, - {0, 0, 0, 0, 2, 0, 1, 0}, {0, 0, 2, 0, 1, 0, 0, 0}); - Complex({1, 128, 128, 32}, - {0, 0, 0, 0, 0, 1, 0, 2}, {0, 0, 0, 1, 0, 2, 0, 0}); + Complex({1, 32, 32, 4}, {0, 0, 0, 0, 2, 2, 1, 1}, + {0, 0, 2, 2, 1, 1, 0, 0}); + Complex({1, 31, 37, 16}, {0, 0, 0, 0, 2, 0, 1, 0}, + {0, 0, 2, 0, 1, 0, 0, 0}); + Complex({1, 128, 128, 32}, {0, 0, 0, 0, 0, 1, 0, 2}, + {0, 0, 0, 1, 0, 2, 0, 0}); } } // namespace test } // namespace ops } // namespace mace - diff --git a/mace/ops/pooling_test.cc b/mace/ops/pooling_test.cc index 9a2c769b..8e7fbb83 100644 --- a/mace/ops/pooling_test.cc +++ b/mace/ops/pooling_test.cc @@ -31,36 +31,32 @@ TEST_F(PoolingOpTest, MAX_VALID) { // Add input data net.AddInputFromArray( - "Input", {1, 4, 4, 2}, - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, - 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); + "Input", {1, 4, 4, 2}, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("kernels", {2, 2}) - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::VALID) - 
.AddIntsArg("dilations", {1, 1}) - .AddIntArg("pooling_type", PoolingType::MAX) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("kernels", {2, 2}) + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("pooling_type", PoolingType::MAX) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check auto expected = - CreateTensor({1, 2, 2, 2}, {5, 21, 7, 23, 13, 29, 15, 31}); + CreateTensor({1, 2, 2, 2}, {5, 21, 7, 23, 13, 29, 15, 31}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } @@ -73,27 +69,23 @@ TEST_F(PoolingOpTest, MAX_SAME) { net.AddInputFromArray("Input", {1, 3, 3, 1}, {0, 1, 2, 3, 4, 5, 6, 7, 8}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("kernels", {2, 2}) - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("pooling_type", PoolingType::MAX) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("kernels", {2, 2}) + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("pooling_type", PoolingType::MAX) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -108,30 +100,26 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) { // Add input data net.AddInputFromArray( - "Input", {1, 4, 4, 1}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + "Input", {1, 4, 4, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); - 
net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("kernels", {2, 2}) - .AddIntsArg("strides", {1, 1}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {2, 2}) - .AddIntArg("pooling_type", PoolingType::MAX) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("kernels", {2, 2}) + .AddIntsArg("strides", {1, 1}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {2, 2}) + .AddIntArg("pooling_type", PoolingType::MAX) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -146,31 +134,26 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { // Add input data net.AddInputFromArray( - "Input", {1, 2, 9, 1}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}); + "Input", {1, 2, 9, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntArg("pooling_type", PoolingType::MAX) - .AddIntsArg("kernels", {2, 2}) - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); - + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntArg("pooling_type", PoolingType::MAX) + .AddIntsArg("kernels", {2, 2}) + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -180,49 +163,45 
@@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { } namespace { -template +template void SimpleMaxPooling3S2() { // Construct graph OpsTestNet net; // Add input data net.AddInputFromArray( - "Input", {1, 3, 9, 1}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}); + "Input", {1, 3, 9, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26}); if (D == DeviceType::CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); // Run OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntArg("pooling_type", PoolingType::MAX) - .AddIntsArg("kernels", {3, 3}) - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntArg("pooling_type", PoolingType::MAX) + .AddIntsArg("kernels", {3, 3}) + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); net.RunOp(D); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } else if (D == DeviceType::GPU) { BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntArg("pooling_type", PoolingType::MAX) - .AddIntsArg("kernels", {3, 3}) - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntArg("pooling_type", PoolingType::MAX) + .AddIntsArg("kernels", {3, 3}) + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + 
.Finalize(net.NewOperatorDef()); net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", kernels::BufferType::IN_OUT_CHANNEL); @@ -237,12 +216,10 @@ void SimpleMaxPooling3S2() { TEST_F(PoolingOpTest, CPUSimpleMaxPooling3S2) { SimpleMaxPooling3S2(); } -TEST_F(PoolingOpTest, OPENCLSimpleMaxPooling3S2) { - SimpleMaxPooling3S2(); -} +TEST_F(PoolingOpTest, OPENCLSimpleMaxPooling3S2) { SimpleMaxPooling3S2(); } namespace { -template +template void MaxPooling3S2(const std::vector &input_shape, const std::vector strides, Padding padding) { @@ -252,27 +229,23 @@ void MaxPooling3S2(const std::vector &input_shape, // Add input data net.AddRandomInput("Input", input_shape); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntArg("pooling_type", PoolingType::MAX) - .AddIntsArg("kernels", {3, 3}) - .AddIntsArg("strides", strides) - .AddIntArg("padding", padding) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntArg("pooling_type", PoolingType::MAX) + .AddIntsArg("kernels", {3, 3}) + .AddIntsArg("strides", strides) + .AddIntArg("padding", padding) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); Tensor expected; @@ -281,22 +254,22 @@ void MaxPooling3S2(const std::vector &input_shape, BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntArg("pooling_type", PoolingType::MAX) - .AddIntsArg("kernels", {3, 3}) - .AddIntsArg("strides", strides) - .AddIntArg("padding", padding) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", 
static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntArg("pooling_type", PoolingType::MAX) + .AddIntsArg("kernels", {3, 3}) + .AddIntsArg("strides", strides) + .AddIntArg("padding", padding) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_HALF) { - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-3, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-3, + 1e-4); } else { ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5); } @@ -330,63 +303,58 @@ TEST_F(PoolingOpTest, AVG_VALID) { // Add input data net.AddInputFromArray( - "Input", {1, 4, 4, 2}, - {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, - 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); + "Input", {1, 4, 4, 2}, + {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, + 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("kernels", {2, 2}) - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::VALID) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("pooling_type", PoolingType::AVG) - .Finalize(net.NewOperatorDef()); - + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("kernels", {2, 2}) + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::VALID) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("pooling_type", PoolingType::AVG) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", 
NHWC); // Check auto expected = CreateTensor( - {1, 2, 2, 2}, {2.5, 18.5, 4.5, 20.5, 10.5, 26.5, 12.5, 28.5}); + {1, 2, 2, 2}, {2.5, 18.5, 4.5, 20.5, 10.5, 26.5, 12.5, 28.5}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } namespace { -template +template void SimpleAvgPoolingTest() { // Construct graph OpsTestNet net; // Add input data net.AddInputFromArray( - "Input", {1, 2, 8, 1}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); + "Input", {1, 2, 8, 1}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}); BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntArg("pooling_type", PoolingType::AVG) - .AddIntsArg("kernels", {2, 2}) - .AddIntsArg("strides", {2, 2}) - .AddIntArg("padding", Padding::SAME) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntArg("pooling_type", PoolingType::AVG) + .AddIntsArg("kernels", {2, 2}) + .AddIntsArg("strides", {2, 2}) + .AddIntArg("padding", Padding::SAME) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); ImageToBuffer(&net, "OutputImage", "Output", @@ -399,12 +367,10 @@ void SimpleAvgPoolingTest() { } } // namespace -TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) { - SimpleAvgPoolingTest(); -} +TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) { SimpleAvgPoolingTest(); } namespace { -template +template void AvgPoolingTest(const std::vector &shape, const std::vector &kernels, const std::vector &strides, @@ -415,27 +381,23 @@ void AvgPoolingTest(const std::vector &shape, // Add input data net.AddRandomInput("Input", shape); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntArg("pooling_type", PoolingType::AVG) 
- .AddIntsArg("kernels", kernels) - .AddIntsArg("strides", strides) - .AddIntArg("padding", padding) - .AddIntsArg("dilations", {1, 1}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntArg("pooling_type", PoolingType::AVG) + .AddIntsArg("kernels", kernels) + .AddIntsArg("strides", strides) + .AddIntArg("padding", padding) + .AddIntsArg("dilations", {1, 1}) + .Finalize(net.NewOperatorDef()); // run on cpu net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); Tensor expected; @@ -444,68 +406,60 @@ void AvgPoolingTest(const std::vector &shape, BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Pooling", "PoolingTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntArg("pooling_type", PoolingType::AVG) - .AddIntsArg("kernels", kernels) - .AddIntsArg("strides", strides) - .AddIntArg("padding", padding) - .AddIntsArg("dilations", {1, 1}) - .AddIntArg("T", static_cast(DataTypeToEnum::value)) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntArg("pooling_type", PoolingType::AVG) + .AddIntsArg("kernels", kernels) + .AddIntsArg("strides", strides) + .AddIntArg("padding", padding) + .AddIntsArg("dilations", {1, 1}) + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); net.RunOp(D); ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_HALF) { - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-3, 1e-3); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-3, + 1e-3); } else { - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-5); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5); } } } // namespace TEST_F(PoolingOpTest, OPENCLAlignedAvgPooling) { - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, - 
Padding::VALID); - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, - Padding::SAME); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedAvgPooling) { - AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, - Padding::VALID); + AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::VALID); AvgPoolingTest({3, 15, 15, 128}, {4, 4}, {4, 4}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLAlignedLargeKernelAvgPooling) { AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, - Padding::VALID); + Padding::VALID); AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, - Padding::SAME); + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLHalfAlignedLargeKernelAvgPooling) { AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, - Padding::VALID); + Padding::VALID); AvgPoolingTest({3, 64, 64, 128}, {16, 16}, {16, 16}, - Padding::SAME); + Padding::SAME); } TEST_F(PoolingOpTest, OPENCLUnAlignedAvgPooling) { - AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, - Padding::VALID); - AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, - Padding::SAME); + AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::VALID); + AvgPoolingTest({3, 31, 37, 128}, {2, 2}, {2, 2}, Padding::SAME); } TEST_F(PoolingOpTest, OPENCLUnAlignedLargeKernelAvgPooling) { - AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, - Padding::VALID); - AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, - Padding::SAME); + AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::VALID); + AvgPoolingTest({3, 31, 37, 128}, {8, 8}, {8, 8}, Padding::SAME); } } // namespace test diff --git a/mace/ops/proposal_test.cc b/mace/ops/proposal_test.cc index d36fd3a2..c5b71ad2 100644 --- a/mace/ops/proposal_test.cc +++ b/mace/ops/proposal_test.cc @@ -45,17 +45,17 @@ TEST_F(ProposalOpTest, CPUSimple) { .Finalize(net.NewOperatorDef()); std::vector scores(height * width * 18); - for (size_t i = 0 ; i < scores.size(); ++i) { + 
for (size_t i = 0; i < scores.size(); ++i) { scores[i] = i; } // Add input data - net.AddInputFromArray( - "RpnCLSProb", {1, height, width, 18}, scores); - net.AddRepeatedInput( - "RpnBBoxPred", {1, height, width, 4 * 9}, 1); - net.AddInputFromArray( - "ImgInfo", {1, 1, 1, 3}, {img_height, img_width, 2}); + net.AddInputFromArray("RpnCLSProb", + {1, height, width, 18}, scores); + net.AddRepeatedInput("RpnBBoxPred", + {1, height, width, 4 * 9}, 1); + net.AddInputFromArray("ImgInfo", {1, 1, 1, 3}, + {img_height, img_width, 2}); // Run net.RunOp(); @@ -65,7 +65,6 @@ TEST_F(ProposalOpTest, CPUSimple) { ExpectTensorNear(*expected_tensor, *net.GetTensor("Output"), 1e-5); } - } // namespace test } // namespace ops } // namespace mace diff --git a/mace/ops/quantize.cc b/mace/ops/quantize.cc index ffd4cd78..dad9610d 100644 --- a/mace/ops/quantize.cc +++ b/mace/ops/quantize.cc @@ -35,9 +35,9 @@ void Register_Dequantize(OperatorRegistry *op_registry) { void Register_Requantize(OperatorRegistry *op_registry) { MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Requantize") - .Device(DeviceType::CPU) - .TypeConstraint("T") - .Build(), + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), RequantizeOp); } diff --git a/mace/ops/quantize.h b/mace/ops/quantize.h index bf0d1534..ef2ec147 100644 --- a/mace/ops/quantize.h +++ b/mace/ops/quantize.h @@ -21,12 +21,11 @@ namespace mace { namespace ops { -template +template class QuantizeOp : public Operator { public: QuantizeOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws) { - } + : Operator(operator_def, ws) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); @@ -39,9 +38,9 @@ class QuantizeOp : public Operator { Tensor *output = this->Output(OUTPUT); Tensor *out_min = this->Output(OUT_MIN); Tensor *out_max = this->Output(OUT_MAX); - MACE_FAILURE_RETURN(output->ResizeLike(input)); - MACE_FAILURE_RETURN(out_min->ResizeLike(in_min)); - 
MACE_FAILURE_RETURN(out_max->ResizeLike(in_max)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(out_min->ResizeLike(in_min)); + MACE_RETURN_IF_ERROR(out_max->ResizeLike(in_max)); return functor_(input, in_min, in_max, output, out_min, out_max, future); } @@ -54,12 +53,11 @@ class QuantizeOp : public Operator { MACE_OP_OUTPUT_TAGS(OUTPUT, OUT_MIN, OUT_MAX); }; -template +template class DequantizeOp : public Operator { public: DequantizeOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws) { - } + : Operator(operator_def, ws) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); @@ -70,7 +68,7 @@ class DequantizeOp : public Operator { MACE_CHECK(in_max->size() == 1, "max val tensor has more than 1 value"); Tensor *output = this->Output(OUTPUT); - MACE_FAILURE_RETURN(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); return functor_(input, in_min, in_max, output, future); } @@ -83,12 +81,11 @@ class DequantizeOp : public Operator { MACE_OP_OUTPUT_TAGS(OUTPUT); }; -template +template class RequantizeOp : public Operator { public: RequantizeOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws) { - } + : Operator(operator_def, ws) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); @@ -112,19 +109,12 @@ class RequantizeOp : public Operator { Tensor *output = this->Output(OUTPUT); Tensor *out_min = this->Output(OUT_MIN); Tensor *out_max = this->Output(OUT_MAX); - MACE_FAILURE_RETURN(output->ResizeLike(input)); - MACE_FAILURE_RETURN(out_min->ResizeLike(in_min)); - MACE_FAILURE_RETURN(out_max->ResizeLike(out_max)); - - return functor_(input, - in_min, - in_max, - rerange_min, - rerange_max, - output, - out_min, - out_max, - future); + MACE_RETURN_IF_ERROR(output->ResizeLike(input)); + MACE_RETURN_IF_ERROR(out_min->ResizeLike(in_min)); + MACE_RETURN_IF_ERROR(out_max->ResizeLike(in_max));
+ + return functor_(input, in_min, in_max, rerange_min, rerange_max, output, + out_min, out_max, future); } private: diff --git a/mace/ops/quantize_test.cc b/mace/ops/quantize_test.cc index 536e9ad7..288b1faf 100644 --- a/mace/ops/quantize_test.cc +++ b/mace/ops/quantize_test.cc @@ -26,22 +26,21 @@ TEST_F(QuantizeTest, TestQuantize) { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", {1, 2, 3, 1}, { - -2, -1, 1, 2, 3, 4 - }); + net.AddInputFromArray("Input", {1, 2, 3, 1}, + {-2, -1, 1, 2, 3, 4}); net.AddInputFromArray("InputMin", {1}, {-3}); net.AddInputFromArray("InputMax", {1}, {5}); OpDefBuilder("Quantize", "QuantizeTest") - .Input("Input") - .Input("InputMin") - .Input("InputMax") - .Output("Output") - .Output("OutputMin") - .Output("OutputMax") - .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT}) - .AddIntArg("T", DT_UINT8) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Input("InputMin") + .Input("InputMax") + .Output("Output") + .Output("OutputMin") + .Output("OutputMax") + .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT}) + .AddIntArg("T", DT_UINT8) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); @@ -50,10 +49,8 @@ TEST_F(QuantizeTest, TestQuantize) { auto output_min = net.GetTensor("OutputMin"); auto output_max = net.GetTensor("OutputMax"); - auto expected_output = CreateTensor({1, 2, 3, 1}, - { - 32, 64, 127, 159, 191, 223 - }); + auto expected_output = + CreateTensor({1, 2, 3, 1}, {32, 64, 127, 159, 191, 223}); auto expected_min = CreateTensor({1}, {-3.01887}); auto expected_max = CreateTensor({1}, {5}); @@ -69,27 +66,25 @@ TEST_F(QuantizeTest, TestQuantizeTrend) { // Add input data net.AddRandomInput("Input", {100}); const float *input_data = net.GetTensor("Input")->data(); - net.AddInputFromArray("InputMin", - {1}, - {*std::min_element(input_data, - input_data - + net.GetTensor("Input")->size())}); - net.AddInputFromArray("InputMax", - {1}, - {*std::max_element(input_data, - input_data - + net.GetTensor("Input")->size())}); + 
net.AddInputFromArray( + "InputMin", {1}, + {*std::min_element(input_data, + input_data + net.GetTensor("Input")->size())}); + net.AddInputFromArray( + "InputMax", {1}, + {*std::max_element(input_data, + input_data + net.GetTensor("Input")->size())}); OpDefBuilder("Quantize", "QuantizeTest") - .Input("Input") - .Input("InputMin") - .Input("InputMax") - .Output("Output") - .Output("OutputMin") - .Output("OutputMax") - .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT}) - .AddIntArg("T", DT_UINT8) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Input("InputMin") + .Input("InputMax") + .Output("Output") + .Output("OutputMin") + .Output("OutputMax") + .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT}) + .AddIntArg("T", DT_UINT8) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); @@ -113,29 +108,26 @@ TEST_F(QuantizeTest, TestDequantize) { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", {1, 2, 3, 1}, { - 32, 64, 127, 159, 191, 223 - }); + net.AddInputFromArray("Input", {1, 2, 3, 1}, + {32, 64, 127, 159, 191, 223}); net.AddInputFromArray("InputMin", {1}, {-3.01887}); net.AddInputFromArray("InputMax", {1}, {5}); OpDefBuilder("Dequantize", "DequantizeTest") - .Input("Input") - .Input("InputMin") - .Input("InputMax") - .Output("Output") - .OutputType({DT_FLOAT}) - .AddIntArg("T", DT_UINT8) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Input("InputMin") + .Input("InputMax") + .Output("Output") + .OutputType({DT_FLOAT}) + .AddIntArg("T", DT_UINT8) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); auto output = net.GetTensor("Output"); - auto expected_output = CreateTensor({1, 2, 3, 1}, - { - -2, -1, 1, 2, 3, 4 - }); + auto expected_output = + CreateTensor({1, 2, 3, 1}, {-2, -1, 1, 2, 3, 4}); auto expected_min = CreateTensor({1}, {-3.01887}); auto expected_max = CreateTensor({1}, {5}); @@ -147,35 +139,33 @@ TEST_F(QuantizeTest, TestRequantizeWithMinMax) { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", {1, 2, 3, 1}, { - 
-1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647 - }); + net.AddInputFromArray( + "Input", {1, 2, 3, 1}, + {-1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647}); net.AddInputFromArray("InputMin", {1}, {-3}); net.AddInputFromArray("InputMax", {1}, {5}); net.AddInputFromArray("RerangeMin", {1}, {-3.01887}); net.AddInputFromArray("RerangeMax", {1}, {5}); OpDefBuilder("Requantize", "RequantizeTest") - .Input("Input") - .Input("InputMin") - .Input("InputMax") - .Input("RerangeMin") - .Input("RerangeMax") - .Output("Output") - .Output("OutputMin") - .Output("OutputMax") - .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT}) - .AddIntArg("T", DT_UINT8) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Input("InputMin") + .Input("InputMax") + .Input("RerangeMin") + .Input("RerangeMax") + .Output("Output") + .Output("OutputMin") + .Output("OutputMax") + .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT}) + .AddIntArg("T", DT_UINT8) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); auto output = net.GetTensor("Output"); - auto expected_output = CreateTensor({1, 2, 3, 1}, - { - 32, 64, 128, 160, 191, 223 - }); + auto expected_output = + CreateTensor({1, 2, 3, 1}, {32, 64, 128, 160, 191, 223}); auto expected_min = CreateTensor({1}, {-3.01887}); auto expected_max = CreateTensor({1}, {5}); @@ -187,31 +177,29 @@ TEST_F(QuantizeTest, TestRequantizeWithoutMinMax) { OpsTestNet net; // Add input data - net.AddInputFromArray("Input", {1, 2, 3, 1}, { - -1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647 - }); + net.AddInputFromArray( + "Input", {1, 2, 3, 1}, + {-1073741824, -536870912, 536870912, 1073741824, 1610612736, 2147483647}); net.AddInputFromArray("InputMin", {1}, {-3}); net.AddInputFromArray("InputMax", {1}, {5}); OpDefBuilder("Requantize", "RequantizeTest") - .Input("Input") - .Input("InputMin") - .Input("InputMax") - .Output("Output") - .Output("OutputMin") - .Output("OutputMax") - .OutputType({DT_UINT8, DT_FLOAT, 
DT_FLOAT}) - .AddIntArg("T", DT_UINT8) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Input("InputMin") + .Input("InputMax") + .Output("Output") + .Output("OutputMin") + .Output("OutputMax") + .OutputType({DT_UINT8, DT_FLOAT, DT_FLOAT}) + .AddIntArg("T", DT_UINT8) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); auto output = net.GetTensor("Output"); - auto expected_output = CreateTensor({1, 2, 3, 1}, - { - 0, 43, 128, 170, 213, 255 - }); + auto expected_output = + CreateTensor({1, 2, 3, 1}, {0, 43, 128, 170, 213, 255}); auto expected_min = CreateTensor({1}, {-3.01887}); auto expected_max = CreateTensor({1}, {5}); ExpectTensorNear(*expected_output, *output); diff --git a/mace/ops/resize_bilinear.h b/mace/ops/resize_bilinear.h index 571009c4..fb389859 100644 --- a/mace/ops/resize_bilinear.h +++ b/mace/ops/resize_bilinear.h @@ -26,9 +26,8 @@ class ResizeBilinearOp : public Operator { public: ResizeBilinearOp(const OperatorDef &operator_def, Workspace *ws) : Operator(operator_def, ws), - functor_( - OperatorBase::GetRepeatedArgs("size", {-1, -1}), - OperatorBase::GetOptionalArg("align_corners", false)) {} + functor_(OperatorBase::GetRepeatedArgs("size", {-1, -1}), + OperatorBase::GetOptionalArg("align_corners", false)) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(0); diff --git a/mace/ops/resize_bilinear_test.cc b/mace/ops/resize_bilinear_test.cc index 9b92e89f..49dda888 100644 --- a/mace/ops/resize_bilinear_test.cc +++ b/mace/ops/resize_bilinear_test.cc @@ -15,8 +15,8 @@ #include #include "mace/core/operator.h" -#include "mace/ops/resize_bilinear.h" #include "mace/ops/ops_test_util.h" +#include "mace/ops/resize_bilinear.h" namespace mace { namespace ops { @@ -33,22 +33,18 @@ TEST_F(ResizeBilinearTest, CPUResizeBilinearWOAlignCorners) { std::vector input(24); std::iota(begin(input), end(input), 0); net.AddInputFromArray("Input", {1, 2, 4, 3}, input); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + 
net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("size", {1, 2}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("size", {1, 2}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); // Check @@ -66,26 +62,21 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) { std::vector input(24); std::iota(begin(input), end(input), 0); net.AddInputFromArray("Input", {1, 2, 4, 3}, input); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntArg("align_corners", 1) - .AddIntsArg("size", {1, 2}) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntArg("align_corners", 1) + .AddIntsArg("size", {1, 2}) + .Finalize(net.NewOperatorDef()); // Run net.RunOp(); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", + net.TransformDataFormat("OutputNCHW", NCHW, "Output", NHWC); - // Check auto expected = CreateTensor({1, 1, 2, 3}, {0, 1, 2, 9, 10, 11}); @@ -111,9 +102,7 @@ void TestRandomResizeBilinear() { // Add input data net.AddRandomInput("Input", {batch, in_height, in_width, channels}); - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("ResizeBilinear", "ResizeBilinearTest") @@ -124,10 +113,8 @@ void TestRandomResizeBilinear() { .Finalize(net.NewOperatorDef()); // Run on CPU net.RunOp(DeviceType::CPU); - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); Tensor expected; expected.Copy(*net.GetOutput("Output")); @@ 
-149,8 +136,8 @@ void TestRandomResizeBilinear() { kernels::BufferType::IN_OUT_CHANNEL); } // Check - ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), - 1e-5, 1e-6); + ExpectTensorNear(expected, *net.GetOutput("DeviceOutput"), 1e-5, + 1e-6); } } } // namespace diff --git a/mace/ops/slice.h b/mace/ops/slice.h index d917f132..7f01162f 100644 --- a/mace/ops/slice.h +++ b/mace/ops/slice.h @@ -32,12 +32,12 @@ class SliceOp : public Operator { MaceStatus Run(StatsFuture *future) override { MACE_CHECK(this->OutputSize() >= 2) - << "There must be at least two outputs for slicing"; + << "There must be at least two outputs for slicing"; const Tensor *input = this->Input(INPUT); const std::vector output_list = this->Outputs(); const int32_t slice_axis = OperatorBase::GetOptionalArg("axis", 3); MACE_CHECK((input->dim(slice_axis) % this->OutputSize()) == 0) - << "Outputs do not split input equally."; + << "Outputs do not split input equally."; return functor_(input, output_list, future); } diff --git a/mace/ops/slice_test.cc b/mace/ops/slice_test.cc index 92a54a86..b445d56a 100644 --- a/mace/ops/slice_test.cc +++ b/mace/ops/slice_test.cc @@ -16,8 +16,8 @@ #include #include "gmock/gmock.h" -#include "mace/ops/slice.h" #include "mace/ops/ops_test_util.h" +#include "mace/ops/slice.h" namespace mace { namespace ops { @@ -26,7 +26,7 @@ namespace test { class SliceOpTest : public OpsTestBase {}; namespace { -template +template void RandomTest(const int num_outputs, const int axis) { static unsigned int seed = time(NULL); const index_t output_channels = 4 * (1 + rand_r(&seed) % 10); @@ -43,10 +43,8 @@ void RandomTest(const int num_outputs, const int axis) { input_shape = {batch, input_channels, height, width}; else if (axis == 3) input_shape = {batch, height, width, input_channels}; - const index_t input_size = std::accumulate(input_shape.begin(), - input_shape.end(), - 1, - std::multiplies()); + const index_t input_size = std::accumulate( + input_shape.begin(), 
input_shape.end(), 1, std::multiplies()); std::vector input_data(input_size); GenerateRandomRealTypeData(input_shape, &input_data); net.AddInputFromArray("Input", input_shape, input_data); @@ -60,8 +58,7 @@ void RandomTest(const int num_outputs, const int axis) { for (int i = 0; i < num_outputs; ++i) { builder = builder.Output(MakeString("OutputImage", i)); } - builder - .AddIntArg("T", static_cast(DataTypeToEnum::value)) + builder.AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); } else { auto builder = OpDefBuilder("Slice", "SliceTest").AddIntArg("axis", axis); @@ -77,8 +74,7 @@ void RandomTest(const int num_outputs, const int axis) { if (D == DeviceType::GPU) { for (int i = 0; i < num_outputs; ++i) { - ImageToBuffer(&net, - MakeString("OutputImage", i), + ImageToBuffer(&net, MakeString("OutputImage", i), MakeString("Output", i), kernels::BufferType::IN_OUT_CHANNEL); } @@ -90,14 +86,12 @@ void RandomTest(const int num_outputs, const int axis) { expected_shape = {batch, output_channels, height, width}; else if (axis == 3) expected_shape = {batch, height, width, output_channels}; - const index_t outer_size = std::accumulate(expected_shape.begin(), - expected_shape.begin() + axis, - 1, - std::multiplies()); - const index_t inner_size = std::accumulate(expected_shape.begin() + axis + 1, - expected_shape.end(), - 1, - std::multiplies()); + const index_t outer_size = + std::accumulate(expected_shape.begin(), expected_shape.begin() + axis, 1, + std::multiplies()); + const index_t inner_size = + std::accumulate(expected_shape.begin() + axis + 1, expected_shape.end(), + 1, std::multiplies()); const float *input_ptr = input_data.data(); const float *output_ptr; for (int i = 0; i < num_outputs; ++i) { @@ -106,11 +100,11 @@ void RandomTest(const int num_outputs, const int axis) { Tensor::MappingGuard output_mapper(output); output_ptr = output->data(); for (int outer_idx = 0; outer_idx < outer_size; ++outer_idx) { - const int idx = (outer_idx 
* input_channels + i * output_channels) - * inner_size; + const int idx = + (outer_idx * input_channels + i * output_channels) * inner_size; for (int j = 0; j < output_channels * inner_size; ++j) { - ASSERT_NEAR(*output_ptr++, input_ptr[idx + j], 1e-2) << "with output " - << i << " index " << idx + j; + ASSERT_NEAR(*output_ptr++, input_ptr[idx + j], 1e-2) + << "with output " << i << " index " << idx + j; } } } diff --git a/mace/ops/softmax.h b/mace/ops/softmax.h index c6b81d9e..0a6868f0 100644 --- a/mace/ops/softmax.h +++ b/mace/ops/softmax.h @@ -31,7 +31,7 @@ class SoftmaxOp : public Operator { const Tensor *logits = this->Input(LOGITS); Tensor *output = this->Output(OUTPUT); - output->ResizeLike(logits); + MACE_RETURN_IF_ERROR(output->ResizeLike(logits)); return functor_(logits, output, future); } diff --git a/mace/ops/softmax_test.cc b/mace/ops/softmax_test.cc index 6de118d3..9997ee10 100644 --- a/mace/ops/softmax_test.cc +++ b/mace/ops/softmax_test.cc @@ -22,7 +22,7 @@ namespace test { class SoftmaxOpTest : public OpsTestBase {}; namespace { -template +template void Simple() { // Construct graph OpsTestNet net; @@ -33,9 +33,9 @@ void Simple() { if (D == DeviceType::CPU) { net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Softmax", "SoftmaxTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -45,9 +45,9 @@ void Simple() { kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Softmax", "SoftmaxTest") - .Input("InputImage") - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); @@ -60,8 +60,8 @@ void Simple() { } auto expected = CreateTensor( - {1, 1, 2, 4}, - {0.25, 0.25, 0.25, 0.25, 0.0320586, 0.08714432, 0.23688282, 0.64391426}); + {1, 1, 2, 4}, + {0.25, 0.25, 0.25, 0.25, 0.0320586, 0.08714432, 
0.23688282, 0.64391426}); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } @@ -71,7 +71,7 @@ TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); } TEST_F(SoftmaxOpTest, OPENCLSimple) { Simple(); } namespace { -template +template void Complex(const std::vector &logits_shape) { // Construct graph OpsTestNet net; @@ -81,9 +81,9 @@ void Complex(const std::vector &logits_shape) { net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("Softmax", "SoftmaxTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .Finalize(net.NewOperatorDef()); // Run on cpu net.RunOp(); @@ -97,9 +97,9 @@ void Complex(const std::vector &logits_shape) { kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Softmax", "SoftmaxTest") - .Input("InputImage") - .Output("OutputImage") - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .Finalize(net.NewOperatorDef()); // Run on gpu net.RunOp(D); @@ -108,8 +108,7 @@ void Complex(const std::vector &logits_shape) { ImageToBuffer(&net, "OutputImage", "OPENCLOutput", kernels::BufferType::IN_OUT_CHANNEL); - ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), - 1e-5); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5); } } // namespace diff --git a/mace/ops/space_to_batch.h b/mace/ops/space_to_batch.h index faef4b57..7ce0dd13 100644 --- a/mace/ops/space_to_batch.h +++ b/mace/ops/space_to_batch.h @@ -29,16 +29,14 @@ class SpaceToBatchNDOp : public Operator { public: SpaceToBatchNDOp(const OperatorDef &op_def, Workspace *ws) : Operator(op_def, ws), - functor_( - OperatorBase::GetRepeatedArgs("paddings", {0, 0, 0, 0}), - OperatorBase::GetRepeatedArgs("block_shape", {1, 1}), - false) {} + functor_(OperatorBase::GetRepeatedArgs("paddings", {0, 0, 0, 0}), + OperatorBase::GetRepeatedArgs("block_shape", {1, 1}), + false) {} MaceStatus Run(StatsFuture *future) override { const Tensor *space_tensor = 
this->Input(INPUT); Tensor *batch_tensor = this->Output(OUTPUT); - return functor_(const_cast(space_tensor), batch_tensor, - future); + return functor_(const_cast(space_tensor), batch_tensor, future); } private: diff --git a/mace/ops/space_to_batch_test.cc b/mace/ops/space_to_batch_test.cc index cca39baf..5539bfd6 100644 --- a/mace/ops/space_to_batch_test.cc +++ b/mace/ops/space_to_batch_test.cc @@ -35,22 +35,20 @@ void RunSpaceToBatch(const std::vector &input_shape, BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntsArg("paddings", padding_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("paddings", padding_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); } else if (D == CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("paddings", padding_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("paddings", padding_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); } // Run @@ -60,10 +58,8 @@ void RunSpaceToBatch(const std::vector &input_shape, ImageToBuffer(&net, "OutputImage", "Output", kernels::BufferType::IN_OUT_CHANNEL); } else if (D == CPU) { - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } // Check ExpectTensorNear(*expected, *net.GetOutput("Output")); @@ -83,22 +79,20 @@ void RunBatchToSpace(const std::vector &input_shape, BufferToImage(&net, "Input", "InputImage", 
kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntsArg("crops", crops_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("crops", crops_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); } else if (D == CPU) { - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("crops", crops_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("crops", crops_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); } // Run @@ -108,10 +102,8 @@ void RunBatchToSpace(const std::vector &input_shape, ImageToBuffer(&net, "OutputImage", "Output", kernels::BufferType::IN_OUT_CHANNEL); } else if (D == CPU) { - net.TransformDataFormat("OutputNCHW", - NCHW, - "Output", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "Output", NHWC); } // Check ExpectTensorNear(*expected, *net.GetOutput("Output")); @@ -124,8 +116,8 @@ void TestBidirectionalTransform(const std::vector &space_shape, const std::vector &padding_data, const std::vector &batch_shape, const std::vector &batch_data) { - auto space_tensor = std::unique_ptr(new Tensor( - GetDeviceAllocator(DeviceType::GPU), DataTypeToEnum::v())); + auto space_tensor = std::unique_ptr( + new Tensor(GetDeviceAllocator(DeviceType::GPU), DataTypeToEnum::v())); space_tensor->Resize(space_shape); { Tensor::MappingGuard space_mapper(space_tensor.get()); @@ -136,8 +128,8 @@ void TestBidirectionalTransform(const std::vector &space_shape, memcpy(space_ptr, space_data.data(), space_data.size() * sizeof(T)); } - auto 
batch_tensor = std::unique_ptr(new Tensor( - GetDeviceAllocator(DeviceType::GPU), DataTypeToEnum::v())); + auto batch_tensor = std::unique_ptr( + new Tensor(GetDeviceAllocator(DeviceType::GPU), DataTypeToEnum::v())); batch_tensor->Resize(batch_shape); { Tensor::MappingGuard batch_mapper(batch_tensor.get()); @@ -147,12 +139,12 @@ void TestBidirectionalTransform(const std::vector &space_shape, } RunSpaceToBatch(space_shape, space_data, block_data, - padding_data, batch_tensor.get()); + padding_data, batch_tensor.get()); RunSpaceToBatch(space_shape, space_data, block_data, padding_data, batch_tensor.get()); RunBatchToSpace(batch_shape, batch_data, block_data, - padding_data, space_tensor.get()); + padding_data, space_tensor.get()); RunBatchToSpace(batch_shape, batch_data, block_data, padding_data, space_tensor.get()); } @@ -209,45 +201,41 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) { {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, {2, 2}, {0, 0, 0, 0}, {8, 1, 2, 2}, - {1, 2, 5, 6, 17, 18, 21, 22, 3, 4, 7, 8, 19, 20, 23, 24, + {1, 2, 5, 6, 17, 18, 21, 22, 3, 4, 7, 8, 19, 20, 23, 24, 9, 10, 13, 14, 25, 26, 29, 30, 11, 12, 15, 16, 27, 28, 31, 32}); } void TestSpaceToBatchLargeInput(const std::vector &input_shape, - const std::vector &block_shape_data, - const std::vector &padding_data) { + const std::vector &block_shape_data, + const std::vector &padding_data) { OpsTestNet net; net.AddRandomInput("Input", input_shape); // run gpu BufferToImage(&net, "Input", "InputImage", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntsArg("paddings", padding_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("paddings", padding_data) + .AddIntsArg("block_shape", 
block_shape_data) + .Finalize(net.NewOperatorDef()); net.RunOp(GPU); ImageToBuffer(&net, "OutputImage", "OutputGPU", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); // run cpu - net.TransformDataFormat("Input", - NHWC, - "InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("SpaceToBatchND", "SpaceToBatchNDTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("paddings", padding_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("paddings", padding_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); net.RunOp(CPU); - net.TransformDataFormat("OutputNCHW", - NCHW, - "OutputCPU", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "OutputCPU", NHWC); // Check ExpectTensorNear(*net.GetOutput("OutputCPU"), @@ -255,8 +243,8 @@ void TestSpaceToBatchLargeInput(const std::vector &input_shape, } void TestoBatchToSpaceLargeInput(const std::vector &input_shape, - const std::vector &block_shape_data, - const std::vector &crops_data) { + const std::vector &block_shape_data, + const std::vector &crops_data) { OpsTestNet net; net.AddRandomInput("Input", input_shape); @@ -264,38 +252,33 @@ void TestoBatchToSpaceLargeInput(const std::vector &input_shape, BufferToImage(&net, "Input", "InputImage", kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") - .Input("InputImage") - .Output("OutputImage") - .AddIntsArg("crops", crops_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("crops", crops_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); net.RunOp(GPU); ImageToBuffer(&net, "OutputImage", "OutputGPU", kernels::BufferType::IN_OUT_CHANNEL); // run cpu - net.TransformDataFormat("Input", - NHWC, - 
"InputNCHW", + net.TransformDataFormat("Input", NHWC, "InputNCHW", NCHW); OpDefBuilder("BatchToSpaceND", "BatchToSpaceNDTest") - .Input("InputNCHW") - .Output("OutputNCHW") - .AddIntsArg("crops", crops_data) - .AddIntsArg("block_shape", block_shape_data) - .Finalize(net.NewOperatorDef()); + .Input("InputNCHW") + .Output("OutputNCHW") + .AddIntsArg("crops", crops_data) + .AddIntsArg("block_shape", block_shape_data) + .Finalize(net.NewOperatorDef()); net.RunOp(CPU); - net.TransformDataFormat("OutputNCHW", - NCHW, - "OutputCPU", - NHWC); + net.TransformDataFormat("OutputNCHW", NCHW, + "OutputCPU", NHWC); // Check ExpectTensorNear(*net.GetOutput("OutputCPU"), *net.GetOutput("OutputGPU")); } - TEST(SpaceToBatchTest, LargeData) { TestSpaceToBatchLargeInput({1, 256, 256, 32}, {8, 8}, {0, 0, 0, 0}); TestSpaceToBatchLargeInput({1, 256, 256, 32}, {8, 8}, {4, 4, 4, 4}); diff --git a/mace/ops/space_to_depth.h b/mace/ops/space_to_depth.h index cece41f8..44ca7e5c 100644 --- a/mace/ops/space_to_depth.h +++ b/mace/ops/space_to_depth.h @@ -24,20 +24,18 @@ namespace mace { namespace ops { -template +template class SpaceToDepthOp : public Operator { public: SpaceToDepthOp(const OperatorDef &op_def, Workspace *ws) - : Operator(op_def, ws), - functor_(OperatorBase::GetOptionalArg("block_size", 1), false) { - } + : Operator(op_def, ws), + functor_(OperatorBase::GetOptionalArg("block_size", 1), false) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); Tensor *output = this->Output(OUTPUT); MACE_CHECK(input->dim_size() == 4, "input dim should be 4"); - const int block_size = - OperatorBase::GetOptionalArg("block_size", 1); + const int block_size = OperatorBase::GetOptionalArg("block_size", 1); index_t input_height; index_t input_width; index_t input_depth; @@ -55,9 +53,9 @@ class SpaceToDepthOp : public Operator { MACE_CHECK((input_depth % 4) == 0, "input channel should be dividable by 4"); MACE_CHECK( - (input_width % block_size == 0) && 
(input_height % block_size == 0), - "input width and height should be dividable by block_size", - input->dim(3)); + (input_width % block_size == 0) && (input_height % block_size == 0), + "input width and height should be dividable by block_size", + input->dim(3)); return functor_(input, output, future); } diff --git a/mace/ops/transpose.cc b/mace/ops/transpose.cc index 9571fcbf..a0c726af 100644 --- a/mace/ops/transpose.cc +++ b/mace/ops/transpose.cc @@ -19,9 +19,9 @@ namespace ops { void Register_Transpose(OperatorRegistry *op_registry) { MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("Transpose") - .Device(DeviceType::CPU) - .TypeConstraint("T") - .Build(), + .Device(DeviceType::CPU) + .TypeConstraint("T") + .Build(), TransposeOp); } diff --git a/mace/ops/transpose.h b/mace/ops/transpose.h index 5fb497ac..1ad73db9 100644 --- a/mace/ops/transpose.h +++ b/mace/ops/transpose.h @@ -18,31 +18,31 @@ #include #include "mace/core/operator.h" -#include "mace/kernels/transpose.h" #include "mace/kernels/softmax.h" +#include "mace/kernels/transpose.h" namespace mace { -template +template class TransposeOp : public Operator { public: TransposeOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws), - dims_(OperatorBase::GetRepeatedArgs("dims")), - functor_(dims_) {} + : Operator(operator_def, ws), + dims_(OperatorBase::GetRepeatedArgs("dims")), + functor_(dims_) {} MaceStatus Run(StatsFuture *future) override { const Tensor *input = this->Input(INPUT); Tensor *output = this->Output(OUTPUT); const std::vector &input_shape = input->shape(); - MACE_CHECK((input_shape.size() == 4 && dims_.size() == 4) - || (input_shape.size() == 2 && dims_.size() == 2), + MACE_CHECK((input_shape.size() == 4 && dims_.size() == 4) || + (input_shape.size() == 2 && dims_.size() == 2), "rank should be 2 or 4"); std::vector output_shape; for (size_t i = 0; i < dims_.size(); ++i) { output_shape.push_back(input_shape[dims_[i]]); } - 
MACE_FAILURE_RETURN(output->Resize(output_shape)); + MACE_RETURN_IF_ERROR(output->Resize(output_shape)); return functor_(input, output, future); } diff --git a/mace/ops/transpose_test.cc b/mace/ops/transpose_test.cc index 3a4b5729..76bfc57a 100644 --- a/mace/ops/transpose_test.cc +++ b/mace/ops/transpose_test.cc @@ -29,18 +29,16 @@ void TransposeNCHWTest(const std::vector &input_shape) { net.AddRandomInput("Input", input_shape); OpDefBuilder("Transpose", "TransposeNCHWTest") - .Input("Input") - .Output("Output") - .AddIntsArg("dims", {0, 3, 1, 2}) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Output("Output") + .AddIntsArg("dims", {0, 3, 1, 2}) + .Finalize(net.NewOperatorDef()); // Run on cpu net.RunOp(); - net.TransformDataFormat("Input", - DataFormat::NHWC, - "InputNCHW", - DataFormat::NCHW); + net.TransformDataFormat( + "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW); ExpectTensorNear(*net.GetOutput("InputNCHW"), *net.GetOutput("Output")); @@ -53,18 +51,16 @@ void TransposeNHWCTest(const std::vector &input_shape) { net.AddRandomInput("Input", input_shape); OpDefBuilder("Transpose", "TransposeNHWCTest") - .Input("Input") - .Output("Output") - .AddIntsArg("dims", {0, 2, 3, 1}) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Output("Output") + .AddIntsArg("dims", {0, 2, 3, 1}) + .Finalize(net.NewOperatorDef()); // Run on cpu net.RunOp(); - net.TransformDataFormat("Input", - DataFormat::NCHW, - "InputNHWC", - DataFormat::NHWC); + net.TransformDataFormat( + "Input", DataFormat::NCHW, "InputNHWC", DataFormat::NHWC); ExpectTensorNear(*net.GetOutput("InputNHWC"), *net.GetOutput("Output")); @@ -91,16 +87,15 @@ TEST_F(TransposeOpTest, Rank2) { net.AddInputFromArray("Input", {2, 3}, {1, 2, 3, 4, 5, 6}); OpDefBuilder("Transpose", "TransposeNCHWTest") - .Input("Input") - .Output("Output") - .AddIntsArg("dims", {1, 0}) - .Finalize(net.NewOperatorDef()); + .Input("Input") + .Output("Output") + .AddIntsArg("dims", {1, 0}) + 
.Finalize(net.NewOperatorDef()); // Run on cpu net.RunOp(); - net.AddInputFromArray("ExpectedOutput", - {3, 2}, + net.AddInputFromArray("ExpectedOutput", {3, 2}, {1, 4, 2, 5, 3, 6}); ExpectTensorNear(*net.GetOutput("ExpectedOutput"), diff --git a/mace/ops/winograd_convolution_test.cc b/mace/ops/winograd_convolution_test.cc index f6d06985..68890a91 100644 --- a/mace/ops/winograd_convolution_test.cc +++ b/mace/ops/winograd_convolution_test.cc @@ -83,7 +83,7 @@ void WinogradConvolution(const index_t batch, // Transfer output ImageToBuffer(&net, "OutputImage", "ConvOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); Tensor expected; expected.Copy(*net.GetOutput("ConvOutput")); auto output_shape = expected.shape(); @@ -132,34 +132,29 @@ void WinogradConvolution(const index_t batch, ImageToBuffer(&net, "WinoOutputImage", "WinoOutput", kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DataType::DT_HALF) { - ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), - 1e-2, 1e-2); + ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), 1e-2, 1e-2); } else { - ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), - 1e-5, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), 1e-5, 1e-4); } } } // namespace TEST_F(WinogradConvlutionTest, AlignedConvolution) { WinogradConvolution(1, 32, 32, 32, 16, - Padding::VALID); - WinogradConvolution(1, 32, 32, 32, 16, - Padding::SAME); + Padding::VALID); + WinogradConvolution(1, 32, 32, 32, 16, Padding::SAME); } TEST_F(WinogradConvlutionTest, UnAlignedConvolution) { WinogradConvolution(1, 61, 67, 31, 37, - Padding::VALID); - WinogradConvolution(1, 61, 67, 37, 31, - Padding::SAME); + Padding::VALID); + WinogradConvolution(1, 61, 67, 37, 31, Padding::SAME); } TEST_F(WinogradConvlutionTest, BatchConvolution) { WinogradConvolution(3, 64, 64, 32, 32, - Padding::VALID); - WinogradConvolution(5, 61, 67, 37, 31, - Padding::SAME); + Padding::VALID); + WinogradConvolution(5, 61, 
67, 37, 31, Padding::SAME); } namespace { @@ -199,7 +194,7 @@ void WinogradConvolutionWithPad(const index_t batch, // Transfer output ImageToBuffer(&net, "OutputImage", "ConvOutput", - kernels::BufferType::IN_OUT_CHANNEL); + kernels::BufferType::IN_OUT_CHANNEL); Tensor expected; expected.Copy(*net.GetOutput("ConvOutput")); auto output_shape = expected.shape(); @@ -248,34 +243,26 @@ void WinogradConvolutionWithPad(const index_t batch, ImageToBuffer(&net, "WinoOutputImage", "WinoOutput", kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DataType::DT_HALF) { - ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), - 1e-2, 1e-2); + ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), 1e-2, 1e-2); } else { - ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), - 1e-5, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("WinoOutput"), 1e-5, 1e-4); } } } // namespace TEST_F(WinogradConvlutionTest, AlignedConvolutionWithPad) { - WinogradConvolutionWithPad(1, 32, 32, 32, 16, - 1); - WinogradConvolutionWithPad(1, 32, 32, 32, 16, - 2); + WinogradConvolutionWithPad(1, 32, 32, 32, 16, 1); + WinogradConvolutionWithPad(1, 32, 32, 32, 16, 2); } TEST_F(WinogradConvlutionTest, UnAlignedConvolutionWithPad) { - WinogradConvolutionWithPad(1, 61, 67, 31, 37, - 1); - WinogradConvolutionWithPad(1, 61, 67, 37, 31, - 2); + WinogradConvolutionWithPad(1, 61, 67, 31, 37, 1); + WinogradConvolutionWithPad(1, 61, 67, 37, 31, 2); } TEST_F(WinogradConvlutionTest, BatchConvolutionWithPad) { - WinogradConvolutionWithPad(3, 64, 64, 32, 32, - 1); - WinogradConvolutionWithPad(5, 61, 67, 37, 31, - 2); + WinogradConvolutionWithPad(3, 64, 64, 32, 32, 1); + WinogradConvolutionWithPad(5, 61, 67, 37, 31, 2); } } // namespace test diff --git a/mace/public/mace.h b/mace/public/mace.h index b4511481..bd2e390b 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -65,7 +65,7 @@ enum MaceStatus { MACE_OUT_OF_RESOURCES = 2 }; -#define MACE_FAILURE_RETURN(stmt) \ +#define 
MACE_RETURN_IF_ERROR(stmt) \ { \ MaceStatus status = (stmt); \ if (status != MACE_SUCCESS) { \ -- GitLab