diff --git a/mace/core/operator.cc b/mace/core/operator.cc index 3e3d2ba9a47813cece91e0f5de44c93e81acd25a..9add3e1a08e9235301058d9460955a099155334d 100644 --- a/mace/core/operator.cc +++ b/mace/core/operator.cc @@ -97,7 +97,6 @@ extern void Register_MatMul(OperatorRegistry *op_registry); extern void Register_Pad(OperatorRegistry *op_registry); extern void Register_Pooling(OperatorRegistry *op_registry); extern void Register_Proposal(OperatorRegistry *op_registry); -extern void Register_PSROIAlign(OperatorRegistry *op_registry); extern void Register_Quantize(OperatorRegistry *op_registry); extern void Register_ReduceMean(OperatorRegistry *op_registry); extern void Register_Requantize(OperatorRegistry *op_registry); @@ -146,7 +145,6 @@ OperatorRegistry::OperatorRegistry() { ops::Register_Pad(this); ops::Register_Pooling(this); ops::Register_Proposal(this); - ops::Register_PSROIAlign(this); ops::Register_Quantize(this); ops::Register_ReduceMean(this); ops::Register_Requantize(this); diff --git a/mace/kernels/opencl/concat.cc b/mace/kernels/opencl/concat.cc index 4ccb42a167715f4a20c94095da5ca256fccf1bdc..65540e486c606cfc53c3d92e3f05236c931cb139 100644 --- a/mace/kernels/opencl/concat.cc +++ b/mace/kernels/opencl/concat.cc @@ -214,7 +214,7 @@ static MaceStatus ConcatN(cl::Kernel *kernel, MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; (*kernel_error)->UnMap(); } - if (runtime->is_profiling_enabled()) { + if (future != nullptr && runtime->is_profiling_enabled()) { event.wait(); CallStats tmp_stats; runtime->GetCallStats(event, &tmp_stats); diff --git a/mace/kernels/opencl/slice.cc b/mace/kernels/opencl/slice.cc index f865091fd75f0175fb2245965ca0731f2f93bf22..160ad003df465891c0b23a3659635dfb699c5be4 100644 --- a/mace/kernels/opencl/slice.cc +++ b/mace/kernels/opencl/slice.cc @@ -114,7 +114,7 @@ MaceStatus SliceFunctor::operator()( MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code; kernel_error_->UnMap(); } - if (runtime->is_profiling_enabled()) { + if (future != nullptr && runtime->is_profiling_enabled()) { event.wait(); CallStats tmp_stats; runtime->GetCallStats(event, &tmp_stats); diff --git a/mace/kernels/proposal.h b/mace/kernels/proposal.h index a1277f4ce230efda081857326196fe037bacc6a6..748264e5fe503884804da023d4064e3d82a98d5c 100644 --- a/mace/kernels/proposal.h +++ b/mace/kernels/proposal.h @@ -137,10 +137,10 @@ struct ProposalFunctor { anchors_(GenerateAnchors(scales, ratios, base_size)) {} MaceStatus operator()(const Tensor *rpn_cls_prob, - const Tensor *rpn_bbox_pred, - const Tensor *img_info_tensor, - Tensor *output, - StatsFuture *future) { + const Tensor *rpn_bbox_pred, + const Tensor *img_info_tensor, + Tensor *output, + StatsFuture *future) { MACE_UNUSED(future); MACE_CHECK(rpn_cls_prob->dim(1) == rpn_bbox_pred->dim(1) && rpn_cls_prob->dim(2) == rpn_bbox_pred->dim(2)); diff --git a/mace/kernels/psroi_align.h b/mace/kernels/psroi_align.h deleted file mode 100644 index 757bec3cc2b409ff96320d4711467a302f7f8b5b..0000000000000000000000000000000000000000 --- a/mace/kernels/psroi_align.h +++ /dev/null @@ -1,192 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_KERNELS_PSROI_ALIGN_H_ -#define MACE_KERNELS_PSROI_ALIGN_H_ - -#include -#include - -#include "mace/core/future.h" -#include "mace/core/tensor.h" -#include "mace/public/mace.h" - -namespace mace { -namespace kernels { - -template -struct PSROIAlignFunctor { - PSROIAlignFunctor(const T spatial_scale, - const int output_dim, - const int group_size) : - spatial_scale_(spatial_scale), - output_dim_(output_dim), - group_size_(group_size) {} - - MaceStatus operator()(const Tensor *input, - const Tensor *rois, - Tensor *output, - StatsFuture *future) { - MACE_UNUSED(future); - const int height = static_cast(input->dim(1)); - const int width = static_cast(input->dim(2)); - const int channels = static_cast(input->dim(3)); - const int pooled_height = group_size_; - const int pooled_width = group_size_; - const T *input_ptr = input->data(); - const T *rois_ptr = rois->data(); - // Number of ROIs - const index_t num_rois = rois->dim(0); - const index_t batch_size = input->dim(0); - - MACE_RETURN_IF_ERROR(output->Resize({num_rois, pooled_height, pooled_width, - output_dim_})); - T *output_ptr = output->mutable_data(); - - for (int n = 0; n < num_rois; ++n) { - int roi_batch_ind = rois_ptr[0]; - T roi_start_w = - static_cast(rois_ptr[1]) * spatial_scale_; - T roi_start_h = - static_cast(rois_ptr[2]) * spatial_scale_; - T roi_end_w = - static_cast(rois_ptr[3] + 1.) * spatial_scale_; - T roi_end_h = - static_cast(rois_ptr[4] + 1.) * spatial_scale_; - MACE_CHECK(roi_batch_ind >= 0); - MACE_CHECK(roi_batch_ind < batch_size); - - // Force too small ROIs to be 1x1 - T roi_width = std::max(roi_end_w - roi_start_w, static_cast(0.1)); - T roi_height = std::max(roi_end_h - roi_start_h, static_cast(0.1)); - - // Compute w and h at bottom - T bin_size_h = roi_height / static_cast(pooled_height); - T bin_size_w = roi_width / static_cast(pooled_width); - - const T *batch_data = input_ptr + - roi_batch_ind * height * width * channels; - - std::vector vhstart, vwstart, vhend, vwend; - - for (int ph = 0; ph < pooled_height; ++ph) { - for (int pw = 0; pw < pooled_width; ++pw) { - T hstart = static_cast(ph) * bin_size_h - + roi_start_h; - T wstart = static_cast(pw) * bin_size_w - + roi_start_w; - T hend = static_cast(ph + 1) * bin_size_h - + roi_start_h; - T wend = static_cast(pw + 1) * bin_size_w - + roi_start_w; - // Add roi offsets and clip to input boundaries - hstart = std::min(std::max(hstart, static_cast(0.)), - static_cast(height)); - hend = std::min(std::max(hend, static_cast(0.)), - static_cast(height)); - wstart = std::min(std::max(wstart, static_cast(0.)), - static_cast(width)); - wend = std::min(std::max(wend, static_cast(0.)), - static_cast(width)); - - vhstart.push_back(hstart); - vwstart.push_back(wstart); - vhend.push_back(hend); - vwend.push_back(wend); - } - } - -#pragma omp parallel for collapse(3) - for (int ph = 0; ph < pooled_height; ++ph) { - for (int pw = 0; pw < pooled_width; ++pw) { - for (int c = 0; c < output_dim_; ++c) { - const int pool_index = ph * pooled_width + pw; - const int out_idx = pool_index * output_dim_ + c; - const int in_chan_idx = (c * pooled_height + ph) - * pooled_width + pw; - T hstart = vhstart[pool_index]; - T hend = vhend[pool_index]; - T wstart = vwstart[pool_index]; - T wend = vwend[pool_index]; - bool is_empty = (hend <= hstart) || (wend <= wstart); - - T out_sum = 0; - for (T h = hstart; h < hend; h += 1.) { - for (T w = wstart; w < wend; w += 1.) { - // Selecting four regular locations for bilinear interpolation - int x_left = std::floor(w); - int x_right = std::ceil(w); - int y_bottom = std::floor(h); - int y_top = std::ceil(h); - - int top_left_index = (y_top * width + x_left) - * channels + in_chan_idx; - int top_right_index = (y_top * width + x_right) - * channels + in_chan_idx; - int bottom_left_index = (y_bottom * width + x_left) - * channels + in_chan_idx; - int bottom_right_index = (y_bottom * width + x_right) - * channels + in_chan_idx; - - bool is_top_left_in = x_left >= 0 && x_left <= width - 1 - && y_top >= 0 && y_top <= height - 1; - bool is_top_right_in = x_right >= 0 && x_right <= width - 1 - && y_top >= 0 && y_top <= height - 1; - bool is_bottom_left_in = x_left >= 0 && x_left <= width - 1 - && y_bottom >= 0 && y_bottom <= height - 1; - bool is_bottom_right_in = x_right >= 0 && x_right <= width - 1 - && y_bottom >= 0 && y_bottom <= height - 1; - - if (is_top_left_in) { - out_sum += (1 - w + x_left) * (1 - y_top + h) - * batch_data[top_left_index]; - } - if (is_top_right_in) { - out_sum += (1 - x_right + w) * (1 - y_top + h) - * batch_data[top_right_index]; - } - if (is_bottom_left_in) { - out_sum += (1 - w + x_left) * (1 - h + y_bottom) - * batch_data[bottom_left_index]; - } - if (is_bottom_right_in) { - out_sum += (1 - x_right + w) * (1 - h + y_bottom) - * batch_data[bottom_right_index]; - } - } - } - - T bin_area = (hend - hstart) * (wend - wstart); - output_ptr[out_idx] = is_empty ? 0. : out_sum / bin_area; - } - } - } - - // Increment ROI data pointer - rois_ptr += 5; - output_ptr += pooled_height * pooled_width * output_dim_; - } - - return MACE_SUCCESS; - } - - const T spatial_scale_; - const int output_dim_; - const int group_size_; -}; - -} // namespace kernels -} // namespace mace - -#endif // MACE_KERNELS_PSROI_ALIGN_H_ diff --git a/mace/ops/psroi_align.cc b/mace/ops/psroi_align.cc deleted file mode 100644 index 17a02d61da819ef88beb84f24ffb9b2dc6176901..0000000000000000000000000000000000000000 --- a/mace/ops/psroi_align.cc +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "mace/ops/psroi_align.h" - -namespace mace { -namespace ops { - -void Register_PSROIAlign(OperatorRegistry *op_registry) { - MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("PSROIAlign") - .Device(DeviceType::CPU) - .TypeConstraint("T") - .Build(), - PSROIAlignOp); -} - -} // namespace ops -} // namespace mace diff --git a/mace/ops/psroi_align.h b/mace/ops/psroi_align.h deleted file mode 100644 index b75956827b56b1c27c033fc2855eebd751cfb236..0000000000000000000000000000000000000000 --- a/mace/ops/psroi_align.h +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright 2018 Xiaomi, Inc. All rights reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef MACE_OPS_PSROI_ALIGN_H_ -#define MACE_OPS_PSROI_ALIGN_H_ - -#include "mace/core/operator.h" -#include "mace/kernels/psroi_align.h" - -namespace mace { -namespace ops { - -template -class PSROIAlignOp : public Operator { - public: - PSROIAlignOp(const OperatorDef &operator_def, Workspace *ws) - : Operator(operator_def, ws), - functor_(OperatorBase::GetOptionalArg("spatial_scale", 0), - OperatorBase::GetOptionalArg("output_dim", 0), - OperatorBase::GetOptionalArg("group_size", 0)) {} - - MaceStatus Run(StatsFuture *future) override { - const Tensor *input = this->Input(INPUT); - const Tensor *rois = this->Input(ROIS); - - Tensor *output = this->Output(OUTPUT); - - return functor_(input, rois, output, future); - } - - private: - kernels::PSROIAlignFunctor functor_; - - protected: - MACE_OP_INPUT_TAGS(INPUT, ROIS); - MACE_OP_OUTPUT_TAGS(OUTPUT); -}; - -} // namespace ops -} // namespace mace - -#endif // MACE_OPS_PSROI_ALIGN_H_ diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py index 5b5b041a5d33323c841ee9ce9c1d33f14608ab22..b3731b9803524f6c06f96de5548199a7295715fa 100644 --- a/mace/python/tools/converter_tool/base_converter.py +++ b/mace/python/tools/converter_tool/base_converter.py @@ -92,7 +92,6 @@ MaceSupportedOps = [ 'Pad', 'Pooling', 'Proposal', - 'PSROIAlign', 'Quantize', 'ReduceMean', 'Requantize', diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py index a6d8e42c24885555bd74229fb3c38d888f9baa4f..7f37fa136b219b3df96e2b5f33ac6ea22e6f2e68 100644 --- a/mace/python/tools/converter_tool/transformer.py +++ b/mace/python/tools/converter_tool/transformer.py @@ -467,14 +467,15 @@ class Transformer(base_converter.ConverterInterface): if filter_height != 3 or filter_width != 3 or strides[0] > 1 \ or strides[1] > 1 or dilations[0] > 1 or dilations[1] > 1: return False - block_size = self._gpu_wino_blk + self._gpu_wino_blk = self._option.winograd + block_size = self._option.winograd blk_sqr = (block_size + 2) * (block_size + 2) width =\ batch * ((out_height + block_size - 1) / block_size) *\ ((out_width + block_size - 1) / block_size) - if blk_sqr * in_channels > OPENCL_IMAGE_MAX_SIZE \ - or blk_sqr * out_channels > OPENCL_IMAGE_MAX_SIZE \ - or width > OPENCL_IMAGE_MAX_SIZE: + if blk_sqr * in_channels >= OPENCL_IMAGE_MAX_SIZE \ + or blk_sqr * out_channels >= OPENCL_IMAGE_MAX_SIZE \ + or width >= OPENCL_IMAGE_MAX_SIZE: self._gpu_wino_blk = 2 block_size = self._gpu_wino_blk blk_sqr = (block_size + 2) * (block_size + 2) diff --git a/tools/converter.py b/tools/converter.py index ea212ec99e63f2066aa200a05a323673c438c95f..3a8d0276598f4e1a378e7044bd1c27239327e181 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -808,6 +808,7 @@ def build_specific_lib(target_abi, target_soc, serial_num, sh_commands.build_benchmark_model(target_abi, build_tmp_binary_dir, hexagon_mode, + enable_openmp, linkshared) # generate library diff --git a/tools/sh_commands.py b/tools/sh_commands.py index 3e231ca823db732bf4a9ff88395fb17e002c1749..df23c488d1d59f7fa8f68e7ae4828a633273981d 100644 --- a/tools/sh_commands.py +++ b/tools/sh_commands.py @@ -1051,16 +1051,17 @@ def packaging_lib(libmace_output_dir, project_name): def build_benchmark_model(abi, model_output_dir, hexagon_mode, + enable_openmp, linkshared=False): - if linkshared == 0: + if not linkshared: target_name = "benchmark_model_static" else: target_name = "benchmark_model_shared" - benchmark_target = "//mace/benchmark:benchmark_model_shared" benchmark_target = "//mace/benchmark:%s" % target_name bazel_build(benchmark_target, abi=abi, + enable_openmp=enable_openmp, hexagon_mode=hexagon_mode) benchmark_binary_file = "%s/%s" % (model_output_dir, target_name)