Commit 7ac05858 authored by 刘琦

Merge branch 'remove-unused-code' into 'master'

Fix winograd transformer bug and remove unused code.

See merge request !609
@@ -97,7 +97,6 @@ extern void Register_MatMul(OperatorRegistry *op_registry);
 extern void Register_Pad(OperatorRegistry *op_registry);
 extern void Register_Pooling(OperatorRegistry *op_registry);
 extern void Register_Proposal(OperatorRegistry *op_registry);
-extern void Register_PSROIAlign(OperatorRegistry *op_registry);
 extern void Register_Quantize(OperatorRegistry *op_registry);
 extern void Register_ReduceMean(OperatorRegistry *op_registry);
 extern void Register_Requantize(OperatorRegistry *op_registry);
@@ -146,7 +145,6 @@ OperatorRegistry::OperatorRegistry() {
   ops::Register_Pad(this);
   ops::Register_Pooling(this);
   ops::Register_Proposal(this);
-  ops::Register_PSROIAlign(this);
   ops::Register_Quantize(this);
   ops::Register_ReduceMean(this);
   ops::Register_Requantize(this);
......
@@ -214,7 +214,7 @@ static MaceStatus ConcatN(cl::Kernel *kernel,
     MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code;
     (*kernel_error)->UnMap();
   }
-  if (runtime->is_profiling_enabled()) {
+  if (future != nullptr && runtime->is_profiling_enabled()) {
     event.wait();
     CallStats tmp_stats;
     runtime->GetCallStats(event, &tmp_stats);
......
@@ -114,7 +114,7 @@ MaceStatus SliceFunctor<DeviceType::GPU, T>::operator()(
     MACE_CHECK(*kerror_code == 0) << "Kernel error code: " << *kerror_code;
     kernel_error_->UnMap();
   }
-  if (runtime->is_profiling_enabled()) {
+  if (future != nullptr && runtime->is_profiling_enabled()) {
     event.wait();
     CallStats tmp_stats;
     runtime->GetCallStats(event, &tmp_stats);
......
......@@ -137,10 +137,10 @@ struct ProposalFunctor {
anchors_(GenerateAnchors(scales, ratios, base_size)) {}
MaceStatus operator()(const Tensor *rpn_cls_prob,
const Tensor *rpn_bbox_pred,
const Tensor *img_info_tensor,
Tensor *output,
StatsFuture *future) {
const Tensor *rpn_bbox_pred,
const Tensor *img_info_tensor,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
MACE_CHECK(rpn_cls_prob->dim(1) == rpn_bbox_pred->dim(1) &&
rpn_cls_prob->dim(2) == rpn_bbox_pred->dim(2));
......
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_KERNELS_PSROI_ALIGN_H_
#define MACE_KERNELS_PSROI_ALIGN_H_
#include <algorithm>
#include <vector>
#include "mace/core/future.h"
#include "mace/core/tensor.h"
#include "mace/public/mace.h"
namespace mace {
namespace kernels {
template<DeviceType D, typename T>
struct PSROIAlignFunctor {
PSROIAlignFunctor(const T spatial_scale,
const int output_dim,
const int group_size) :
spatial_scale_(spatial_scale),
output_dim_(output_dim),
group_size_(group_size) {}
MaceStatus operator()(const Tensor *input,
const Tensor *rois,
Tensor *output,
StatsFuture *future) {
MACE_UNUSED(future);
const int height = static_cast<int>(input->dim(1));
const int width = static_cast<int>(input->dim(2));
const int channels = static_cast<int>(input->dim(3));
const int pooled_height = group_size_;
const int pooled_width = group_size_;
const T *input_ptr = input->data<T>();
const T *rois_ptr = rois->data<T>();
// Number of ROIs
const index_t num_rois = rois->dim(0);
const index_t batch_size = input->dim(0);
MACE_RETURN_IF_ERROR(output->Resize({num_rois, pooled_height, pooled_width,
output_dim_}));
T *output_ptr = output->mutable_data<T>();
for (int n = 0; n < num_rois; ++n) {
int roi_batch_ind = rois_ptr[0];
T roi_start_w =
static_cast<T>(rois_ptr[1]) * spatial_scale_;
T roi_start_h =
static_cast<T>(rois_ptr[2]) * spatial_scale_;
T roi_end_w =
static_cast<T>(rois_ptr[3] + 1.) * spatial_scale_;
T roi_end_h =
static_cast<T>(rois_ptr[4] + 1.) * spatial_scale_;
MACE_CHECK(roi_batch_ind >= 0);
MACE_CHECK(roi_batch_ind < batch_size);
// Force too small ROIs to be 1x1
T roi_width = std::max(roi_end_w - roi_start_w, static_cast<T>(0.1));
T roi_height = std::max(roi_end_h - roi_start_h, static_cast<T>(0.1));
// Compute w and h at bottom
T bin_size_h = roi_height / static_cast<T>(pooled_height);
T bin_size_w = roi_width / static_cast<T>(pooled_width);
const T *batch_data = input_ptr +
roi_batch_ind * height * width * channels;
std::vector<T> vhstart, vwstart, vhend, vwend;
for (int ph = 0; ph < pooled_height; ++ph) {
for (int pw = 0; pw < pooled_width; ++pw) {
T hstart = static_cast<T>(ph) * bin_size_h
+ roi_start_h;
T wstart = static_cast<T>(pw) * bin_size_w
+ roi_start_w;
T hend = static_cast<T>(ph + 1) * bin_size_h
+ roi_start_h;
T wend = static_cast<T>(pw + 1) * bin_size_w
+ roi_start_w;
// Add roi offsets and clip to input boundaries
hstart = std::min(std::max(hstart, static_cast<T>(0.)),
static_cast<T>(height));
hend = std::min(std::max(hend, static_cast<T>(0.)),
static_cast<T>(height));
wstart = std::min(std::max(wstart, static_cast<T>(0.)),
static_cast<T>(width));
wend = std::min(std::max(wend, static_cast<T>(0.)),
static_cast<T>(width));
vhstart.push_back(hstart);
vwstart.push_back(wstart);
vhend.push_back(hend);
vwend.push_back(wend);
}
}
#pragma omp parallel for collapse(3)
for (int ph = 0; ph < pooled_height; ++ph) {
for (int pw = 0; pw < pooled_width; ++pw) {
for (int c = 0; c < output_dim_; ++c) {
const int pool_index = ph * pooled_width + pw;
const int out_idx = pool_index * output_dim_ + c;
const int in_chan_idx = (c * pooled_height + ph)
* pooled_width + pw;
T hstart = vhstart[pool_index];
T hend = vhend[pool_index];
T wstart = vwstart[pool_index];
T wend = vwend[pool_index];
bool is_empty = (hend <= hstart) || (wend <= wstart);
T out_sum = 0;
for (T h = hstart; h < hend; h += 1.) {
for (T w = wstart; w < wend; w += 1.) {
// Selecting four regular locations for bilinear interpolation
int x_left = std::floor(w);
int x_right = std::ceil(w);
int y_bottom = std::floor(h);
int y_top = std::ceil(h);
int top_left_index = (y_top * width + x_left)
* channels + in_chan_idx;
int top_right_index = (y_top * width + x_right)
* channels + in_chan_idx;
int bottom_left_index = (y_bottom * width + x_left)
* channels + in_chan_idx;
int bottom_right_index = (y_bottom * width + x_right)
* channels + in_chan_idx;
bool is_top_left_in = x_left >= 0 && x_left <= width - 1
&& y_top >= 0 && y_top <= height - 1;
bool is_top_right_in = x_right >= 0 && x_right <= width - 1
&& y_top >= 0 && y_top <= height - 1;
bool is_bottom_left_in = x_left >= 0 && x_left <= width - 1
&& y_bottom >= 0 && y_bottom <= height - 1;
bool is_bottom_right_in = x_right >= 0 && x_right <= width - 1
&& y_bottom >= 0 && y_bottom <= height - 1;
if (is_top_left_in) {
out_sum += (1 - w + x_left) * (1 - y_top + h)
* batch_data[top_left_index];
}
if (is_top_right_in) {
out_sum += (1 - x_right + w) * (1 - y_top + h)
* batch_data[top_right_index];
}
if (is_bottom_left_in) {
out_sum += (1 - w + x_left) * (1 - h + y_bottom)
* batch_data[bottom_left_index];
}
if (is_bottom_right_in) {
out_sum += (1 - x_right + w) * (1 - h + y_bottom)
* batch_data[bottom_right_index];
}
}
}
T bin_area = (hend - hstart) * (wend - wstart);
output_ptr[out_idx] = is_empty ? 0. : out_sum / bin_area;
}
}
}
// Increment ROI data pointer
rois_ptr += 5;
output_ptr += pooled_height * pooled_width * output_dim_;
}
return MACE_SUCCESS;
}
const T spatial_scale_;
const int output_dim_;
const int group_size_;
};
} // namespace kernels
} // namespace mace
#endif // MACE_KERNELS_PSROI_ALIGN_H_
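For reference, the inner loop of the removed functor performs plain bilinear sampling over an NHWC feature map before averaging each position-sensitive bin. Below is a minimal NumPy sketch of that sampling step; it is not MACE API, and `bilinear_sample` and its arguments are illustrative names only.

```python
import numpy as np

def bilinear_sample(feature_map, h, w, channel):
    """feature_map: float array of shape [height, width, channels]; h, w: fractional coords."""
    height, width, _ = feature_map.shape
    x_left, x_right = int(np.floor(w)), int(np.ceil(w))
    y_bottom, y_top = int(np.floor(h)), int(np.ceil(h))
    out_sum = 0.0
    # Weight each in-bounds neighbour by its distance to (h, w), mirroring the
    # removed C++ loop's top/bottom x left/right corner weights.
    for y, wy in ((y_top, 1 - y_top + h), (y_bottom, 1 - h + y_bottom)):
        for x, wx in ((x_left, 1 - w + x_left), (x_right, 1 - x_right + w)):
            if 0 <= x <= width - 1 and 0 <= y <= height - 1:
                out_sum += wx * wy * feature_map[y, x, channel]
    return out_sum
```

Each output bin then averages these samples over its area (`out_sum / bin_area`), and empty bins are written as 0, as in the kernel above.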
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "mace/ops/psroi_align.h"
namespace mace {
namespace ops {
void Register_PSROIAlign(OperatorRegistry *op_registry) {
MACE_REGISTER_OPERATOR(op_registry, OpKeyBuilder("PSROIAlign")
.Device(DeviceType::CPU)
.TypeConstraint<float>("T")
.Build(),
PSROIAlignOp<DeviceType::CPU, float>);
}
} // namespace ops
} // namespace mace
// Copyright 2018 Xiaomi, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MACE_OPS_PSROI_ALIGN_H_
#define MACE_OPS_PSROI_ALIGN_H_
#include "mace/core/operator.h"
#include "mace/kernels/psroi_align.h"
namespace mace {
namespace ops {
template <DeviceType D, class T>
class PSROIAlignOp : public Operator<D, T> {
public:
PSROIAlignOp(const OperatorDef &operator_def, Workspace *ws)
: Operator<D, T>(operator_def, ws),
functor_(OperatorBase::GetOptionalArg<T>("spatial_scale", 0),
OperatorBase::GetOptionalArg<int>("output_dim", 0),
OperatorBase::GetOptionalArg<int>("group_size", 0)) {}
MaceStatus Run(StatsFuture *future) override {
const Tensor *input = this->Input(INPUT);
const Tensor *rois = this->Input(ROIS);
Tensor *output = this->Output(OUTPUT);
return functor_(input, rois, output, future);
}
private:
kernels::PSROIAlignFunctor<D, T> functor_;
protected:
MACE_OP_INPUT_TAGS(INPUT, ROIS);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace mace
#endif // MACE_OPS_PSROI_ALIGN_H_
@@ -92,7 +92,6 @@ MaceSupportedOps = [
     'Pad',
     'Pooling',
     'Proposal',
-    'PSROIAlign',
     'Quantize',
     'ReduceMean',
     'Requantize',
......
@@ -467,14 +467,15 @@ class Transformer(base_converter.ConverterInterface):
         if filter_height != 3 or filter_width != 3 or strides[0] > 1 \
                 or strides[1] > 1 or dilations[0] > 1 or dilations[1] > 1:
             return False
-        block_size = self._gpu_wino_blk
+        self._gpu_wino_blk = self._option.winograd
+        block_size = self._option.winograd
         blk_sqr = (block_size + 2) * (block_size + 2)
         width =\
             batch * ((out_height + block_size - 1) / block_size) *\
             ((out_width + block_size - 1) / block_size)
-        if blk_sqr * in_channels > OPENCL_IMAGE_MAX_SIZE \
-                or blk_sqr * out_channels > OPENCL_IMAGE_MAX_SIZE \
-                or width > OPENCL_IMAGE_MAX_SIZE:
+        if blk_sqr * in_channels >= OPENCL_IMAGE_MAX_SIZE \
+                or blk_sqr * out_channels >= OPENCL_IMAGE_MAX_SIZE \
+                or width >= OPENCL_IMAGE_MAX_SIZE:
             self._gpu_wino_blk = 2
             block_size = self._gpu_wino_blk
             blk_sqr = (block_size + 2) * (block_size + 2)
......
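The hunk above is the Winograd transformer fix named in the commit message: the block size is now read from `self._option.winograd` (instead of the stale `self._gpu_wino_blk`), and a derived OpenCL image dimension equal to `OPENCL_IMAGE_MAX_SIZE` now also triggers the fallback (`>=` instead of `>`). A standalone sketch of the resulting selection logic follows; the limit value of 16384 and the function name are assumptions for illustration, not the converter's API.

```python
OPENCL_IMAGE_MAX_SIZE = 16384  # assumed device limit, for illustration only

def choose_winograd_block(requested_blk, batch, out_height, out_width,
                          in_channels, out_channels):
    block_size = requested_blk
    blk_sqr = (block_size + 2) * (block_size + 2)
    # Width of the transformed image: one column per output tile (ceil division).
    width = batch * ((out_height + block_size - 1) // block_size) * \
        ((out_width + block_size - 1) // block_size)
    if blk_sqr * in_channels >= OPENCL_IMAGE_MAX_SIZE \
            or blk_sqr * out_channels >= OPENCL_IMAGE_MAX_SIZE \
            or width >= OPENCL_IMAGE_MAX_SIZE:
        # Fall back to the smaller block size, as the transformer does above.
        block_size = 2
        blk_sqr = (block_size + 2) * (block_size + 2)
    return block_size, blk_sqr
```

For example, `choose_winograd_block(4, batch, out_h, out_w, in_c, out_c)` returns `(2, 16)` whenever the size-4 blocking would reach the image limit.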
@@ -808,6 +808,7 @@ def build_specific_lib(target_abi, target_soc, serial_num,
     sh_commands.build_benchmark_model(target_abi,
                                       build_tmp_binary_dir,
                                       hexagon_mode,
+                                      enable_openmp,
                                       linkshared)
     # generate library
......
@@ -1051,16 +1051,17 @@ def packaging_lib(libmace_output_dir, project_name):
 def build_benchmark_model(abi,
                           model_output_dir,
                           hexagon_mode,
+                          enable_openmp,
                           linkshared=False):
-    if linkshared == 0:
+    if not linkshared:
         target_name = "benchmark_model_static"
     else:
         target_name = "benchmark_model_shared"
-        benchmark_target = "//mace/benchmark:benchmark_model_shared"
+    benchmark_target = "//mace/benchmark:%s" % target_name
     bazel_build(benchmark_target,
                 abi=abi,
+                enable_openmp=enable_openmp,
                 hexagon_mode=hexagon_mode)
     benchmark_binary_file = "%s/%s" % (model_output_dir, target_name)
......
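Besides threading `enable_openmp` through to `bazel_build`, the rewrite above derives `benchmark_target` from `target_name` instead of hard-coding the shared label. A self-contained sketch of the corrected mapping; the helper name is illustrative and not part of `sh_commands.py`.

```python
def benchmark_target_for(linkshared):
    # Mirror the fixed logic above: pick the target name from the flag,
    # then build the Bazel label from it.
    if linkshared:
        target_name = "benchmark_model_shared"
    else:
        target_name = "benchmark_model_static"
    return "//mace/benchmark:%s" % target_name

assert benchmark_target_for(False) == "//mace/benchmark:benchmark_model_static"
assert benchmark_target_for(True) == "//mace/benchmark:benchmark_model_shared"
```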