Commit 5660644a authored by Bin Li

Fix ONNX Upsample

Parent 11f20df4
......@@ -25,6 +25,8 @@ namespace image {
MaceStatus ResizeBilinearKernel::Compute(
OpContext *context,
const Tensor *input,
const index_t out_height,
const index_t out_width,
Tensor *output) {
const index_t batch = input->dim(0);
const index_t in_height = input->dim(1);
......@@ -32,8 +34,6 @@ MaceStatus ResizeBilinearKernel::Compute(
const index_t channels = input->dim(3);
const index_t channel_blocks = RoundUpDiv4(channels);
const index_t out_height = out_height_;
const index_t out_width = out_width_;
const uint32_t gws[3] = {static_cast<uint32_t>(channel_blocks),
static_cast<uint32_t>(out_width),
......
......@@ -66,22 +66,18 @@ inline std::vector<uint32_t> LocalWS(OpenCLRuntime *runtime,
class ResizeBilinearKernel : public OpenCLResizeBilinearKernel {
public:
ResizeBilinearKernel(bool align_corners,
const index_t out_height,
const index_t out_width)
: align_corners_(align_corners),
out_height_(out_height),
out_width_(out_width) {}
explicit ResizeBilinearKernel(bool align_corners)
: align_corners_(align_corners) {}
MaceStatus Compute(
OpContext *context,
const Tensor *input,
const index_t out_height,
const index_t out_width,
Tensor *output) override;
private:
bool align_corners_;
index_t out_height_;
index_t out_width_;
cl::Kernel kernel_;
uint32_t kwg_size_;
std::vector<index_t> input_shape_;
......
......@@ -30,6 +30,8 @@ class OpenCLResizeBilinearKernel {
virtual MaceStatus Compute(
OpContext *context,
const Tensor *input,
const index_t out_height,
const index_t out_width,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLResizeBilinearKernel);
};
......
......@@ -181,7 +181,9 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
explicit ResizeBilinearOp(OpConstructContext *context)
: Operation(context),
align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)),
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})) {}
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})),
height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)),
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -196,9 +198,16 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
const index_t in_height = input->dim(2);
const index_t in_width = input->dim(3);
index_t out_height = size_[0];
index_t out_width = size_[1];
MACE_CHECK(out_height > 0 && out_width > 0);
index_t out_height = 0;
index_t out_width = 0;
if (height_scale_ > 0) { // for ONNX
out_height = static_cast<index_t>(height_scale_ * in_height);
out_width = static_cast<index_t>(width_scale_ * in_width);
} else { // for tensor (Tf and Caffe)
out_height = size_[0];
out_width = size_[1];
}
MACE_CHECK(out_height > 0 && out_width > 0, out_height, out_width);
std::vector<index_t> out_shape{batch, channels, out_height, out_width};
MACE_RETURN_IF_ERROR(output->Resize(out_shape));
......@@ -214,14 +223,15 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
return MaceStatus::MACE_SUCCESS;
}
float height_scale =
common::utils::CalculateResizeScale(in_height,
out_height,
align_corners_);
float width_scale =
common::utils::CalculateResizeScale(in_width,
out_width,
align_corners_);
// ONNX's scale is the opposite of ours
float height_scale = height_scale_ > 0 ? 1 / height_scale_ :
common::utils::CalculateResizeScale(in_height,
out_height,
align_corners_);
float width_scale = width_scale_ > 0 ? 1 / width_scale_ :
common::utils::CalculateResizeScale(in_width,
out_width,
align_corners_);
std::vector<CachedInterpolation> ys(out_height + 1);
std::vector<CachedInterpolation> xs(out_width + 1);
......@@ -248,6 +258,8 @@ class ResizeBilinearOp<DeviceType::CPU, T> : public Operation {
private:
bool align_corners_;
std::vector<index_t> size_;
float height_scale_;
float width_scale_;
};
#ifdef MACE_ENABLE_QUANTIZE
......@@ -257,7 +269,9 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
explicit ResizeBilinearOp(OpConstructContext *context)
: Operation(context),
align_corners_(Operation::GetOptionalArg<bool>("align_corners", false)),
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})) {}
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})),
height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)),
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -272,8 +286,15 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
const index_t in_width = input->dim(2);
const index_t channels = input->dim(3);
index_t out_height = size_[0];
index_t out_width = size_[1];
index_t out_height = 0;
index_t out_width = 0;
if (height_scale_ > 0) { // for ONNX
out_height = static_cast<index_t>(height_scale_ * in_height);
out_width = static_cast<index_t>(width_scale_ * in_width);
} else { // for tensor (Tf and Caffe)
out_height = size_[0];
out_width = size_[1];
}
MACE_CHECK(out_height > 0 && out_width > 0);
std::vector<index_t> out_shape{batch, out_height, out_width, channels};
MACE_RETURN_IF_ERROR(output->Resize(out_shape));
......@@ -290,14 +311,15 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
return MaceStatus::MACE_SUCCESS;
}
float height_scale =
common::utils::CalculateResizeScale(in_height,
out_height,
align_corners_);
float width_scale =
common::utils::CalculateResizeScale(in_width,
out_width,
align_corners_);
// ONNX's scale is the opposite of ours
float height_scale = height_scale_ > 0 ? 1 / height_scale_ :
common::utils::CalculateResizeScale(in_height,
out_height,
align_corners_);
float width_scale = width_scale_ > 0 ? 1 / width_scale_ :
common::utils::CalculateResizeScale(in_width,
out_width,
align_corners_);
std::vector<CachedInterpolation> ys(out_height + 1);
std::vector<CachedInterpolation> xs(out_width + 1);
......@@ -324,6 +346,8 @@ class ResizeBilinearOp<DeviceType::CPU, uint8_t> : public Operation {
private:
bool align_corners_;
std::vector<index_t> size_;
float height_scale_;
float width_scale_;
};
#endif // MACE_ENABLE_QUANTIZE
......@@ -332,15 +356,14 @@ template<>
class ResizeBilinearOp<DeviceType::GPU, float> : public Operation {
public:
explicit ResizeBilinearOp(OpConstructContext *context)
: Operation(context) {
: Operation(context),
size_(Operation::GetRepeatedArgs<index_t>("size", {-1, -1})),
height_scale_(Operation::GetOptionalArg<float>("height_scale", 0)),
width_scale_(Operation::GetOptionalArg<float>("width_scale", 0)) {
bool align_corners = Operation::GetOptionalArg<bool>(
"align_corners", false);
std::vector<index_t> size = Operation::GetRepeatedArgs<index_t>(
"size", {-1, -1});
MACE_CHECK(size.size() == 2);
if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
kernel_ = make_unique<opencl::image::ResizeBilinearKernel>(
align_corners, size[0], size[1]);
kernel_ = make_unique<opencl::image::ResizeBilinearKernel>(align_corners);
} else {
MACE_NOT_IMPLEMENTED;
}
......@@ -351,11 +374,25 @@ class ResizeBilinearOp<DeviceType::GPU, float> : public Operation {
MACE_CHECK(input->dim_size() == 4, "input must be 4-dimensional.",
input->dim_size());
return kernel_->Compute(context, input, output);
index_t out_height = 0;
index_t out_width = 0;
if (height_scale_ > 0) { // for ONNX
out_height = static_cast<index_t>(height_scale_ * input->dim(1));
out_width = static_cast<index_t>(width_scale_ * input->dim(2));
} else { // for tensor (Tf and Caffe)
out_height = size_[0];
out_width = size_[1];
}
MACE_CHECK(out_height > 0 && out_width > 0);
return kernel_->Compute(context, input, out_height, out_width, output);
}
private:
std::unique_ptr<OpenCLResizeBilinearKernel> kernel_;
std::vector<index_t> size_;
float height_scale_;
float width_scale_;
};
#endif // MACE_ENABLE_OPENCL
......
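The comment "ONNX's scale is the opposite of ours" is the crux of the bilinear fix: ONNX Upsample supplies scale factors as output/input, while the resize kernels here consume an input/output ratio (presumably what common::utils::CalculateResizeScale returns when align_corners is false), so the ONNX value is inverted before use. A minimal sketch of that conversion, using made-up numbers and no MACE APIs:

    in_height, in_width = 4, 6
    onnx_height_scale, onnx_width_scale = 2.0, 2.0   # ONNX convention: output / input
    out_height = int(onnx_height_scale * in_height)  # 8
    out_width = int(onnx_width_scale * in_width)     # 12
    # The kernels expect the reciprocal (input / output), hence 1 / height_scale_ above.
    kernel_height_scale = 1.0 / onnx_height_scale    # 0.5
    kernel_width_scale = 1.0 / onnx_width_scale      # 0.5
    print(out_height, out_width, kernel_height_scale, kernel_width_scale)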
......@@ -97,10 +97,10 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation {
index_t out_height = 0;
index_t out_width = 0;
if (height_scale_ > 0) { // for Caffe
if (height_scale_ > 0) { // for Caffe and ONNX
out_height = static_cast<index_t>(height_scale_ * in_height);
out_width = static_cast<index_t>(width_scale_ * in_width);
} else { // for tensor (Tf and ONNX)
} else { // for tensor (Tf)
const Tensor *size = this->Input(1);
Tensor::MappingGuard size_mapper(size);
MACE_CHECK(size->dim_size() == 1,
......@@ -124,7 +124,7 @@ class ResizeNearestNeighborOp<DeviceType::CPU, T> : public Operation {
return MaceStatus::MACE_SUCCESS;
}
// Caffe's scale is the opposite of ours
// Caffe/ONNX's scale is the opposite of ours
float height_scale = height_scale_ > 0 ? 1 / height_scale_ :
common::utils::CalculateResizeScale(in_height,
out_height,
......@@ -179,17 +179,17 @@ class ResizeNearestNeighborOp<DeviceType::GPU, float> : public Operation {
index_t out_height = 0;
index_t out_width = 0;
if (height_scale_ > 0) { // for Caffe
if (height_scale_ > 0) { // for Caffe and ONNX
out_height = static_cast<index_t>(height_scale_ * input->dim(1));
out_width = static_cast<index_t>(width_scale_ * input->dim(2));
} else if (dim_.size() < 2) { // for variable tensor (Tf and ONNX)
} else if (dim_.size() < 2) { // for variable tensor (Tf)
const Tensor *size = this->Input(1);
Tensor::MappingGuard size_mapper(size);
MACE_CHECK(size->dim_size() == 1,
"size must be 1-dimensional.", size->dim_size());
out_height = size->data<int32_t>()[0];
out_width = size->data<int32_t>()[1];
} else { // for const tensor (Tf and ONNX)
} else { // for const tensor (Tf)
out_height = dim_[0];
out_width = dim_[1];
}
......
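After this change both resize ops resolve the output size the same way: explicit scale arguments (Caffe and ONNX) take precedence, and the size argument or size tensor (TensorFlow) is the fallback. A rough sketch of that selection order, with illustrative names only, not MACE functions:

    def pick_output_size(height_scale, width_scale, size, in_h, in_w):
        if height_scale > 0:                 # Caffe / ONNX: scale factors given
            return int(height_scale * in_h), int(width_scale * in_w)
        return size[0], size[1]              # TensorFlow: explicit output size

    print(pick_output_size(2.0, 2.0, (-1, -1), 8, 8))    # (16, 16)
    print(pick_output_size(0.0, 0.0, (16, 16), 8, 8))    # (16, 16)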
......@@ -1502,21 +1502,19 @@ class OnnxConverter(base_converter.ConverterInterface):
def convert_upsample(self, node):
op = self.convert_general_op(node)
del op.input[1:] # cut all unnecessary inputs (onnx>=1.5)
output_size = self._graph_shapes_dict[op.output[0]]
output_size = np.array(output_size[-2:]).astype(np.int32)
if node.attrs['mode'] == 'nearest':
op.type = MaceOp.ResizeNearestNeighbor.name
size_tensor_name = op.name + ":size"
self.add_tensor(size_tensor_name, output_size.shape,
mace_pb2.DT_INT32, output_size)
op.input.append(size_tensor_name)
else:
op.type = MaceOp.ResizeBilinear.name
size_arg = op.arg.add()
size_arg.name = MaceKeyword.mace_resize_size_str
size_arg.ints.extend(output_size.tolist())
scale_tensor = self._consts[node.inputs[1]]
height_scale_arg = op.arg.add()
height_scale_arg.name = MaceKeyword.mace_height_scale_str
width_scale_arg = op.arg.add()
width_scale_arg.name = MaceKeyword.mace_width_scale_str
height_scale_arg.f = scale_tensor.float_data[2]
width_scale_arg.f = scale_tensor.float_data[3]
align_corners_arg = op.arg.add()
align_corners_arg.name = MaceKeyword.mace_align_corners_str
......
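The converter reads float_data[2] and float_data[3] because, for a 4D NCHW input, the ONNX Upsample scales tensor carries one factor per axis in [batch, channel, height, width] order. A standalone illustration with hypothetical values (NumPy only, not the converter's actual data structures):

    import numpy as np

    scales = np.array([1.0, 1.0, 2.0, 2.0], dtype=np.float32)   # N, C, H, W factors
    height_scale, width_scale = float(scales[2]), float(scales[3])
    n, c, h, w = 1, 3, 16, 16                                    # example input shape
    out_h, out_w = int(height_scale * h), int(width_scale * w)   # 32, 32
    print(height_scale, width_scale, out_h, out_w)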