[LITE][BM] adaptive pool,test=develop

300d7628 · baolei.an · 32bf8eec · 300d7628 · 300d7628 · 300d7628
7 changed files
--- a/lite/api/_paddle_use_ops.h
+++ b/lite/api/_paddle_use_ops.h
@@ -48,6 +48,7 @@ USE_LITE_OP(concat)
 USE_LITE_OP(conv2d)
 USE_LITE_OP(depthwise_conv2d)
 USE_LITE_OP(pool2d)
+USE_LITE_OP(max_pool2d_with_index)
 USE_LITE_OP(batch_norm)
 USE_LITE_OP(fusion_elementwise_sub_activation)
 USE_LITE_OP(transpose)

--- a/lite/kernels/bm/bridges/CMakeLists.txt
+++ b/lite/kernels/bm/bridges/CMakeLists.txt
@@ -35,6 +35,7 @@ lite_cc_library(subgraph_bridge_assign_value_op_bm SRCS assign_value_op.cc DEPS
 lite_cc_library(subgraph_bridge_shape_op_bm SRCS shape_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_split_op_bm SRCS split_op.cc DEPS ${bm_subgraph_bridge_deps})
 lite_cc_library(subgraph_bridge_matmul_op_bm SRCS matmul_op.cc DEPS ${bm_subgraph_bridge_deps})
+lite_cc_library(subgraph_bridge_max_pool_with_index_op_bm SRCS max_pool_with_index_op.cc DEPS ${bm_subgraph_bridge_deps})

 set(bm_subgraph_bridges
        subgraph_bridge_registry
@@ -68,4 +69,5 @@ set(bm_subgraph_bridges
        subgraph_bridge_shape_op_bm
        subgraph_bridge_split_op_bm
        subgraph_bridge_matmul_op_bm
+        subgraph_bridge_max_pool_with_index_op_bm
        CACHE INTERNAL "bm_subgraph_bridges")
--- a/lite/kernels/bm/bridges/max_pool_with_index_op.cc
+++ b/lite/kernels/bm/bridges/max_pool_with_index_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <bmcompiler_defs.h>
+#include <bmcompiler_if.h>
+#include <bmcompiler_if_lite.h>
+#include <user_bmcpu_common.h>
+#include "lite/kernels/bm/bridges/graph.h"
+#include "lite/kernels/bm/bridges/utility.h"
+#include "lite/kernels/npu/bridges/registry.h"
+
+namespace paddle {
+namespace lite {
+namespace subgraph {
+namespace bm {
+
+// Bridge converter that lowers a Paddle `max_pool2d_with_index` op to a BM
+// user CPU layer of type USER_PADDLE_ADAPTIVE_POOL.
+//
+// Only adaptive max pooling is supported, and only the "Out" output is wired
+// into the graph; the "Mask" output is ignored for now.
+//
+// Params:
+//   ctx    - the bm::Graph under construction, passed as an opaque pointer.
+//   op     - the op being converted; supplies the scope and op description.
+//   kernel - unused.
+// Returns SUCCESS once the layer and its output node have been added. Invalid
+// arguments or an unsupported configuration fail a CHECK instead.
+int MaxPoolWithIndexConverter(void* ctx, OpLite* op, KernelBase* kernel) {
+  CHECK(ctx != nullptr);
+  CHECK(op != nullptr);
+  auto graph = static_cast<Graph*>(ctx);
+  auto scope = op->scope();
+  auto op_info = op->op_info();
+
+  // This converter always emits an adaptive max pool, so reject anything else
+  // before doing any work. The operator itself reads "adaptive" only when the
+  // attribute is present, so a missing value is treated as "not adaptive"
+  // here rather than being read unconditionally.
+  const bool adaptive =
+      op_info->HasAttr("adaptive") && op_info->GetAttr<bool>("adaptive");
+  CHECK(adaptive) << "max_pool2d_with_index is only supported on BM when "
+                     "adaptive is true";
+
+  // input
+  auto x_var_name = op_info->Input("X").front();
+  auto x_var = scope->FindVar(x_var_name);
+  CHECK(x_var != nullptr);
+  auto x = x_var->GetMutable<lite::Tensor>();
+  auto x_dims = x->dims();
+  std::vector<int32_t> i_x_shape_data(x_dims.size());
+  for (size_t i = 0; i < x_dims.size(); i++) {
+    i_x_shape_data[i] = static_cast<int32_t>(x_dims[i]);
+  }
+
+  // output
+  auto output_var_name = op_info->Output("Out").front();
+  auto output_var = scope->FindVar(output_var_name);
+  CHECK(output_var != nullptr);
+  auto output = output_var->GetMutable<lite::Tensor>();
+  auto output_dims = output->dims();
+  std::vector<int32_t> i_output_shape_data(output_dims.size());
+  for (size_t i = 0; i < output_dims.size(); i++) {
+    i_output_shape_data[i] = static_cast<int32_t>(output_dims[i]);
+  }
+
+  // The layer is described only by the input and output shapes, so ksize,
+  // paddings, strides and global_pooling are intentionally not read here.
+  // Value-initialise the parameter block so that no field other than the two
+  // set below is handed to the compiler with an indeterminate value.
+  user_cpu_param_t bm_param{};
+  bm_param.op_type = USER_PADDLE_ADAPTIVE_POOL;
+  bm_param.u.adaptive_pool_parm.is_avg = 0;  // max pooling, not average
+
+  // add_user_cpu_layer takes parallel arrays describing each input and each
+  // output; this op has exactly one of each.
+  int32_t* in_shape[1];
+  int32_t in_dim[1];
+  const char* in_name[1];
+  in_shape[0] = i_x_shape_data.data();
+  in_name[0] = x_var_name.c_str();
+  in_dim[0] = static_cast<int32_t>(x_dims.size());
+  int32_t* shape[1];
+  int32_t dim[1];
+  const char* name[1];
+  shape[0] = i_output_shape_data.data();
+  name[0] = output_var_name.c_str();
+  dim[0] = static_cast<int32_t>(output_dims.size());
+  add_user_cpu_layer(graph->GetCompilerHandle(),
+                     1,
+                     in_shape,
+                     in_dim,
+                     in_name,
+                     1,
+                     shape,
+                     dim,
+                     name,
+                     &bm_param,
+                     static_cast<int>(sizeof(bm_param)));
+  graph->AddNode(output_var_name);
+  return SUCCESS;
+}
+
+}  // namespace bm
+}  // namespace subgraph
+}  // namespace lite
+}  // namespace paddle
+// Register MaxPoolWithIndexConverter as the kBM subgraph bridge for the
+// max_pool2d_with_index op type.
+REGISTER_SUBGRAPH_BRIDGE(max_pool2d_with_index,
+                         kBM,
+                         paddle::lite::subgraph::bm::MaxPoolWithIndexConverter);
--- a/lite/kernels/bm/bridges/paddle_use_bridges.h
+++ b/lite/kernels/bm/bridges/paddle_use_bridges.h
@@ -58,3 +58,4 @@ USE_SUBGRAPH_BRIDGE(depthwise_conv2d_transpose, kBM);
 USE_SUBGRAPH_BRIDGE(shape, kBM);
 USE_SUBGRAPH_BRIDGE(split, kBM);
 USE_SUBGRAPH_BRIDGE(matmul, kBM);
+USE_SUBGRAPH_BRIDGE(max_pool2d_with_index, kBM);
--- a/lite/operators/CMakeLists.txt
+++ b/lite/operators/CMakeLists.txt
@@ -108,6 +108,7 @@ add_operator(collect_fpn_proposals_op_lite extra SRCS collect_fpn_proposals_op.c
 add_operator(distribute_fpn_proposals_op_lite extra SRCS distribute_fpn_proposals_op.cc DEPS ${op_DEPS})
 add_operator(crf_decoding_op_lite extra SRCS crf_decoding_op.cc DEPS ${op_DEPS})
 add_operator(ctc_align_op_lite extra SRCS ctc_align_op.cc DEPS ${op_DEPS})
+add_operator(max_pool_with_index_op extra SRCS max_pool_with_index_op.cc DEPS ${op_DEPS})

 # for OCR specific
 add_operator(while_op extra SRCS while_op.cc DEPS ${op_DEPS})

--- a/lite/operators/max_pool_with_index_op.cc
+++ b/lite/operators/max_pool_with_index_op.cc
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lite/operators/max_pool_with_index_op.h"
+#include <algorithm>
+#include "lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+// Validates the attached tensors and the pooling attributes held in param_.
+// Returns true only when every condition below holds; each CHECK_OR_FALSE is
+// expected to make the function return false otherwise (the macro is defined
+// outside this file).
+bool MaxPoolWithIndexOpLite::CheckShape() const {
+  // Both the input and the output tensor must have been attached.
+  CHECK_OR_FALSE(param_.x);
+  CHECK_OR_FALSE(param_.output);
+
+  const auto& x_dims = param_.x->dims();
+  const auto& strides = param_.strides;
+  const auto& ksize = param_.ksize;
+  const auto& paddings = *param_.paddings;
+  // Pooling input should be a 4-D or 5-D tensor.
+  CHECK_OR_FALSE(x_dims.size() == 4 || x_dims.size() == 5);
+  // Input size and pooling size should be consistent: the input must have
+  // exactly two more dimensions than ksize has entries.
+  CHECK_OR_FALSE(x_dims.size() - ksize.size() == 2U);
+  // Strides size and pooling size should be the same.
+  CHECK_OR_FALSE(ksize.size() == strides.size());
+  // Paddings size must be 4 (AttachImpl expands a 2-entry list to 4 entries).
+  CHECK_OR_FALSE(paddings.size() == 4L);
+
+  return true;
+}
+
+// Output extent along one spatial axis for a pooling window of `filter_size`
+// moved in steps of `stride` over an input of `input_size` elements that is
+// padded by `padding` elements on each side. The division is plain integer
+// division.
+inline int MaxPoolOutputSize(int input_size,
+                             int filter_size,
+                             int padding,
+                             int stride) {
+  const int padded_slack = input_size - filter_size + 2 * padding;
+  return padded_slack / stride + 1;
+}
+
+// Computes the shape of the pooled output and resizes param_.output to it.
+//
+// Dimensions 0 and 1 are copied from the input. For adaptive pooling, ksize
+// is appended verbatim as the remaining extents; otherwise each remaining
+// extent is derived from the input extent, window size, padding and stride
+// through MaxPoolOutputSize.
+//
+// Returns true on success, or false when the padding list does not provide
+// two entries per pooled axis in the non-adaptive case.
+bool MaxPoolWithIndexOpLite::InferShapeImpl() const {
+  const auto& x_dims = param_.x->dims();
+  const auto& ksize = param_.ksize;
+  const auto& strides = param_.strides;
+  const auto& paddings = *param_.paddings;
+  const auto adaptive = param_.adaptive;
+
+  std::vector<int64_t> output_shape({x_dims[0], x_dims[1]});
+  if (adaptive) {
+    output_shape.insert(output_shape.end(), ksize.begin(), ksize.end());
+  } else {
+    // paddings stores two entries per pooled axis, so the padding of axis i
+    // lives at index 2 * i, not at index i. Guard the size so the doubled
+    // index can never run past the end of the vector.
+    CHECK_OR_FALSE(paddings.size() == 2 * ksize.size());
+    for (size_t i = 0; i < ksize.size(); ++i) {
+      // MaxPoolOutputSize applies its padding argument to both sides, so the
+      // leading entry of the pair is passed.
+      output_shape.push_back(MaxPoolOutputSize(
+          x_dims[i + 2], ksize[i], paddings[2 * i], strides[i]));
+    }
+  }
+  param_.output->Resize(lite::DDim(output_shape));
+  return true;
+}
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle
+
+// Register MaxPoolWithIndexOpLite as the implementation of the
+// max_pool2d_with_index op type.
+REGISTER_LITE_OP(max_pool2d_with_index,
+                 paddle::lite::operators::MaxPoolWithIndexOpLite);
--- a/lite/operators/max_pool_with_index_op.h
+++ b/lite/operators/max_pool_with_index_op.h
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <algorithm>
+#include <memory>
+#include <string>
+#include <vector>
+#include "lite/core/kernel.h"
+#include "lite/core/op_lite.h"
+#include "lite/core/scope.h"
+#include "lite/core/tensor.h"
+#include "lite/operators/op_params.h"
+#include "lite/utils/all.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+// Operator definition for `max_pool2d_with_index`.
+//
+// The op description is unpacked into a PoolParam: the "X" input and "Out"
+// output tensors plus the ksize / strides / paddings / global_pooling /
+// adaptive attributes. The "Mask" output must exist in the scope but is not
+// stored in the param.
+class MaxPoolWithIndexOpLite : public OpLite {
+ public:
+  MaxPoolWithIndexOpLite() {}
+
+  explicit MaxPoolWithIndexOpLite(const std::string &type) : OpLite(type) {}
+
+  // Validates tensors and attribute sizes; defined out of line.
+  bool CheckShape() const override;
+
+  // Resizes the output tensor; defined out of line.
+  bool InferShapeImpl() const override;
+
+  // Binds tensors from `scope` and attributes from `op_desc` into param_.
+  // Always returns true; a missing variable fails a CHECK and an unsupported
+  // padding count is reported through LOG(FATAL).
+  // TODO(Superjomn) replace framework::OpDesc with a lite one.
+  bool AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) override {
+    // Names of the single input and of both outputs.
+    const auto x = op_desc.Input("X").front();
+    const auto out = op_desc.Output("Out").front();
+    const auto mask = op_desc.Output("Mask").front();
+
+    // All three variables have to be present in the scope.
+    CHECK(scope->FindVar(x));
+    CHECK(scope->FindVar(out));
+    CHECK(scope->FindVar(mask));
+
+    // Only X and Out are bound to the param.
+    param_.x = scope->FindVar(x)->GetMutable<lite::Tensor>();
+    param_.output = scope->FindVar(out)->GetMutable<lite::Tensor>();
+
+    param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize");
+    param_.global_pooling = op_desc.GetAttr<bool>("global_pooling");
+    param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
+    auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
+
+    // "adaptive" is optional; without it param_.adaptive keeps its current
+    // value.
+    if (op_desc.HasAttr("adaptive")) {
+      param_.adaptive = op_desc.GetAttr<bool>("adaptive");
+    }
+
+    // Normalise the padding list to four entries: a two-entry list {a, b}
+    // becomes {a, a, b, b}; a four-entry list is taken as is; any other
+    // length is rejected.
+    const size_t pad_count = paddings.size();
+    if (pad_count == 2U) {
+      const int pad_first = paddings[0];
+      const int pad_second = paddings[1];
+      paddings = {pad_first, pad_first, pad_second, pad_second};
+    } else if (pad_count != 4U) {
+      LOG(FATAL)
+          << "Paddings size should be the same or twice as the inputs size.";
+    }
+    param_.paddings = std::make_shared<std::vector<int>>(paddings);
+    return true;
+  }
+
+  // Hands the populated parameter block to the kernel.
+  void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); }
+
+  std::string DebugString() const override { return "max_pool2d_with_index"; }
+
+ private:
+  // mutable so that the const shape-inference methods can resize the output.
+  mutable PoolParam param_;
+};
+
+}  // namespace operators
+}  // namespace lite
+}  // namespace paddle