未验证 提交 2548657e 编写于 作者: X xjmxyt 提交者: GitHub

add setvalue trt converter (#50341)

* add cast setvalue op

* add set_value to op teller

* renew test and add description

* add setAxis and add complex test

* change test
上级 cf48d20f
......@@ -2526,6 +2526,9 @@ USE_TRT_CONVERTER(preln_groupnorm_act)
USE_TRT_CONVERTER(flash_multihead_matmul)
USE_TRT_CONVERTER(cross_multihead_matmul)
#endif
#if IS_TRT_VERSION_GE(8200)
USE_TRT_CONVERTER(set_value)
#endif
#if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
USE_TRT_CONVERTER(sparse_fc)
USE_TRT_CONVERTER(sparse_multihead_matmul)
......
......@@ -111,6 +111,11 @@ if(CUSPARSELT_FOUND AND ${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8)
list(APPEND CONVERT_FILES sparse_fc_op.cc sparse_multihead_matmul_op.cc)
endif()
if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8 AND ${TENSORRT_MINOR_VERSION}
GREATER_EQUAL 2)
list(APPEND CONVERT_FILES set_value_op.cc)
endif()
nv_library(
tensorrt_converter
SRCS ${CONVERT_FILES}
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
// Fetches a scalar op attribute that Paddle stores as a
// std::vector<int64_t>: if `op_desc` has an attribute named after the macro
// argument and that vector is non-empty, its first element overwrites the
// local variable of the same name; otherwise the variable keeps its
// previously-assigned default. Requires a `framework::OpDesc op_desc` in the
// enclosing scope.
#define GET_ATTR_FROM_VECTOR(attr_name__)                                   \
  do {                                                                      \
    std::vector<int64_t> vec_##attr_name__;                                 \
    if (op_desc.HasAttr(#attr_name__)) {                                    \
      vec_##attr_name__ = PADDLE_GET_CONST(std::vector<int64_t>,            \
                                           op_desc.GetAttr(#attr_name__));  \
      if (vec_##attr_name__.size() > 0) attr_name__ = vec_##attr_name__[0]; \
    }                                                                       \
  } while (0)
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
// We use TensorRT's ScatterElements layer to implement set_value.
// For ScatterElements in kELEMENT mode, if `indices` has dimensions
// [N,C,H,W] and axis is 2, the updates happen as:
//   for n in [0,N)
//     for c in [0,C)
//       for h in [0,H)
//         for w in [0,W)
//           output[n,c,indices[n,c,h,w],w] = updates[n,c,h,w]
class SetValueConverter : public OpConverter {
 public:
  // Converts a Paddle set_value op (inputs: "Input", "ValueTensor";
  // output: "Out") into a TensorRT ScatterElements layer. Only
  // dynamic-shape mode is supported, and only a single scalar
  // (axes, starts, ends, steps) slice specification is honored
  // (the first element of each vector attribute).
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
    VLOG(3) << "convert a set value op to tensorrt";
    framework::OpDesc op_desc(op, nullptr);
    auto* inputs = engine_->GetITensor(op_desc.Input("Input")[0]);
    auto* updates = engine_->GetITensor(op_desc.Input("ValueTensor")[0]);

    // Defaults when an attribute is absent or empty:
    // axes=0, starts=0, steps=1, ends=0 (empty slice).
    int64_t axes = 0;
    int64_t starts = 0;
    int64_t steps = 1;
    int64_t ends = 0;
    GET_ATTR_FROM_VECTOR(axes);
    GET_ATTR_FROM_VECTOR(starts);
    GET_ATTR_FROM_VECTOR(steps);
    GET_ATTR_FROM_VECTOR(ends);

    auto input_dims = inputs->getDimensions();
    auto update_dims = updates->getDimensions();

    // Normalize a negative axis the NumPy way (-1 is the last axis).
    // Generalized from the original `axes == -1` special case.
    if (axes < 0) {
      axes += input_dims.nbDims;
    }
    // BUGFIX: these InvalidArgument errors were previously constructed and
    // immediately discarded (no PADDLE_THROW), so invalid attributes passed
    // silently and caused out-of-range indexing below. They must be thrown.
    if (axes >= input_dims.nbDims) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The axes %d is larger than total axes %d", axes, input_dims.nbDims));
    }
    if (starts >= input_dims.d[axes]) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The start %d of dim %d is larger than origin shape %d",
          starts,
          axes,
          input_dims.d[axes]));
    }
    if (update_dims.d[axes] != (input_dims.d[axes] - starts) / steps) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The update dim error, should be %d",
          (input_dims.d[axes] - starts) / steps));
    }

    if (engine_->with_dynamic_shape()) {
      // Build the constant index tensor for ScatterElements: along `axes`
      // the index runs over [starts, ends) with stride `steps`; each index
      // value is replicated across all trailing positions (post_size) and
      // the whole pattern is repeated for all leading positions (pre_size).
      int post_size = 1;
      for (int j = axes + 1; j < update_dims.nbDims; ++j) {
        post_size = post_size * update_dims.d[j];
      }
      std::vector<int> axes_index;
      for (int i = starts; i < ends; i += steps) {
        for (int j = 0; j < post_size; ++j) {
          axes_index.emplace_back(i);
        }
      }
      int pre_size = 1;
      for (int i = 0; i < axes; ++i) {
        pre_size *= update_dims.d[i];
      }
      std::vector<int> indices;
      indices.reserve(static_cast<size_t>(pre_size) * axes_index.size());
      for (int i = 0; i < pre_size; ++i) {
        indices.insert(indices.end(), axes_index.begin(), axes_index.end());
      }

      // The index tensor has exactly the same shape as `updates`.
      nvinfer1::Dims indice_dims = update_dims;
      std::vector<int> indice_dim_vec;
      for (int i = 0; i < update_dims.nbDims; i++) {
        indice_dim_vec.emplace_back(update_dims.d[i]);
      }
      auto indice_tensor_dims = phi::make_ddim(indice_dim_vec);
      // Host tensor backing the index weights; ownership is transferred to
      // the engine below so the data outlives this converter call.
      std::unique_ptr<phi::DenseTensor> indice_tensor(
          std::make_unique<phi::DenseTensor>());
      indice_tensor->Resize(indice_tensor_dims);
      auto* dev_ctx = static_cast<phi::CPUContext*>(
          platform::DeviceContextPool::Instance().Get(platform::CPUPlace()));
      auto* weight_data = dev_ctx->template HostAlloc<int>(indice_tensor.get());
      memcpy(weight_data, indices.data(), sizeof(int) * indice_tensor->numel());
      TensorRTEngine::Weight weight{
          nvinfer1::DataType::kINT32,
          static_cast<void*>(weight_data),
          static_cast<size_t>(indice_tensor->numel())};
      auto output_name = op_desc.Output("Out")[0];
      engine_->SetWeights("set_value_index_" + output_name,
                          std::move(indice_tensor));
      auto const_layer =
          TRT_ENGINE_ADD_LAYER(engine_, Constant, indice_dims, weight.get());
      // out[..., indices[...], ...] = updates[...] along `axes`.
      auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
                                         Scatter,
                                         *inputs,
                                         *const_layer->getOutput(0),
                                         *updates,
                                         nvinfer1::ScatterMode::kELEMENT);
      layer->setAxis(axes);
      RreplenishLayerAndOutput(layer, "set_value", {output_name}, test_mode);
    } else {
      PADDLE_THROW(platform::errors::Fatal(
          "static shape mode not supported in set value yet"));
    }
  }
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(set_value, SetValueConverter);
......@@ -75,6 +75,7 @@ struct SimpleOpTypeSetTeller : public Teller {
#if IS_TRT_VERSION_GE(8200)
teller_set.insert("round");
int8_teller_set.insert("round");
teller_set.insert("set_value");
#endif
}
......@@ -2369,6 +2370,27 @@ struct SimpleOpTypeSetTeller : public Teller {
}
}
    if (op_type == "set_value") {
// set_value conversion relies on ScatterElements, available in TRT >= 8.2.
#if !IS_TRT_VERSION_GE(8200)
      return false;
#endif
      // The converter reads the first element of each of these vector
      // attributes; "ends" is optional there (defaults to 0), so it is not
      // required here.
      if (!(desc.HasAttr("axes") && desc.HasAttr("starts") &&
            desc.HasAttr("steps"))) {
        VLOG(3) << "the " << op_type
                << " does not have attr (axes or "
                   "starts or steps)";
        return false;
      }
      auto* block = desc.Block();
      auto input_name = desc.Input("Input")[0];
      auto* input_desc = block->FindVar(input_name);
      const auto input_shape = input_desc->GetShape();
      auto update_name = desc.Input("ValueTensor")[0];
      auto* update_desc = block->FindVar(update_name);
      const auto update_shape = update_desc->GetShape();
      // The scatter-based implementation requires the value tensor to have
      // the same rank as the input tensor.
      if (update_shape.size() != input_shape.size()) return false;
    }
if (op_type == "top_k_v2" || op_type == "top_k") {
auto* block = desc.Block();
auto x_var_name = desc.Input("X")[0];
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import numpy as np
from program_config import ProgramConfig, TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest
import paddle.inference as paddle_infer
class TrtConvertSetValue(TrtLayerAutoScanTest):
    # Auto-scan test for the TensorRT set_value converter. Builds a small
    # program (set_value -> gelu -> slice -> scale -> scale -> set_value)
    # and compares Paddle-native vs TensorRT outputs under dynamic shape,
    # in FP32 and FP16.

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # Every sampled configuration is valid for this op.
        return True

    def sample_program_configs(self):
        def generate_input1():
            # "Input" of the first set_value: rank 6, extent 3 on axis 5.
            return np.random.random([1, 6, 20, 50, 10, 3]).astype(np.float32)

        def generate_input2():
            # "ValueTensor": same rank as the input, extent 1 on axis 5 —
            # matches the slice axes=[5], starts=[0], ends=[1], steps=[1].
            return np.random.random([1, 6, 20, 50, 10, 1]).astype(np.float32)

        ops_config = [
            {
                # Writes update_data into input_data[..., 0:1] on axis 5.
                "op_type": "set_value",
                "op_inputs": {
                    "Input": ["input_data"],
                    "ValueTensor": ["update_data"],
                },
                "op_outputs": {"Out": ["set_output_data"]},
                "op_attrs": {
                    "axes": [5],
                    "starts": [0],
                    "ends": [1],
                    "steps": [1],
                },
            },
            {
                "op_type": "gelu",
                "op_inputs": {
                    "X": ["set_output_data"],
                },
                "op_outputs": {"Out": ["set_tmp_output_data"]},
                "op_attrs": {"approximate": True},
            },
            {
                # Carves out the [..., 1:2] slice on axis 5 to feed the
                # scale chain that produces the second set_value's values.
                "op_type": "slice",
                "op_inputs": {"Input": ["set_tmp_output_data"]},
                "op_outputs": {"Out": ["slice3_output_data"]},
                "op_attrs": {
                    "decrease_axis": [],
                    "axes": [5],
                    "starts": [1],
                    "ends": [2],
                },
            },
            {
                "op_type": "scale",
                "op_inputs": {"X": ["slice3_output_data"]},
                "op_outputs": {"Out": ["scale5_output_data"]},
                "op_attrs": {
                    "scale": 62.1,
                    "bias": 1,
                    "bias_after_scale": True,
                },
            },
            {
                "op_type": "scale",
                "op_inputs": {"X": ["scale5_output_data"]},
                "op_outputs": {"Out": ["scale6_output_data"]},
                "op_attrs": {
                    "scale": 0.1,
                    "bias": 0,
                    "bias_after_scale": True,
                },
            },
            {
                # Second set_value with a non-zero start (starts=[1]) to
                # exercise a different slice position.
                "op_type": "set_value",
                "op_inputs": {
                    "Input": ["set_tmp_output_data"],
                    "ValueTensor": ["scale6_output_data"],
                },
                "op_outputs": {"Out": ["output_data"]},
                "op_attrs": {
                    "axes": [5],
                    "starts": [1],
                    "ends": [2],
                    "steps": [1],
                },
            },
        ]
        ops = self.generate_op_config(ops_config)
        program_config = ProgramConfig(
            ops=ops,
            weights={},
            inputs={
                "input_data": TensorConfig(data_gen=partial(generate_input1)),
                "update_data": TensorConfig(data_gen=partial(generate_input2)),
            },
            outputs=["output_data"],
        )
        yield program_config

    def sample_predictor_configs(self, program_config):
        def generate_dynamic_shape(attrs):
            # NOTE(review): `attrs` is unused; min == opt == max so the
            # shapes are effectively fixed but still go through the
            # dynamic-shape path (the converter only supports dynamic shape).
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 6, 20, 50, 10, 3],
                "update_data": [1, 6, 20, 50, 10, 1],
                "output_data": [1, 6, 20, 50, 10, 3],
                "set_output_data": [1, 6, 20, 50, 10, 3],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data": [1, 6, 20, 50, 10, 3],
                "update_data": [1, 6, 20, 50, 10, 1],
                "output_data": [1, 6, 20, 50, 10, 3],
                "set_output_data": [1, 6, 20, 50, 10, 3],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data": [1, 6, 20, 50, 10, 3],
                "update_data": [1, 6, 20, 50, 10, 1],
                "output_data": [1, 6, 20, 50, 10, 3],
                "set_output_data": [1, 6, 20, 50, 10, 3],
            }

        def clear_dynamic_shape():
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        def generate_trt_nodes_num(attrs, dynamic_shape):
            # Returns (expected TRT engine count, expected remaining op
            # count). NOTE(review): only the dynamic_shape=True branch
            # returns a value; the calls below always pass True.
            if dynamic_shape:
                ver = paddle_infer.get_trt_compile_version()
                # Before TRT 8.2 set_value is not converted, so more ops
                # stay outside the engine.
                if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8200:
                    return 1, 5
                return 1, 3

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        self.trt_param.workspace_size = 2013265920
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), (1e-5, 1e-4)
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), (1e-3, 1e-3)

    def test(self):
        self.run_test()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册