未验证 提交 2548657e 编写于 作者: X xjmxyt 提交者: GitHub

add setvalue trt converter (#50341)

* add cast setvalue op

* add set_value to op teller

* renew test and add description

* add setAxis and add complex test

* change test
上级 cf48d20f
......@@ -2526,6 +2526,9 @@ USE_TRT_CONVERTER(preln_groupnorm_act)
USE_TRT_CONVERTER(flash_multihead_matmul)
USE_TRT_CONVERTER(cross_multihead_matmul)
#endif
#if IS_TRT_VERSION_GE(8200)
USE_TRT_CONVERTER(set_value)
#endif
#if PADDLE_WITH_CUSPARSELT && IS_TRT_VERSION_GE(8000)
USE_TRT_CONVERTER(sparse_fc)
USE_TRT_CONVERTER(sparse_multihead_matmul)
......
......@@ -111,6 +111,11 @@ if(CUSPARSELT_FOUND AND ${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8)
list(APPEND CONVERT_FILES sparse_fc_op.cc sparse_multihead_matmul_op.cc)
endif()
if(${TENSORRT_MAJOR_VERSION} GREATER_EQUAL 8 AND ${TENSORRT_MINOR_VERSION}
GREATER_EQUAL 2)
list(APPEND CONVERT_FILES set_value_op.cc)
endif()
nv_library(
tensorrt_converter
SRCS ${CONVERT_FILES}
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
// Fetches a scalar op attribute that Paddle stores as a
// std::vector<int64_t>: if `op_desc` has an attribute named after the macro
// argument and that vector is non-empty, its first element overwrites the
// local variable of the same name; otherwise the variable keeps its
// previously-assigned default. Requires a `framework::OpDesc op_desc` in the
// enclosing scope.
#define GET_ATTR_FROM_VECTOR(attr_name__)                                   \
  do {                                                                      \
    std::vector<int64_t> vec_##attr_name__;                                 \
    if (op_desc.HasAttr(#attr_name__)) {                                    \
      vec_##attr_name__ = PADDLE_GET_CONST(std::vector<int64_t>,            \
                                           op_desc.GetAttr(#attr_name__));  \
      if (vec_##attr_name__.size() > 0) attr_name__ = vec_##attr_name__[0]; \
    }                                                                       \
  } while (0)
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
// We use TensorRT's ScatterElements layer to implement set_value.
// For ScatterElements in kELEMENT mode, if `indices` has dimensions
// [N,C,H,W] and axis is 2, the updates happen as:
//   for n in [0,N)
//     for c in [0,C)
//       for h in [0,H)
//         for w in [0,W)
//           output[n,c,indices[n,c,h,w],w] = updates[n,c,h,w]
class SetValueConverter : public OpConverter {
 public:
  // Converts a Paddle set_value op (inputs: "Input", "ValueTensor";
  // output: "Out") into a TensorRT ScatterElements layer. Only
  // dynamic-shape mode is supported, and only a single scalar
  // (axes, starts, ends, steps) slice specification is honored
  // (the first element of each vector attribute).
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
    VLOG(3) << "convert a set value op to tensorrt";
    framework::OpDesc op_desc(op, nullptr);
    auto* inputs = engine_->GetITensor(op_desc.Input("Input")[0]);
    auto* updates = engine_->GetITensor(op_desc.Input("ValueTensor")[0]);

    // Defaults when an attribute is absent or empty:
    // axes=0, starts=0, steps=1, ends=0 (empty slice).
    int64_t axes = 0;
    int64_t starts = 0;
    int64_t steps = 1;
    int64_t ends = 0;
    GET_ATTR_FROM_VECTOR(axes);
    GET_ATTR_FROM_VECTOR(starts);
    GET_ATTR_FROM_VECTOR(steps);
    GET_ATTR_FROM_VECTOR(ends);

    auto input_dims = inputs->getDimensions();
    auto update_dims = updates->getDimensions();

    // Normalize a negative axis the NumPy way (-1 is the last axis).
    // Generalized from the original `axes == -1` special case.
    if (axes < 0) {
      axes += input_dims.nbDims;
    }
    // BUGFIX: these InvalidArgument errors were previously constructed and
    // immediately discarded (no PADDLE_THROW), so invalid attributes passed
    // silently and caused out-of-range indexing below. They must be thrown.
    if (axes >= input_dims.nbDims) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The axes %d is larger than total axes %d", axes, input_dims.nbDims));
    }
    if (starts >= input_dims.d[axes]) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The start %d of dim %d is larger than origin shape %d",
          starts,
          axes,
          input_dims.d[axes]));
    }
    if (update_dims.d[axes] != (input_dims.d[axes] - starts) / steps) {
      PADDLE_THROW(platform::errors::InvalidArgument(
          "The update dim error, should be %d",
          (input_dims.d[axes] - starts) / steps));
    }

    if (engine_->with_dynamic_shape()) {
      // Build the constant index tensor for ScatterElements: along `axes`
      // the index runs over [starts, ends) with stride `steps`; each index
      // value is replicated across all trailing positions (post_size) and
      // the whole pattern is repeated for all leading positions (pre_size).
      int post_size = 1;
      for (int j = axes + 1; j < update_dims.nbDims; ++j) {
        post_size = post_size * update_dims.d[j];
      }
      std::vector<int> axes_index;
      for (int i = starts; i < ends; i += steps) {
        for (int j = 0; j < post_size; ++j) {
          axes_index.emplace_back(i);
        }
      }
      int pre_size = 1;
      for (int i = 0; i < axes; ++i) {
        pre_size *= update_dims.d[i];
      }
      std::vector<int> indices;
      indices.reserve(static_cast<size_t>(pre_size) * axes_index.size());
      for (int i = 0; i < pre_size; ++i) {
        indices.insert(indices.end(), axes_index.begin(), axes_index.end());
      }

      // The index tensor has exactly the same shape as `updates`.
      nvinfer1::Dims indice_dims = update_dims;
      std::vector<int> indice_dim_vec;
      for (int i = 0; i < update_dims.nbDims; i++) {
        indice_dim_vec.emplace_back(update_dims.d[i]);
      }
      auto indice_tensor_dims = phi::make_ddim(indice_dim_vec);
      // Host tensor backing the index weights; ownership is transferred to
      // the engine below so the data outlives this converter call.
      std::unique_ptr<phi::DenseTensor> indice_tensor(
          std::make_unique<phi::DenseTensor>());
      indice_tensor->Resize(indice_tensor_dims);
      auto* dev_ctx = static_cast<phi::CPUContext*>(
          platform::DeviceContextPool::Instance().Get(platform::CPUPlace()));
      auto* weight_data = dev_ctx->template HostAlloc<int>(indice_tensor.get());
      memcpy(weight_data, indices.data(), sizeof(int) * indice_tensor->numel());
      TensorRTEngine::Weight weight{
          nvinfer1::DataType::kINT32,
          static_cast<void*>(weight_data),
          static_cast<size_t>(indice_tensor->numel())};
      auto output_name = op_desc.Output("Out")[0];
      engine_->SetWeights("set_value_index_" + output_name,
                          std::move(indice_tensor));
      auto const_layer =
          TRT_ENGINE_ADD_LAYER(engine_, Constant, indice_dims, weight.get());
      // out[..., indices[...], ...] = updates[...] along `axes`.
      auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
                                         Scatter,
                                         *inputs,
                                         *const_layer->getOutput(0),
                                         *updates,
                                         nvinfer1::ScatterMode::kELEMENT);
      layer->setAxis(axes);
      RreplenishLayerAndOutput(layer, "set_value", {output_name}, test_mode);
    } else {
      PADDLE_THROW(platform::errors::Fatal(
          "static shape mode not supported in set value yet"));
    }
  }
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(set_value, SetValueConverter);
......@@ -75,6 +75,7 @@ struct SimpleOpTypeSetTeller : public Teller {
#if IS_TRT_VERSION_GE(8200)
teller_set.insert("round");
int8_teller_set.insert("round");
teller_set.insert("set_value");
#endif
}
......@@ -2369,6 +2370,27 @@ struct SimpleOpTypeSetTeller : public Teller {
}
}
    if (op_type == "set_value") {
// set_value conversion relies on ScatterElements, available in TRT >= 8.2.
#if !IS_TRT_VERSION_GE(8200)
      return false;
#endif
      // The converter reads the first element of each of these vector
      // attributes; "ends" is optional there (defaults to 0), so it is not
      // required here.
      if (!(desc.HasAttr("axes") && desc.HasAttr("starts") &&
            desc.HasAttr("steps"))) {
        VLOG(3) << "the " << op_type
                << " does not have attr (axes or "
                   "starts or steps)";
        return false;
      }
      auto* block = desc.Block();
      auto input_name = desc.Input("Input")[0];
      auto* input_desc = block->FindVar(input_name);
      const auto input_shape = input_desc->GetShape();
      auto update_name = desc.Input("ValueTensor")[0];
      auto* update_desc = block->FindVar(update_name);
      const auto update_shape = update_desc->GetShape();
      // The scatter-based implementation requires the value tensor to have
      // the same rank as the input tensor.
      if (update_shape.size() != input_shape.size()) return false;
    }
if (op_type == "top_k_v2" || op_type == "top_k") {
auto* block = desc.Block();
auto x_var_name = desc.Input("X")[0];
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
import numpy as np
from program_config import ProgramConfig, TensorConfig
from trt_layer_auto_scan_test import TrtLayerAutoScanTest
import paddle.inference as paddle_infer
class TrtConvertSetValue(TrtLayerAutoScanTest):
    # Auto-scan test for the TensorRT set_value converter. Builds a small
    # program (set_value -> gelu -> slice -> scale -> scale -> set_value)
    # and compares Paddle-native vs TensorRT outputs under dynamic shape,
    # in FP32 and FP16.

    def is_program_valid(self, program_config: ProgramConfig) -> bool:
        # Every sampled configuration is valid for this op.
        return True

    def sample_program_configs(self):
        def generate_input1():
            # "Input" of the first set_value: rank 6, extent 3 on axis 5.
            return np.random.random([1, 6, 20, 50, 10, 3]).astype(np.float32)

        def generate_input2():
            # "ValueTensor": same rank as the input, extent 1 on axis 5 —
            # matches the slice axes=[5], starts=[0], ends=[1], steps=[1].
            return np.random.random([1, 6, 20, 50, 10, 1]).astype(np.float32)

        ops_config = [
            {
                # Writes update_data into input_data[..., 0:1] on axis 5.
                "op_type": "set_value",
                "op_inputs": {
                    "Input": ["input_data"],
                    "ValueTensor": ["update_data"],
                },
                "op_outputs": {"Out": ["set_output_data"]},
                "op_attrs": {
                    "axes": [5],
                    "starts": [0],
                    "ends": [1],
                    "steps": [1],
                },
            },
            {
                "op_type": "gelu",
                "op_inputs": {
                    "X": ["set_output_data"],
                },
                "op_outputs": {"Out": ["set_tmp_output_data"]},
                "op_attrs": {"approximate": True},
            },
            {
                # Carves out the [..., 1:2] slice on axis 5 to feed the
                # scale chain that produces the second set_value's values.
                "op_type": "slice",
                "op_inputs": {"Input": ["set_tmp_output_data"]},
                "op_outputs": {"Out": ["slice3_output_data"]},
                "op_attrs": {
                    "decrease_axis": [],
                    "axes": [5],
                    "starts": [1],
                    "ends": [2],
                },
            },
            {
                "op_type": "scale",
                "op_inputs": {"X": ["slice3_output_data"]},
                "op_outputs": {"Out": ["scale5_output_data"]},
                "op_attrs": {
                    "scale": 62.1,
                    "bias": 1,
                    "bias_after_scale": True,
                },
            },
            {
                "op_type": "scale",
                "op_inputs": {"X": ["scale5_output_data"]},
                "op_outputs": {"Out": ["scale6_output_data"]},
                "op_attrs": {
                    "scale": 0.1,
                    "bias": 0,
                    "bias_after_scale": True,
                },
            },
            {
                # Second set_value with a non-zero start (starts=[1]) to
                # exercise a different slice position.
                "op_type": "set_value",
                "op_inputs": {
                    "Input": ["set_tmp_output_data"],
                    "ValueTensor": ["scale6_output_data"],
                },
                "op_outputs": {"Out": ["output_data"]},
                "op_attrs": {
                    "axes": [5],
                    "starts": [1],
                    "ends": [2],
                    "steps": [1],
                },
            },
        ]
        ops = self.generate_op_config(ops_config)
        program_config = ProgramConfig(
            ops=ops,
            weights={},
            inputs={
                "input_data": TensorConfig(data_gen=partial(generate_input1)),
                "update_data": TensorConfig(data_gen=partial(generate_input2)),
            },
            outputs=["output_data"],
        )
        yield program_config

    def sample_predictor_configs(self, program_config):
        def generate_dynamic_shape(attrs):
            # NOTE(review): `attrs` is unused; min == opt == max so the
            # shapes are effectively fixed but still go through the
            # dynamic-shape path (the converter only supports dynamic shape).
            self.dynamic_shape.min_input_shape = {
                "input_data": [1, 6, 20, 50, 10, 3],
                "update_data": [1, 6, 20, 50, 10, 1],
                "output_data": [1, 6, 20, 50, 10, 3],
                "set_output_data": [1, 6, 20, 50, 10, 3],
            }
            self.dynamic_shape.max_input_shape = {
                "input_data": [1, 6, 20, 50, 10, 3],
                "update_data": [1, 6, 20, 50, 10, 1],
                "output_data": [1, 6, 20, 50, 10, 3],
                "set_output_data": [1, 6, 20, 50, 10, 3],
            }
            self.dynamic_shape.opt_input_shape = {
                "input_data": [1, 6, 20, 50, 10, 3],
                "update_data": [1, 6, 20, 50, 10, 1],
                "output_data": [1, 6, 20, 50, 10, 3],
                "set_output_data": [1, 6, 20, 50, 10, 3],
            }

        def clear_dynamic_shape():
            self.dynamic_shape.max_input_shape = {}
            self.dynamic_shape.min_input_shape = {}
            self.dynamic_shape.opt_input_shape = {}

        def generate_trt_nodes_num(attrs, dynamic_shape):
            # Returns (expected TRT engine count, expected remaining op
            # count). NOTE(review): only the dynamic_shape=True branch
            # returns a value; the calls below always pass True.
            if dynamic_shape:
                ver = paddle_infer.get_trt_compile_version()
                # Before TRT 8.2 set_value is not converted, so more ops
                # stay outside the engine.
                if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8200:
                    return 1, 5
                return 1, 3

        attrs = [
            program_config.ops[i].attrs for i in range(len(program_config.ops))
        ]
        # for dynamic_shape
        generate_dynamic_shape(attrs)
        self.trt_param.precision = paddle_infer.PrecisionType.Float32
        self.trt_param.workspace_size = 2013265920
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), (1e-5, 1e-4)
        self.trt_param.precision = paddle_infer.PrecisionType.Half
        yield self.create_inference_config(), generate_trt_nodes_num(
            attrs, True
        ), (1e-3, 1e-3)

    def test(self):
        self.run_test()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册