未验证 提交 e25e86f4 编写于 作者: Z Zhang Jun 提交者: GitHub

[inference][trt] optimize set_value and top_k op (#54372)

* set_value update

* support ValueTensor's rank != Input's rank & update top_k

* update range to avoid coredump

* fix addShape error

* Dims definition differ between 7.2 and 8.0+

* Update test_trt_convert_top_k_v2.py

* update top_k

* Update test_trt_convert_top_k_v2.py
上级 34cfbe79
......@@ -373,6 +373,13 @@ class OpConverter {
engine->ClearWeights();
}
// Convert `input` to `dtype` by inserting an Identity layer whose output
// type is forced to the requested type. Returns the casted tensor.
nvinfer1::ITensor* Cast(nvinfer1::ITensor* input, nvinfer1::DataType dtype) {
  auto* identity = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input);
  // Request the conversion on the layer and pin the tensor's type as well,
  // so the builder cannot elide the cast.
  identity->setOutputType(0, dtype);
  auto* casted = identity->getOutput(0);
  casted->setType(dtype);
  return casted;
}
// rank(result) = rank(input)
nvinfer1::ITensor* Gather(nvinfer1::ITensor* input,
const std::vector<int32_t> indices,
......@@ -384,6 +391,59 @@ class OpConverter {
return result;
}
// Insert a size-1 dimension at each position listed in `axis` (axes are
// expected to be non-negative). The new shape is built at network-build
// time by gathering from the concatenation [input_shape, 1]: the sentinel
// index `nbDims` selects the appended constant 1.
nvinfer1::ITensor* Unsqueeze(nvinfer1::ITensor* input,
                             const std::vector<int32_t> axis) {
  const auto in_dims = input->getDimensions();
  // Deduplicate the requested axes. NOTE(review): with more than one axis
  // the insertion order follows the unordered_set's iteration order, which
  // shifts later insert positions — confirm callers pass a single axis or
  // axes where this is acceptable.
  const std::unordered_set<int32_t> uniq_axes(axis.begin(), axis.end());
  // Gather indices: 0..nbDims-1 pick the original extents; `nbDims` picks
  // the appended 1.
  std::vector<int32_t> gather_ids(in_dims.nbDims);
  std::iota(gather_ids.begin(), gather_ids.end(), 0);
  for (const auto& a : uniq_axes) {
    gather_ids.insert(gather_ids.begin() + a, in_dims.nbDims);
  }
  // Shape source: a runtime Shape tensor under dynamic shape, otherwise a
  // constant built from the static dims.
  nvinfer1::ITensor* shape_tensor{nullptr};
  if (engine_->with_dynamic_shape()) {
    shape_tensor = Shape(input);
  } else {
    shape_tensor = Add1DConstantLayer(in_dims);
  }
  auto* extended_shape = Concat(
      std::vector<nvinfer1::ITensor*>{shape_tensor, Add1DConstantLayer(1)});
  auto* unsqueezed_shape =
      TRT_ENGINE_ADD_LAYER(engine_,
                           Gather,
                           *extended_shape,
                           *Add1DConstantLayer(gather_ids),
                           0)
          ->getOutput(0);
  return Reshape(input, unsqueezed_shape);
}
// Remove the dimensions listed in `axis` by gathering only the surviving
// dimension indices out of the input's shape and reshaping to the result.
// Axes are expected to be non-negative; the squeezed extents are assumed
// to be 1 (not checked here).
nvinfer1::ITensor* Squeeze(nvinfer1::ITensor* input,
                           const std::vector<int32_t> axis) {
  const auto in_dims = input->getDimensions();
  // Keep every dimension index that is not scheduled for removal.
  std::vector<int32_t> kept;
  kept.reserve(in_dims.nbDims);
  for (int32_t i = 0; i < in_dims.nbDims; ++i) {
    if (std::find(axis.begin(), axis.end(), i) == axis.end()) {
      kept.push_back(i);
    }
  }
  // Shape source: runtime Shape tensor under dynamic shape, constant
  // otherwise.
  nvinfer1::ITensor* shape_tensor{nullptr};
  if (engine_->with_dynamic_shape()) {
    shape_tensor = Shape(input);
  } else {
    shape_tensor = Add1DConstantLayer(in_dims);
  }
  auto* squeezed_shape =
      TRT_ENGINE_ADD_LAYER(
          engine_, Gather, *shape_tensor, *Add1DConstantLayer(kept), 0)
          ->getOutput(0);
  return Reshape(input, squeezed_shape);
}
// paddle allows negative index
// for axis length = 5, paddle allows [-5, 4]
nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape,
......@@ -406,7 +466,23 @@ class OpConverter {
nvinfer1::ITensor* newShape,
const std::string& name = "") {
auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
shuffle->setInput(1, *newShape);
if (engine_->with_dynamic_shape()) {
shuffle->setInput(1, *newShape);
} else {
auto shape = newShape->getDimensions();
shuffle->setReshapeDimensions(shape);
}
if (name != "") {
shuffle->setName(name.c_str());
}
return shuffle->getOutput(0);
}
nvinfer1::ITensor* Reshape(nvinfer1::ITensor* input,
nvinfer1::Dims shape,
const std::string& name = "") {
auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
shuffle->setReshapeDimensions(shape);
if (name != "") {
shuffle->setName(name.c_str());
}
......
......@@ -46,8 +46,13 @@ class RangeOpConverter : public OpConverter {
quotient_tensor = fquotient_tensor;
}
auto number_tensor = Max(Sub(zero_tensor, quotient_tensor), zero_tensor);
auto* start1 = engine_->GetITensor(op_desc.Input("Start")[0], true);
auto* start1 = engine_->GetITensor(op_desc.Input("Start")[0]);
#if IS_TRT_VERSION_LT(8000)
nvinfer1::Dims start_dims{0, {1}, { nvinfer1::DimensionType::kSPATIAL }};
#else
nvinfer1::Dims start_dims{0, {1}};
#endif
start1 = Reshape(start1, start_dims);
layer = TRT_ENGINE_ADD_LAYER(
engine_, Fill, nvinfer1::Dims{}, nvinfer1::FillOperation::kLINSPACE);
layer->setInput(0, *number_tensor);
......
......@@ -24,16 +24,6 @@ limitations under the License. */
} \
} while (0)
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
......@@ -55,6 +45,14 @@ class SetValueConverter : public OpConverter {
auto* inputs = engine_->GetITensor(op_desc.Input("Input")[0]);
auto* updates = engine_->GetITensor(op_desc.Input("ValueTensor")[0]);
const auto decrease_axes = PADDLE_GET_CONST(
std::vector<int64_t>, op_desc.GetAttr("decrease_axes"));
std::vector<int32_t> decr_axes{decrease_axes.begin(), decrease_axes.end()};
auto value_rank = updates->getDimensions().nbDims;
auto input_rank = inputs->getDimensions().nbDims;
if (decrease_axes.size() > 0 && value_rank != input_rank) {
updates = Unsqueeze(updates, decr_axes);
}
int64_t axes = 0;
int64_t starts = 0;
......@@ -115,39 +113,14 @@ class SetValueConverter : public OpConverter {
indices.insert(indices.end(), axes_index.begin(), axes_index.end());
}
nvinfer1::Dims indice_dims = update_dims;
// create a tensor to store data
std::vector<int> indice_dim_vec;
for (int i = 0; i < update_dims.nbDims; i++) {
indice_dim_vec.emplace_back(update_dims.d[i]);
}
auto indice_tensor_dims = phi::make_ddim(indice_dim_vec);
std::unique_ptr<phi::DenseTensor> indice_tensor(
std::make_unique<phi::DenseTensor>());
indice_tensor->Resize(indice_tensor_dims);
auto* dev_ctx = static_cast<phi::CPUContext*>(
platform::DeviceContextPool::Instance().Get(platform::CPUPlace()));
auto* weight_data = dev_ctx->template HostAlloc<int>(indice_tensor.get());
memcpy(weight_data, indices.data(), sizeof(int) * indice_tensor->numel());
TensorRTEngine::Weight weight{
nvinfer1::DataType::kINT32,
static_cast<void*>(weight_data),
static_cast<size_t>(indice_tensor->numel())};
auto output_name = op_desc.Output("Out")[0];
engine_->SetWeights("set_value_index_" + output_name,
std::move(indice_tensor));
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, indice_dims, weight.get());
const auto const_layer = AddConstantLayer(
indices.data(), update_dims, "set_value_index_" + output_name);
auto* layer = TRT_ENGINE_ADD_LAYER(engine_,
Scatter,
*inputs,
*const_layer->getOutput(0),
*const_layer,
*updates,
nvinfer1::ScatterMode::kELEMENT);
......
......@@ -33,77 +33,71 @@ class TopKOpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(3) << "convert a top_k op to tensorrt TopK layer";
// Here the two nullptr looks strange, that's because the
// framework::OpDesc's constructor is strange.
VLOG(3) << "convert a top_k op to tensorrt layer";
framework::OpDesc op_desc(op, nullptr);
auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]);
const int k = op_desc.HasAttr("k")
? PADDLE_GET_CONST(int, op_desc.GetAttr("k"))
: 1.0f;
nvinfer1::Dims input_dims = input_tensor->getDimensions();
int axis = input_dims.nbDims;
nvinfer1::ITopKLayer* layer =
TRT_ENGINE_ADD_LAYER(engine_,
TopK,
*input_tensor,
nvinfer1::TopKOperation::kMAX,
k,
1 << (axis - 1));
std::vector<std::string> output_names;
output_names.push_back(op_desc.Output("Out").front());
output_names.push_back(op_desc.Output("Indices").front());
RreplenishLayerAndOutput(layer, "top_k", output_names, test_mode);
}
};
class TopKv2OpConverter : public OpConverter {
public:
TopKv2OpConverter() {}
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
// Here the two nullptr looks strange, that's because the
// framework::OpDesc's constructor is strange.
framework::OpDesc op_desc(op, nullptr);
auto* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]);
const int k = op_desc.HasAttr("k")
? PADDLE_GET_CONST(int, op_desc.GetAttr("k"))
: 1.0f;
const int axis = op_desc.HasAttr("axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("axis"))
: 1.0f;
const int k =
op_desc.HasAttr("k") ? PADDLE_GET_CONST(int, op_desc.GetAttr("k")) : 1;
int axis = op_desc.HasAttr("axis")
? PADDLE_GET_CONST(int, op_desc.GetAttr("axis"))
: -1;
const bool largest =
op_desc.HasAttr("largest")
? PADDLE_GET_CONST(bool, op_desc.GetAttr("largest"))
: true;
auto flag =
largest ? nvinfer1::TopKOperation::kMAX : nvinfer1::TopKOperation::kMIN;
auto input_dims = input_tensor->getDimensions();
auto input_rank = input_dims.nbDims;
// 1d needs expand to 2d
bool expand_to_2d = (input_rank == 1);
if (engine_->with_dynamic_shape() && expand_to_2d) {
input_tensor = Unsqueeze(input_tensor, std::vector<int32_t>{1});
}
// The TopK layer does not accept INT32 input here: cast INT32 input to
// FLOAT before TopK, and cast the values back to INT32 afterwards.
nvinfer1::DataType type = input_tensor->getType();
bool cast = (type == nvinfer1::DataType::kINT32);
if (cast) {
input_tensor = Cast(input_tensor, nvinfer1::DataType::kFLOAT);
}
nvinfer1::ITopKLayer* layer = nullptr;
if (axis == -1) {
nvinfer1::Dims input_dims = input_tensor->getDimensions();
layer = TRT_ENGINE_ADD_LAYER(
engine_, TopK, *input_tensor, flag, k, 1 << (input_dims.nbDims - 1));
} else {
if (engine_->with_dynamic_shape()) {
layer = TRT_ENGINE_ADD_LAYER(
engine_, TopK, *input_tensor, flag, k, 1 << axis);
} else {
layer = TRT_ENGINE_ADD_LAYER(
engine_, TopK, *input_tensor, flag, k, 1 << (axis - 1));
}
if (axis > 0 && !engine_->with_dynamic_shape()) {
axis -= 1;
}
std::vector<std::string> output_names;
output_names.push_back(op_desc.Output("Out").front());
output_names.push_back(op_desc.Output("Indices").front());
if (axis < 0) axis += input_rank;
layer =
TRT_ENGINE_ADD_LAYER(engine_, TopK, *input_tensor, flag, k, 1 << axis);
nvinfer1::ITensor* values = layer->getOutput(0);
nvinfer1::ITensor* indices = layer->getOutput(1);
// un-expand to 1d
if (engine_->with_dynamic_shape() && expand_to_2d) {
values = Squeeze(values, std::vector<int32_t>{1});
indices = Squeeze(indices, std::vector<int32_t>{1});
}
// cast back
if (cast) {
values = Cast(values, nvinfer1::DataType::kINT32);
}
auto out_name = op_desc.Output("Out").front();
auto indices_name = op_desc.Output("Indices").front();
values->setName(out_name.c_str());
engine_->SetITensor(out_name.c_str(), values);
indices->setName(indices_name.c_str());
engine_->SetITensor(indices_name.c_str(), indices);
RreplenishLayerAndOutput(layer, "top_k_v2", output_names, test_mode);
layer->setName(
("top_k (Output: " + out_name + "," + indices_name + ")").c_str());
}
};
} // namespace tensorrt
......@@ -111,4 +105,4 @@ class TopKv2OpConverter : public OpConverter {
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(top_k, TopKOpConverter);
REGISTER_TRT_OP_CONVERTER(top_k_v2, TopKv2OpConverter);
REGISTER_TRT_OP_CONVERTER(top_k_v2, TopKOpConverter);
......@@ -2402,6 +2402,22 @@ struct SimpleOpTypeSetTeller : public Teller {
#if !IS_TRT_VERSION_GE(8200)
return false;
#endif
auto inputs = desc.Inputs();
if (inputs.find("StartsTensorList") != inputs.end()) {
if (desc.Input("StartsTensorList").size() >= 1) {
return false;
}
}
if (inputs.find("EndsTensorList") != inputs.end()) {
if (desc.Input("EndsTensorList").size() >= 1) {
return false;
}
}
if (inputs.find("StepsTensorList") != inputs.end()) {
if (desc.Input("StepsTensorList").size() >= 1) {
return false;
}
}
if (!(desc.HasAttr("axes") && desc.HasAttr("starts") &&
desc.HasAttr("steps"))) {
VLOG(3) << "the " << op_type
......@@ -2409,52 +2425,22 @@ struct SimpleOpTypeSetTeller : public Teller {
"starts or steps)";
return false;
}
auto* block = desc.Block();
auto input_name = desc.Input("Input")[0];
auto* input_desc = block->FindVar(input_name);
const auto input_shape = input_desc->GetShape();
auto update_name = desc.Input("ValueTensor")[0];
auto* update_desc = block->FindVar(update_name);
const auto update_shape = update_desc->GetShape();
if (update_shape.size() != input_shape.size()) return false;
}
if (op_type == "top_k_v2" || op_type == "top_k") {
auto* block = desc.Block();
auto x_var_name = desc.Input("X")[0];
if (block == nullptr) {
VLOG(3) << "The block desc is nullptr, we can't continue to analyze. "
"Developers need to check whether block_desc is passed in "
"the pass.";
return false;
}
auto* x_var_desc = block->FindVar(x_var_name);
auto x_dtype = x_var_desc->GetDataType();
if (!(x_dtype == framework::proto::VarType::FP32 ||
x_dtype == framework::proto::VarType::FP16)) {
return false;
}
const auto x_shape = x_var_desc->GetShape();
if (x_shape.size() == 1) {
VLOG(3) << "top_k/top_k_v2 does not support 1-dimensional input in "
"tensorrt";
return false;
}
if (desc.HasAttr("axis")) {
int axis = PADDLE_GET_CONST(int, desc.GetAttr("axis"));
if (axis == 0) {
if (!with_dynamic_shape && axis == 0) {
VLOG(3) << "top_k_v2 does not support axis == 0 in "
"tensorrt";
"tensorrt static shape.";
return false;
}
}
if (desc.HasAttr("sorted")) {
bool sorted = PADDLE_GET_CONST(bool, desc.GetAttr("sorted"));
if (!sorted) {
VLOG(3) << "top_k_v2 does not support results not sorted in "
VLOG(3) << op_type
<< " does not support results not sorted in "
"tensorrt";
return false;
}
......
......@@ -107,7 +107,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
if self.dims == 1:
if not dynamic_shape and self.dims == 1:
return 0, 4
return 1, 3
......
......@@ -23,7 +23,7 @@ from trt_layer_auto_scan_test import TrtLayerAutoScanTest
import paddle.inference as paddle_infer
class TrtConvertActivationTest(TrtLayerAutoScanTest):
class TrtConvertTopKV2Test(TrtLayerAutoScanTest):
def is_program_valid(self, program_config: ProgramConfig) -> bool:
inputs = program_config.inputs
attrs = [
......@@ -31,6 +31,10 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
]
if len(inputs['input_data'].shape) <= attrs[0]['axis']:
return False
axis = attrs[0]['axis']
axis = axis if axis >= 0 else axis + len(inputs['input_data'].shape)
if inputs['input_data'].shape[axis] <= attrs[0]['k']:
return False
return True
def sample_program_configs(self):
......@@ -49,11 +53,12 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
for dims in [1, 2, 3, 4]:
for batch in [1, 4]:
for k in [1, 3]:
for axis in [-1, 1, 2, 3]:
for axis in [-1, 1, 0, 2, 3]:
for largest in [True, False]:
for sort in [True, False]:
self.dims = dims
self.sort = sort
self.axis = axis
dics = [
{
"k": k,
......@@ -120,7 +125,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
"input_data": [4, 32, 32, 32]
}
self.dynamic_shape.opt_input_shape = {
"input_data": [1, 3, 32, 32]
"input_data": [4, 3, 32, 32]
}
def clear_dynamic_shape():
......@@ -129,7 +134,7 @@ class TrtConvertActivationTest(TrtLayerAutoScanTest):
self.dynamic_shape.opt_input_shape = {}
def generate_trt_nodes_num(attrs, dynamic_shape):
if self.dims == 1:
if not dynamic_shape and (self.dims == 1 or self.axis == 0):
return 0, 4
if not self.sort:
return 0, 4
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册