Unverified commit 30b10630, authored by zhoutianzi666, committed by GitHub

add only split (#43424)

Parent 65e86580
......@@ -295,20 +295,215 @@ class OpConverter {
engine->ClearWeights();
}
// rank(result) = rank(input)
nvinfer1::ITensor* Gather(nvinfer1::ITensor* input,
const std::vector<int32_t> indices, int axis = 0) {
auto* indices_tensor = Add1DConstantLayer(indices, " ");
auto* result =
TRT_ENGINE_ADD_LAYER(engine_, Gather, *input, *indices_tensor, axis)
->getOutput(0);
return result;
}
// Paddle allows negative indices;
// e.g. for an axis of length 5, the valid range is [-5, 4]
nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape,
nvinfer1::ITensor* indices) {
int rank = input_shape->getDimensions().nbDims;
std::vector<int32_t> zero = std::vector<int32_t>(rank, 0);
std::vector<int32_t> minus_one = std::vector<int32_t>(rank, -1);
nvinfer1::ITensor* zero_tensor = Add1DConstantLayer(zero);
nvinfer1::ITensor* minus_one_tensor = Add1DConstantLayer(minus_one);
// sign is -1 where indices < 0, otherwise 0
auto* sign = Max(Min(indices, zero_tensor), minus_one_tensor);
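// e.g. input_shape = [2, 3, 4], indices = [0, -1, 2]  =>  sign = [0, -1, 0],
// so the result below is indices - sign * input_shape = [0, 2, 2]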
return Sub(indices, Prod(sign, input_shape));
}
nvinfer1::ITensor* Shape(nvinfer1::ITensor* input) {
return TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);
}
// Concat does not change the rank
nvinfer1::ITensor* Concat(const std::vector<nvinfer1::ITensor*>& inputs,
int axis = 0) {
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Concatenation, inputs.data(),
inputs.size());
if (axis != 0) layer->setAxis(axis);
nvinfer1::ITensor* c = layer->getOutput(0);
return c;
}
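// The binary helpers below wrap TensorRT's IElementWiseLayer; TensorRT
// requires both operands to have the same rank and broadcasts dimensions
// of size 1.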
nvinfer1::ITensor* Sum(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
nvinfer1::ElementWiseOperation::kSUM)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Prod(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Min(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
nvinfer1::ElementWiseOperation::kMIN)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Max(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
nvinfer1::ElementWiseOperation::kMAX)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Sub(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
nvinfer1::ElementWiseOperation::kSUB)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Div(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *a, *b,
nvinfer1::ElementWiseOperation::kDIV)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
nvinfer1::ActivationType act_type) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, Activation, *a, act_type)->getOutput(0);
return c;
}
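// Example (a sketch): if shape_tensor holds [N, C, H, W], then
// GetEleTensorOfShape(shape_tensor, 2) returns a one-element tensor
// containing H; with is_scalar = true it returns a 0-D scalar instead.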
// Get the element tensor at position index of a 1D shape tensor
nvinfer1::ITensor* GetEleTensorOfShape(nvinfer1::ITensor* shape_tensor,
int index, bool is_scalar = false) {
auto* tensor =
TRT_ENGINE_ADD_LAYER(engine_, Gather, *shape_tensor,
*Add1DConstantLayer(index, " ", is_scalar), 0)
->getOutput(0);
return tensor;
}
// Create and add a multi-dimensional constant float layer
nvinfer1::ITensor* AddConstantLayer(const float* data,
const std::vector<int32_t>& weight_dims,
const std::string& weight_name) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = std::accumulate(weight_dims.begin(), weight_dims.end(), 1,
std::multiplies<int>());
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims trt_dims;
trt_dims.nbDims = weight_dims.size();
for (size_t i = 0; i < weight_dims.size(); i++)
trt_dims.d[i] = weight_dims[i];
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims, weight.get());
return const_layer->getOutput(0);
}
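// The Add1DConstantLayer overloads below wrap small host-side values as
// constant layers. Usage sketch: Add1DConstantLayer(std::vector<int>{2, 3})
// yields a 1-D INT32 constant of shape [2]; passing scalar = true with a
// single element yields a 0-D scalar instead.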
// Create and add a 1D constant float layer
nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
const std::string& weight_name = "",
bool scalar = false) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = data.size();
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims input_shape;
input_shape.nbDims = scalar ? 0 : 1;
input_shape.d[0] = data_size;
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
return const_layer->getOutput(0);
}
// Create and add a 1D constant int32 layer
nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
const std::string& weight_name = "",
bool scalar = false) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = data.size();
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims input_shape;
input_shape.nbDims = scalar ? 0 : 1;
input_shape.d[0] = data_size;
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
return const_layer->getOutput(0);
}
nvinfer1::ITensor* Add1DConstantLayer(nvinfer1::Dims data,
const std::string& weight_name = "",
bool scalar = false) {
std::vector<int> tmp_data;
for (int i = 0; i < data.nbDims; i++) tmp_data.push_back(data.d[i]);
return Add1DConstantLayer(tmp_data, weight_name, scalar);
}
nvinfer1::ITensor* Add1DConstantLayer(int32_t data,
const std::string& weight_name = "",
bool scalar = false) {
std::vector<int> tmp_data;
tmp_data.push_back(data);
return Add1DConstantLayer(tmp_data, weight_name, scalar);
}
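// Registers each of the layer's outputs with the engine under the given
// tensor names and names the layer "<type> (Output: a, b, ...)" for
// debugging; in test mode every output is also declared as a network output.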
void RreplenishLayerAndOutput(
nvinfer1::ILayer* layer, const std::string& layer_type,
const std::vector<std::string>& output_tensor_names,
bool test_mode = false) {
size_t num_out = output_tensor_names.size();
std::string layer_name = layer_type + " (Output: ";
for (size_t i = 0; i < num_out; i++) {
layer->getOutput(i)->setName(output_tensor_names[i].c_str());
engine_->SetITensor(output_tensor_names[i], layer->getOutput(i));
if (test_mode) {
engine_->DeclareOutput(output_tensor_names[i]);
}
layer_name += output_tensor_names[i];
if (i != num_out - 1) layer_name += ", ";
}
layer->setName((layer_name + ")").c_str());
}
void SetEngine(TensorRTEngine* engine) { engine_ = engine; }
......
......@@ -29,7 +29,6 @@ class SplitOpConverter : public OpConverter {
// Declare inputs
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
auto input_dims = input->getDimensions();
size_t output_num = op_desc.Output("Out").size();
// Get Attrs
......@@ -41,48 +40,115 @@ class SplitOpConverter : public OpConverter {
if (op_desc.HasAttr("num")) {
num = BOOST_GET_CONST(int, op_desc.GetAttr("num"));
}
nvinfer1::ITensor* shape_tensor = nullptr;
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
axis += (axis < 0) ? input_dims.nbDims : 0;
#endif
// Shape() is only called in dynamic shape mode
shape_tensor = Shape(input);
} else {
axis += (axis < 0) ? input_dims.nbDims : -1;
}
bool in_axis_dim_dynamic = false;
nvinfer1::ITensor* avg_len_tensor = nullptr;
// Infer output_lengths when only num is given
if (num > 0 && output_lengths.empty()) {
if (input_dims.d[axis] > 0) {
int64_t in_axis_dim = input_dims.d[axis];
size_t out_axis_dim = in_axis_dim / num;
for (int i = 0; i < num; ++i) {
output_lengths.push_back(out_axis_dim);
}
} else {
in_axis_dim_dynamic = true;
auto* num_tensor = Add1DConstantLayer(num);
avg_len_tensor =
Div(GetEleTensorOfShape(shape_tensor, axis), num_tensor);
}
}
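// Note: when the axis extent is only known at runtime, every output gets
// the same length shape_tensor[axis] / num; the per-output start offsets
// are derived from avg_len_tensor inside the loop below.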
nvinfer1::ILayer* layer = nullptr;
#if IS_TRT_VERSION_GE(6000)
if (engine_->with_dynamic_shape()) {
nvinfer1::Dims trt_step_dims;
trt_step_dims.nbDims = input->getDimensions().nbDims;
for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
std::vector<int32_t> gather_indices;
gather_indices.resize(trt_step_dims.nbDims);
std::iota(gather_indices.begin(), gather_indices.end(), 0);
gather_indices[axis] = gather_indices.size();
std::vector<int32_t> zeros(trt_step_dims.nbDims, 0);
auto* zeros_tensor = Add1DConstantLayer(zeros);
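// How the start/size tensors are assembled, taking rank 4 and axis 1 as an
// example: gather_indices = [0, 4, 2, 3]. Concat({zeros, start_point}) is a
// 5-element tensor, so gathering with these indices yields
// [0, start_point, 0, 0]; likewise Concat({shape, this_len}) gathers to
// [N, this_len, H, W] -- exactly the start and size inputs ISliceLayer needs.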
// input : [N,C,H,W]
int start_point = 0;
for (size_t i = 0; i < output_num; i++) {
nvinfer1::ITensor* this_len_tensor = nullptr;
nvinfer1::ITensor* start_point_tensor = nullptr;
if (!in_axis_dim_dynamic) {
this_len_tensor = Add1DConstantLayer(output_lengths[i]);
start_point_tensor = Add1DConstantLayer(start_point);
start_point += output_lengths[i];
} else {
this_len_tensor = avg_len_tensor;
auto* i_tensor = Add1DConstantLayer(i);
start_point_tensor = Prod(i_tensor, avg_len_tensor);
}
std::vector<nvinfer1::ITensor*> concat_inputs1 = {zeros_tensor,
start_point_tensor};
std::vector<nvinfer1::ITensor*> concat_inputs2 = {shape_tensor,
this_len_tensor};
auto* start_tensor = Gather(Concat(concat_inputs1), gather_indices);
auto* size_tensor = Gather(Concat(concat_inputs2), gather_indices);
layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, trt_step_dims,
trt_step_dims, trt_step_dims);
layer->setInput(1, *start_tensor);
layer->setInput(2, *size_tensor);
auto output_name = op_desc.Output("Out")[i];
RreplenishLayerAndOutput(layer, "split", {output_name}, test_mode);
}
} else {
auto chw_input_dims = input->getDimensions();
nvinfer1::Dims trt_start_dims;
trt_start_dims.nbDims = chw_input_dims.nbDims;
memset(trt_start_dims.d, 0, sizeof(int32_t) * chw_input_dims.nbDims);
nvinfer1::Dims trt_size_dims = chw_input_dims;
nvinfer1::Dims trt_step_dims;
trt_step_dims.nbDims = chw_input_dims.nbDims;
for (int i = 0; i < trt_step_dims.nbDims; i++) trt_step_dims.d[i] = 1;
// input : [C,H,W]
for (size_t i = 0; i < output_num; i++) {
trt_start_dims.d[axis] = std::accumulate(output_lengths.begin(),
output_lengths.begin() + i, 0);
trt_size_dims.d[axis] = output_lengths[i];
layer = TRT_ENGINE_ADD_LAYER(engine_, Slice, *input, trt_start_dims,
trt_size_dims, trt_step_dims);
auto output_name = op_desc.Output("Out")[i];
RreplenishLayerAndOutput(layer, "split", {output_name}, test_mode);
}
}
#else
if (engine_->with_dynamic_shape()) {
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::SplitPluginDynamic* plugin =
new plugin::SplitPluginDynamic(axis, output_lengths, with_fp16);
layer = engine_->AddDynamicPlugin(&input, 1, plugin);
} else {
bool with_fp16 =
engine_->WithFp16() && !engine_->disable_trt_plugin_fp16();
plugin::SplitPlugin* plugin =
new plugin::SplitPlugin(axis, output_lengths, with_fp16);
layer = engine_->AddPluginV2Ext(&input, 1, plugin);
}
std::string layer_name = "split (Output: ";
std::vector<std::string> output_names;
for (size_t i = 0; i < output_num; i++) {
auto output_name = op_desc.Output("Out")[i];
layer->getOutput(i)->setName(output_name.c_str());
engine_->SetITensor(output_name, layer->getOutput(i));
layer_name += output_name;
if (test_mode) {
engine_->DeclareOutput(output_name);
}
output_names.push_back(op_desc.Output("Out")[i]);
}
layer->setName((layer_name + ")").c_str());
RreplenishLayerAndOutput(layer, "split", output_names, test_mode);
#endif
}
};
......
......@@ -686,7 +686,7 @@ class TensorRTEngine {
// them, and a macro like this is more extensible when the underlying TensorRT
// library adds support for new layers.
#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
engine__->network()->add##layer__(__VA_ARGS__)
class TRTEngineManager {
public:
......
......@@ -1041,15 +1041,6 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
return false;
}
}
} else {
for (size_t i = 0; i < axes.size(); i++) {
if (starts[i] < 0 || ends[i] < 0) {
VLOG(3) << "Invalid slice attribute 'starts' or 'ends'. "
"Negative starts or ends not supported in TensorRT "
"when running in dynamic shape mode.";
return false;
}
}
}
}
}
......
......@@ -29,10 +29,7 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest):
attrs = [
program_config.ops[i].attrs for i in range(len(program_config.ops))
]
out_shape = list(inputs['input_data'].shape)
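# Compute each output's extent from axes/starts/ends below, then only accept
# decrease_axis entries whose resulting extent is exactly 1.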
for x in range(len(attrs[0]["axes"])):
start = 0
end = 0
......@@ -48,15 +45,20 @@ class TrtConvertSliceTest(TrtLayerAutoScanTest):
end = attrs[0]["ends"][x]
start = max(0, start)
end = max(0, end)
out_shape[attrs[0]["axes"][x]] = end - start
if start >= end:
return False
for x in attrs[0]["decrease_axis"]:
if x < 0:
return False
if (out_shape[x] != 1):
return False
return True
def sample_program_configs(self):
def generate_input1(attrs: List[Dict[str, Any]]):
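# Random (rather than all-ones) input data makes wrong slice offsets
# observable, since every element is distinct with high probability.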
return np.random.random([6, 6, 64, 64]).astype(np.float32)
for axes in [[0, 1], [1, 3], [2, 3]]:
for starts in [[0, 1]]:
......
......@@ -73,13 +73,13 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest):
def generate_input1(attrs: List[Dict[str, Any]], batch):
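# Random input data makes mis-split outputs detectable;
# all-ones data would mask them.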
if self.dims == 4:
return np.random.random([batch, 3, 3, 24]).astype(np.float32)
elif self.dims == 3:
return np.random.random([batch, 3, 24]).astype(np.float32)
elif self.dims == 2:
return np.random.random([batch, 24]).astype(np.float32)
elif self.dims == 1:
return np.random.random([24]).astype(np.float32)
def generate_AxisTensor(attrs: List[Dict[str, Any]]):
return np.ones([1]).astype(np.int32)
......@@ -162,25 +162,33 @@ class TrtConvertSplitTest(TrtLayerAutoScanTest):
def generate_dynamic_shape(attrs):
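# Widening min/max by one (e.g. 3 - 1 and 3 + 1) keeps the non-batch dims
# dynamic at build time, so the converter sees input_dims.d[axis] == -1 and
# the new in_axis_dim_dynamic path is exercised.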
if self.dims == 4:
self.dynamic_shape.min_input_shape = {
"split_input": [1, 3, 3, 24]
"split_input": [1, 3 - 1, 3 - 1, 24 - 1]
}
self.dynamic_shape.max_input_shape = {
"split_input": [9, 3, 3, 24]
"split_input": [9, 3 + 1, 3 + 1, 24 + 1]
}
self.dynamic_shape.opt_input_shape = {
"split_input": [1, 3, 3, 24]
}
elif self.dims == 3:
self.dynamic_shape.min_input_shape = {"split_input": [1, 3, 24]}
self.dynamic_shape.max_input_shape = {"split_input": [9, 3, 24]}
self.dynamic_shape.min_input_shape = {
"split_input": [1, 3 - 1, 24 - 1]
}
self.dynamic_shape.max_input_shape = {
"split_input": [9, 3 + 1, 24 + 1]
}
self.dynamic_shape.opt_input_shape = {"split_input": [1, 3, 24]}
elif self.dims == 2:
self.dynamic_shape.min_input_shape = {"split_input": [1, 24]}
self.dynamic_shape.max_input_shape = {"split_input": [9, 24]}
self.dynamic_shape.min_input_shape = {
"split_input": [1, 24 - 1]
}
self.dynamic_shape.max_input_shape = {
"split_input": [9, 24 + 1]
}
self.dynamic_shape.opt_input_shape = {"split_input": [1, 24]}
elif self.dims == 1:
self.dynamic_shape.min_input_shape = {"split_input": [24]}
self.dynamic_shape.max_input_shape = {"split_input": [24]}
self.dynamic_shape.min_input_shape = {"split_input": [24 - 1]}
self.dynamic_shape.max_input_shape = {"split_input": [24 + 1]}
self.dynamic_shape.opt_input_shape = {"split_input": [24]}
def clear_dynamic_shape():
......