未验证 提交 a9b7d1d2 编写于 作者: W wenbin 提交者: GitHub

Cherrypick (#36666)

上级 6ecfe806
......@@ -112,6 +112,18 @@ class Pool2dOpConverter : public OpConverter {
nvinfer1::DimsHW nv_paddings(paddings[0], paddings[1]);
nvinfer1::ILayer *layer = nullptr;
nvinfer1::DimsHW g_pre_pad(0, 0);
nvinfer1::DimsHW g_post_pad(0, 0);
// Paddle (non-ceil mode): output size = (input size - filter size + 2 *
// padding) / stride + 1
// TensorRT EXPLICIT_ROUND_DOWN: O = floor((M - DK) / S) + 1
// So if M - DK < 0, extra post-padding is needed.
if (input_shape.d[input_dims - 2] - ksize[0] + 2 * paddings[0] < 0) {
g_post_pad.h() = strides[0] - 1;
}
if (input_shape.d[input_dims - 1] - ksize[1] + 2 * paddings[1] < 0) {
g_post_pad.w() = strides[1] - 1;
}
if (op_desc.HasAttr("enable_int8")) {
#if IS_TRT_VERSION_GE(5000)
......@@ -123,6 +135,20 @@ class Pool2dOpConverter : public OpConverter {
if (engine_->with_dynamic_shape()) {
if (!adaptive && !global_pooling && !ceil_mode) {
// input_shape.d < 0 means the shape info is not available here.
// The result may be wrong if the actual shape does not match in the end.
if ((padding_algorithm != "SAME") &&
((g_post_pad.w() > 0 && input_shape.d[input_dims - 2] > 0) ||
(g_post_pad.h() > 0 && input_shape.d[input_dims - 1] > 0))) {
auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1,
g_pre_pad, g_post_pad);
PADDLE_ENFORCE_NOT_NULL(
pad_layer, platform::errors::Fatal(
"Pad layer in poolOp converter could not be "
"created. The pointer to pad layer is `NULL`."));
input1 = pad_layer->getOutput(0);
}
auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1,
nv_pool_type, nv_ksize);
pool_layer->setStride(nv_strides);
......@@ -157,8 +183,7 @@ class Pool2dOpConverter : public OpConverter {
if (global_pooling == true) {
nv_ksize.d[0] = input_shape.d[input_dims - 2];
nv_ksize.d[1] = input_shape.d[input_dims - 1];
auto *pool_layer = TRT_ENGINE_ADD_LAYER(
engine_, Pooling, *const_cast<nvinfer1::ITensor *>(input1),
auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1,
nv_pool_type, nv_ksize);
PADDLE_ENFORCE_NOT_NULL(
pool_layer, platform::errors::Fatal(
......@@ -181,27 +206,37 @@ class Pool2dOpConverter : public OpConverter {
}
if (!adaptive) {
// Under ceil mode, the pre_pad and post_pad are used to
// record the padding size. In some ceil mode cases,
// we do not need padding, so we initialize the two vars to 0.
if (ceil_mode) {
nvinfer1::DimsHW pre_pad(0, 0);
nvinfer1::DimsHW post_pad(0, 0);
if (ceil_mode) {
// If ceil mode is true, we will pad the appropriate size to the input.
DealCeilMode(input_shape, ksize, strides, paddings, &pre_pad, &post_pad,
input_dims);
auto *pad_layer = TRT_ENGINE_ADD_LAYER(
engine_, Padding, *const_cast<nvinfer1::ITensor *>(input1), pre_pad,
post_pad);
auto *pad_layer =
TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1, pre_pad, post_pad);
PADDLE_ENFORCE_NOT_NULL(
pad_layer, platform::errors::Fatal(
"Pad layer in poolOp converter could not be "
"created. The pointer to pad layer is `NULL`."));
input1 = pad_layer->getOutput(0);
}
auto *pool_layer = TRT_ENGINE_ADD_LAYER(
engine_, Pooling, *const_cast<nvinfer1::ITensor *>(input1),
#if IS_TRT_VERSION_GE(8000)
// Excluding padding pixels from the average is not well supported by
// TRT,
// so add explicit padding for TRT 8.0 and above.
if ((g_post_pad.w() > 0 || g_post_pad.h() > 0) &&
(padding_algorithm != "SAME") && !ceil_mode) {
auto *pad_layer = TRT_ENGINE_ADD_LAYER(engine_, Padding, *input1,
g_pre_pad, g_post_pad);
PADDLE_ENFORCE_NOT_NULL(
pad_layer, platform::errors::Fatal(
"Pad layer in poolOp converter could not be "
"created. The pointer to pad layer is `NULL`."));
input1 = pad_layer->getOutput(0);
}
#endif
auto *pool_layer = TRT_ENGINE_ADD_LAYER(engine_, Pooling, *input1,
nv_pool_type, nv_ksize);
PADDLE_ENFORCE_NOT_NULL(
pool_layer, platform::errors::Fatal(
......
......@@ -65,6 +65,7 @@ SlicePlugin::SlicePlugin(void const *serial_data, size_t serial_length) {
DeserializeValue(&serial_data, &serial_length, &starts_);
DeserializeValue(&serial_data, &serial_length, &ends_);
DeserializeValue(&serial_data, &serial_length, &axes_);
DeserializeValue(&serial_data, &serial_length, &with_fp16_);
cudaEventCreate(&copy_event_);
cudaStreamCreate(&copy_stream_);
}
......@@ -187,17 +188,17 @@ int SlicePlugin::enqueue(int batch_size, const void *const *inputs,
}
size_t SlicePlugin::getSerializationSize() const TRT_NOEXCEPT {
return getBaseSerializationSize() + SerializedSize(getPluginType()) +
SerializedSize(starts_) + SerializedSize(ends_) +
SerializedSize(axes_);
return getBaseSerializationSize() + SerializedSize(starts_) +
SerializedSize(ends_) + SerializedSize(axes_) +
SerializedSize(with_fp16_);
}
void SlicePlugin::serialize(void *buffer) const TRT_NOEXCEPT {
SerializeValue(&buffer, getPluginType());
serializeBase(buffer);
SerializeValue(&buffer, starts_);
SerializeValue(&buffer, ends_);
SerializeValue(&buffer, axes_);
SerializeValue(&buffer, with_fp16_);
}
// Dynamic Plugin below.
......
......@@ -86,5 +86,19 @@ class SlicePluginTRTTestFp16(SlicePluginTRTTest):
self.enable_trt = True
class StaticSlicePluginTRTTestFp16(SlicePluginTRTTest):
def setUpTensorRTParams(self):
self.trt_parameters = SlicePluginTRTTest.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Half, True, False)
self.enable_trt = True
class StaticSlicePluginTRTTestFp32(SlicePluginTRTTest):
def setUpTensorRTParams(self):
self.trt_parameters = SlicePluginTRTTest.TensorRTParam(
1 << 30, 32, 1, AnalysisConfig.Precision.Float32, True, False)
self.enable_trt = True
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册