未验证 提交 9b54fe41 编写于 作者: P Pei Yang 提交者: GitHub

add trt transpose and flatten converter (#31022)

上级 4c9f96c9
...@@ -141,6 +141,10 @@ void IRPassManager::CreatePasses(Argument *argument, ...@@ -141,6 +141,10 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("optim_input_shape", pass->Set("optim_input_shape",
new std::map<std::string, std::vector<int>>( new std::map<std::string, std::vector<int>>(
argument->optim_input_shape())); argument->optim_input_shape()));
bool with_dynamic_shape = argument->max_input_shape().size() > 0 &&
argument->min_input_shape().size() > 0 &&
argument->optim_input_shape().size() > 0;
pass->Set("with_dynamic_shape", new bool(with_dynamic_shape));
pass->Set("trt_disabled_ops", new std::vector<std::string>( pass->Set("trt_disabled_ops", new std::vector<std::string>(
argument->tensorrt_disabled_ops())); argument->tensorrt_disabled_ops()));
pass->Set("trt_use_dla", new bool(argument->tensorrt_use_dla())); pass->Set("trt_use_dla", new bool(argument->tensorrt_use_dla()));
......
...@@ -35,6 +35,7 @@ void analysis::TensorRtSubgraphPass::ApplyImpl( ...@@ -35,6 +35,7 @@ void analysis::TensorRtSubgraphPass::ApplyImpl(
auto use_calib_mode = Get<bool>("use_calib_mode"); auto use_calib_mode = Get<bool>("use_calib_mode");
bool no_calib_int8 = enable_int8 && !(use_calib_mode); bool no_calib_int8 = enable_int8 && !(use_calib_mode);
auto trt_disabled_ops = Get<std::vector<std::string>>("trt_disabled_ops"); auto trt_disabled_ops = Get<std::vector<std::string>>("trt_disabled_ops");
auto with_dynamic_shape = Get<bool>("with_dynamic_shape");
auto teller = [&](const framework::ir::Node *node) { auto teller = [&](const framework::ir::Node *node) {
if (!node->IsOp() || !node->Op()) return false; if (!node->IsOp() || !node->Op()) return false;
if (find(trt_disabled_ops.begin(), trt_disabled_ops.end(), if (find(trt_disabled_ops.begin(), trt_disabled_ops.end(),
...@@ -43,8 +44,8 @@ void analysis::TensorRtSubgraphPass::ApplyImpl( ...@@ -43,8 +44,8 @@ void analysis::TensorRtSubgraphPass::ApplyImpl(
<< " is diabled by config in TensorRT"; << " is diabled by config in TensorRT";
return false; return false;
} }
return tensorrt::OpTeller::Global().Tell(node->Op()->Type(), *node->Op(), return tensorrt::OpTeller::Global().Tell(node, no_calib_int8,
no_calib_int8); with_dynamic_shape);
}; };
framework::ir::SubGraphFuser fuser( framework::ir::SubGraphFuser fuser(
......
...@@ -1144,6 +1144,8 @@ USE_TRT_CONVERTER(elementwise_mul_tensor); ...@@ -1144,6 +1144,8 @@ USE_TRT_CONVERTER(elementwise_mul_tensor);
USE_TRT_CONVERTER(elementwise_max_tensor); USE_TRT_CONVERTER(elementwise_max_tensor);
USE_TRT_CONVERTER(elementwise_min_tensor); USE_TRT_CONVERTER(elementwise_min_tensor);
USE_TRT_CONVERTER(elementwise_pow_tensor); USE_TRT_CONVERTER(elementwise_pow_tensor);
USE_TRT_CONVERTER(transpose);
USE_TRT_CONVERTER(flatten);
USE_TRT_CONVERTER(matmul); USE_TRT_CONVERTER(matmul);
USE_TRT_CONVERTER(conv2d); USE_TRT_CONVERTER(conv2d);
USE_TRT_CONVERTER(relu); USE_TRT_CONVERTER(relu);
......
...@@ -3,50 +3,9 @@ nv_library(tensorrt_converter ...@@ -3,50 +3,9 @@ nv_library(tensorrt_converter
SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc SRCS matmul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc
pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc
shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc transpose_op.cc flatten_op.cc
emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc clip_op.cc
DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry) DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry)
nv_test(test_op_converter SRCS test_op_converter.cc DEPS nv_test(test_op_converter SRCS test_op_converter.cc DEPS
paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_converter) paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_converter)
# TODO(xingzhaolong): fix the following CI unit-test errors.
#nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
#nv_test(test_trt_mul_op SRCS test_mul_op.cc mul_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine mul_op)
#nv_test(test_trt_fc_op SRCS test_fc_op.cc fc_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine mul_op)
#nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op)
#nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine conv_op conv_transpose_op)
#nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine pool_op tensorrt_plugin)
#nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
# elementwise_add_op elementwise_mul_op)
#nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine softmax_op)
#nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine batch_norm_op)
#nv_test(test_trt_concat_op SRCS test_concat_op.cc concat_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine concat_op)
#nv_test(test_trt_dropout_op SRCS test_dropout_op.cc dropout_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine dropout_op)
#nv_test(test_trt_pad_op SRCS test_pad_op.cc pad_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine pad_op)
#nv_test(test_trt_split_op SRCS test_split_op.cc split_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
# split_op concat_op)
#nv_test(test_trt_prelu_op SRCS test_prelu_op.cc prelu_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin
# prelu_op)
#nv_test(test_trt_leaky_relu_op SRCS test_leaky_relu_op.cc leaky_relu_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op)
#nv_test(test_shuffle_channel_op SRCS test_shuffle_channel_op.cc shuffle_channel_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine shuffle_channel_op)
#nv_test(test_swish_op SRCS test_swish_op.cc swish_op.cc
# DEPS paddle_framework ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op tensorrt_plugin)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
/*
 * FlattenOp converter (static shape mode only).
 *
 * Collapses every dimension reported by the TensorRT input tensor into a
 * single dimension by emitting a Shuffle layer whose reshape dimensions are
 * the product of all input extents.
 */
class FlattenOpConverter : public OpConverter {
 public:
  /*
   * op:        serialized description of the flatten op to convert.
   * scope:     unused by this converter.
   * test_mode: forwarded unchanged to RreplenishLayerAndOutput.
   *
   * Raises InvalidArgument if any input extent is not strictly positive.
   */
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    framework::OpDesc op_desc(op, nullptr);

    // Look up the TensorRT tensor bound to input "X" and snapshot its shape.
    auto* x = engine_->GetITensor(op_desc.Input("X")[0]);
    const nvinfer1::Dims in_shape = x->getDimensions();

    // Total element count across all dims; each extent must be known (> 0).
    int flattened_size = 1;
    for (int idx = 0; idx < in_shape.nbDims; ++idx) {
      int extent = in_shape.d[idx];
      PADDLE_ENFORCE_GT(
          extent, 0, platform::errors::InvalidArgument(
                         "flatten input dim should be > 0, but got %d.", extent));
      flattened_size *= extent;
    }

    // Target shape: one dimension holding every element.
    nvinfer1::Dims out_shape;
    out_shape.nbDims = 1;
    out_shape.d[0] = flattened_size;

    auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *x);
    shuffle->setReshapeDimensions(out_shape);

    auto out_name = op_desc.Output("Out")[0];
    RreplenishLayerAndOutput(shuffle, "flatten", {out_name}, test_mode);
  }
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
// Register FlattenOpConverter with the TensorRT op-converter registry under
// the op name "flatten".
REGISTER_TRT_OP_CONVERTER(flatten, FlattenOpConverter);
...@@ -109,7 +109,18 @@ class OpConverter { ...@@ -109,7 +109,18 @@ class OpConverter {
it, platform::errors::Unimplemented("no OpConverter for optype [%s]", it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type())); op_desc.Type()));
} }
if (op_desc.Type() == "transpose2") {
it = Registry<OpConverter>::Global().Lookup("transpose");
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "flatten2") {
it = Registry<OpConverter>::Global().Lookup("flatten");
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (!it) { if (!it) {
it = Registry<OpConverter>::Global().Lookup(op_desc.Type()); it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
} }
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <bitset>
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
/*
 * TransposeOp converter.
 *
 * Maps the op's "axis" attribute onto the first-transpose permutation of a
 * TensorRT Shuffle layer. When the engine is not in dynamic shape mode the
 * attribute carries one more entry than the TensorRT tensor has dimensions:
 * the leading entry is skipped and the remaining entries are shifted down by
 * one.
 */
class TransposeOpConverter : public OpConverter {
 public:
  /*
   * op:        serialized description of the transpose op to convert.
   * scope:     unused by this converter.
   * test_mode: forwarded unchanged to RreplenishLayerAndOutput.
   *
   * Raises InvalidArgument if the "axis" attribute does not have the size
   * implied by the input rank, or if the resulting permutation contains an
   * out-of-range or duplicated index.
   */
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope, bool test_mode) override {
    framework::OpDesc op_desc(op, nullptr);
    // Declare inputs
    auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
    const int dims = input->getDimensions().nbDims;
    const std::vector<int> axis =
        BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("axis"));

    // Number of leading "axis" entries that have no counterpart in the
    // TensorRT tensor: 1 without dynamic shape, 0 with it.
    const int batch_offset = engine_->with_dynamic_shape() ? 0 : 1;

    // Guard the indexing below: without this check a malformed attribute
    // would be read out of bounds.
    const int expected_axis_size = dims + batch_offset;
    const int actual_axis_size = static_cast<int>(axis.size());
    PADDLE_ENFORCE_EQ(
        actual_axis_size, expected_axis_size,
        platform::errors::InvalidArgument(
            "The size of attribute axis for trt transpose op converter "
            "should be %d, but got %d.",
            expected_axis_size, actual_axis_size));

    nvinfer1::Permutation perm;
    for (int i = 0; i < dims; i++) {
      // Skip the leading entries and re-base the remaining indices.
      perm.order[i] = axis[i + batch_offset] - batch_offset;
    }

    // Permutation is valid if it has nbDims unique values from range [0,
    // nbDims-1]
    auto is_valid_permutation = [](int rank,
                                   const nvinfer1::Permutation& permutation) {
      std::bitset<nvinfer1::Dims::MAX_DIMS> found;
      for (int i = 0; i < rank; ++i) {
        const int x = permutation.order[i];
        if ((x < 0) || (x >= rank) || found[x])
          return false;  // Out of bounds or duplicate
        found.set(x);
      }
      return true;
    };

    PADDLE_ENFORCE_EQ(is_valid_permutation(dims, perm), true,
                      platform::errors::InvalidArgument(
                          "Invalid permutation dimensions for trt transpose op "
                          "converter: duplicate or out of bound."));

    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
    layer->setFirstTranspose(perm);

    auto output_name = op_desc.Output("Out")[0];
    RreplenishLayerAndOutput(layer, "transpose", {output_name}, test_mode);
  }
};
} // namespace tensorrt
} // namespace inference
} // namespace paddle
// Register TransposeOpConverter with the TensorRT op-converter registry under
// the op name "transpose".
REGISTER_TRT_OP_CONVERTER(transpose, TransposeOpConverter);
...@@ -102,11 +102,17 @@ struct SimpleOpTypeSetTeller : public Teller { ...@@ -102,11 +102,17 @@ struct SimpleOpTypeSetTeller : public Teller {
"layer_norm", "layer_norm",
"scale", "scale",
"stack", "stack",
"transpose2",
"transpose",
"flatten2",
"flatten",
}; };
}; };
bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc, bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
bool use_no_calib_int8) { bool with_dynamic_shape) {
const std::string op_type = node->Op()->Type();
const framework::OpDesc desc = *node->Op();
// do not support the op which is labeled the `skip_quant` // do not support the op which is labeled the `skip_quant`
if ((desc.HasAttr("namescope") && if ((desc.HasAttr("namescope") &&
BOOST_GET_CONST(std::string, desc.GetAttr("op_namescope")) == BOOST_GET_CONST(std::string, desc.GetAttr("op_namescope")) ==
...@@ -143,6 +149,26 @@ bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc, ...@@ -143,6 +149,26 @@ bool OpTeller::Tell(const std::string& op_type, const framework::OpDesc& desc,
} }
} }
} }
if (op_type == "transpose2" || op_type == "transpose") {
if (!desc.HasAttr("axis")) {
return false;
} else {
std::vector<int> axis =
BOOST_GET_CONST(std::vector<int>, desc.GetAttr("axis"));
if (!with_dynamic_shape && axis[0] != 0) return false;
if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false;
}
}
if (op_type == "flatten2" || op_type == "flatten") {
// flatten doesn't support dynamic shape currently
if (!desc.HasAttr("axis")) {
return false;
} else {
if (with_dynamic_shape) return false;
int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
if (axis != 1) return false;
}
}
if ((*teller)(op_type, desc, use_no_calib_int8)) return true; if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
} }
return false; return false;
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include <string> #include <string>
#include <unordered_set> #include <unordered_set>
#include <vector> #include <vector>
#include "paddle/fluid/framework/ir/node.h"
#include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_desc.h"
#include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/engine.h"
...@@ -65,8 +65,8 @@ class OpTeller { ...@@ -65,8 +65,8 @@ class OpTeller {
return *x; return *x;
} }
bool Tell(const std::string& op_type, const framework::OpDesc& desc, bool Tell(const framework::ir::Node* node, bool use_no_calib_int8 = false,
bool use_no_calib_int8 = false); bool with_dynamic_shape = false);
private: private:
OpTeller(); OpTeller();
......
...@@ -287,6 +287,59 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest): ...@@ -287,6 +287,59 @@ class TensorRTSubgraphPassInstanceNormTest(InferencePassTest):
PassVersionChecker.IsCompatible('tensorrt_subgraph_pass')) PassVersionChecker.IsCompatible('tensorrt_subgraph_pass'))
class TensorRTSubgraphPassTransposeTest(InferencePassTest):
    """Runs a transpose followed by batch_norm with TensorRT enabled."""

    def setUp(self):
        # Build the program: data -> transpose -> batch_norm.
        with fluid.program_guard(self.main_program, self.startup_program):
            image = fluid.data(
                name="data", shape=[-1, 6, 64, 64], dtype="float32")
            transposed = self.append_transpose(image)
            bn_out = fluid.layers.batch_norm(transposed, is_test=True)

        random_input = np.random.random([1, 6, 64, 64]).astype("float32")
        self.feeds = {"data": random_input}
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassTransposeTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [bn_out]

    def append_transpose(self, data):
        """Append a transpose that keeps axis 0 first and moves the last axis to position 1."""
        perm = [0, 3, 1, 2]
        return fluid.layers.transpose(data, perm)

    def test_check_output(self):
        # Nothing is checked on builds without CUDA.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True)
        pass_is_compatible = PassVersionChecker.IsCompatible(
            'tensorrt_subgraph_pass')
        self.assertTrue(pass_is_compatible)
class TensorRTSubgraphPassFlattenTest(InferencePassTest):
    """Runs flatten -> reshape -> batch_norm with TensorRT enabled."""

    def setUp(self):
        # Build the program: data -> flatten -> reshape -> batch_norm.
        with fluid.program_guard(self.main_program, self.startup_program):
            image = fluid.data(
                name="data", shape=[-1, 6, 64, 64], dtype="float32")
            flattened = self.append_flatten(image)
            reshaped = fluid.layers.reshape(flattened, [-1, 0, 1, 1])
            bn_out = fluid.layers.batch_norm(reshaped, is_test=True)

        random_input = np.random.random([1, 6, 64, 64]).astype("float32")
        self.feeds = {"data": random_input}
        self.enable_trt = True
        self.trt_parameters = TensorRTSubgraphPassFlattenTest.TensorRTParam(
            1 << 30, 32, 0, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [bn_out]

    def append_flatten(self, data):
        """Append a flatten layer with axis=1 to `data`."""
        return fluid.layers.flatten(data, axis=1)

    def test_check_output(self):
        # Nothing is checked on builds without CUDA.
        if not core.is_compiled_with_cuda():
            return
        self.check_output_with_option(True)
        pass_is_compatible = PassVersionChecker.IsCompatible(
            'tensorrt_subgraph_pass')
        self.assertTrue(pass_is_compatible)
class TensorRTSubgraphPassLayerNormTest(InferencePassTest): class TensorRTSubgraphPassLayerNormTest(InferencePassTest):
def setUp(self): def setUp(self):
self.set_params() self.set_params()
......
...@@ -27,14 +27,15 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest): ...@@ -27,14 +27,15 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest):
name="data1", shape=[8, 32, 128], dtype="float32") name="data1", shape=[8, 32, 128], dtype="float32")
data2 = fluid.data( data2 = fluid.data(
name="data2", shape=[8, 32, 128], dtype="float32") name="data2", shape=[8, 32, 128], dtype="float32")
trans1 = fluid.layers.transpose(data1, perm=[2, 1, 0]) trans1 = fluid.layers.transpose(data1, perm=[0, 2, 1])
trans2 = fluid.layers.transpose(data2, perm=[2, 1, 0]) trans2 = fluid.layers.transpose(data2, perm=[0, 2, 1])
flatt1 = fluid.layers.flatten(trans1) flatt1 = fluid.layers.flatten(trans1)
flatt2 = fluid.layers.flatten(trans2) flatt2 = fluid.layers.flatten(trans2)
concat_out = fluid.layers.concat([flatt1, flatt2]) concat_out = fluid.layers.concat([flatt1, flatt2], axis=1)
# There is no parameters for above structure. # There is no parameters for above structure.
# Hence, append a batch_norm to avoid failure caused by load_combined. # Hence, append a batch_norm to avoid failure caused by load_combined.
out = fluid.layers.batch_norm(concat_out, is_test=True) reshape_out = fluid.layers.reshape(concat_out, [-1, 0, 1, 1])
out = fluid.layers.batch_norm(reshape_out, is_test=True)
self.feeds = { self.feeds = {
"data1": np.random.random([8, 32, 128]).astype("float32"), "data1": np.random.random([8, 32, 128]).astype("float32"),
...@@ -42,7 +43,7 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest): ...@@ -42,7 +43,7 @@ class TransposeFlattenConcatFusePassTRTTest(InferencePassTest):
} }
self.enable_trt = True self.enable_trt = True
self.trt_parameters = TransposeFlattenConcatFusePassTRTTest.TensorRTParam( self.trt_parameters = TransposeFlattenConcatFusePassTRTTest.TensorRTParam(
1 << 20, 8, 3, AnalysisConfig.Precision.Float32, False, False) 1 << 20, 8, 0, AnalysisConfig.Precision.Float32, False, False)
self.fetch_list = [out] self.fetch_list = [out]
def test_check_output(self): def test_check_output(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册