Unverified commit 17a2003d, authored by zhoutianzi666, committed by GitHub

[Inference TRT] elementwise layer support (#43851)

* elementwise support

* commit
Parent ff70a269
@@ -19,236 +19,115 @@ namespace paddle {
 namespace inference {
 namespace tensorrt {
 
-static bool CheckDims(const nvinfer1::Dims& dims_x,
-                      const nvinfer1::Dims& dims_y) {
-  if (dims_x.nbDims != dims_y.nbDims) {
-    return false;
-  }
-  for (int i = 0; i < dims_x.nbDims; i++) {
-    if (dims_x.d[i] != dims_y.d[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-class ElementwiseWeightOpConverter : public OpConverter {
- public:
-  ElementwiseWeightOpConverter() {}
-  void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
-    // Here the two nullptr looks strange, that's because the
-    // framework::OpDesc's constructor is strange.
-    nvinfer1::ILayer* layer = nullptr;
-    framework::OpDesc op_desc(op, nullptr);
-    VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
-    auto* X = engine_->GetITensor(op_desc.Input("X").front());
-    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
-    PADDLE_ENFORCE_NOT_NULL(
-        Y_v, platform::errors::NotFound("Variable %s not found in scope.",
-                                        op_desc.Input("Y").front().c_str()));
-    auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-    float* weight_data = nullptr;
-    auto output_name = op_desc.Output("Out")[0];
-    weight_data = engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
-    nvinfer1::Dims dims_x = X->getDimensions();
-
-    auto regist_eltwise_weight = [&](nvinfer1::ScaleMode scale_mode) {
-      TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT,
-                                           static_cast<void*>(weight_data),
-                                           static_cast<size_t>(Y_t->numel())};
-      TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr,
-                                           0};
-      TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
-                                           0};
-      nvinfer1::IShuffleLayer* expand_layer = nullptr;
-      nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
-      int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
-      auto input_dim = X->getDimensions();
-      if (input_dim.nbDims < 3 + dynamic_shape_offset) {
-        nvinfer1::Dims expand_shape;
-        expand_shape.nbDims = 3 + dynamic_shape_offset;
-        for (int i = 0; i < expand_shape.nbDims; i++) {
-          if (i < input_dim.nbDims) {
-            expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
-          } else {
-            expand_shape.d[i] = 1;
-          }
-        }
-        expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-        expand_layer->setReshapeDimensions(expand_shape);
-        X = expand_layer->getOutput(0);
-        expand_layer->getOutput(0)->setName(
-            ("elementwise_reshape_out: " + output_name).c_str());
-        expand_layer->setName(
-            ("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
-      }
-      if (op_type_ == "add") {
-        nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, ScaleNd, *X, scale_mode, shift_weights.get(),
-            scale_weights.get(), power_weights.get(), dynamic_shape_offset);
-        layer = scale_layer;
-      } else if (op_type_ == "mul") {
-        nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, Scale, *X, scale_mode, scale_weights.get(),
-            shift_weights.get(), power_weights.get());
-        layer = scale_layer;
-      }
-      if (input_dim.nbDims < 3 + dynamic_shape_offset) {
-        nvinfer1::Dims squeeze_shape;
-        squeeze_shape.nbDims = input_dim.nbDims;
-        for (int i = 0; i < squeeze_shape.nbDims; i++) {
-          squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
-        }
-        squeeze_layer =
-            TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
-        squeeze_layer->setReshapeDimensions(squeeze_shape);
-        RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
-                                 {output_name}, test_mode);
-      } else {
-        RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
-                                 {output_name}, test_mode);
-      }
-    };
-
-    if (engine_->with_dynamic_shape()) {
-      if (Y_t->dims().size() == 1) {
-        auto scale_mode = nvinfer1::ScaleMode::kCHANNEL;
-        PADDLE_ENFORCE_EQ(Y_t->dims()[0], dims_x.d[1],
-                          platform::errors::InvalidArgument(
-                              "The Bias's size(%d) should be equal to the "
-                              "first dim(%d) of the Input.",
-                              Y_t->dims()[0], dims_x.d[1]));
-        regist_eltwise_weight(scale_mode);
-      } else {
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The size of input bias's dims is %d, but TensorRT dynamic shape "
-            "only support size = 1 for Elementwise op!",
-            Y_t->dims().size()));
-      }
-      return;
-    }
-
-    std::vector<int> no_batch_dims;
-    int start_index = 0;
-
-    for (; start_index < dims_x.nbDims; start_index++)
-      no_batch_dims.push_back(dims_x.d[start_index]);
-
-    auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
-
-    std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
-    if (dims_y.size() == no_batch_dims.size() + 1) {
-      if (dims_y[0] == 1) dims_y.erase(dims_y.begin());
-    }
-
-    if (dims_y.size() == 1 && dims_y[0] == no_batch_dims[0]) {
-      scale_mode = nvinfer1::ScaleMode::kCHANNEL;
-    } else if (dims_y.size() == no_batch_dims.size() &&
-               dims_y[0] == no_batch_dims[0]) {
-      scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
-      for (size_t i = 1; i < no_batch_dims.size(); i++) {
-        if (dims_y[i] != no_batch_dims[i]) {
-          scale_mode = nvinfer1::ScaleMode::kCHANNEL;
-          break;
-        }
-      }
-      if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
-        for (size_t i = 1; i < no_batch_dims.size(); i++) {
-          if (dims_y[i] != 1)
-            PADDLE_THROW(platform::errors::InvalidArgument(
-                "The bias's %d dim is %d, but TensorRT dynamic shape only "
-                "support it equals to 1 for Elementwise op!",
-                i, dims_y[i]));
-        }
-      }
-    } else {
-      if (dims_y.size() >= 1) {
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The size of bias's dims is %d and bias's size is %d. TensorRT "
-            "doesn't support this shape for Elementwise op!",
-            dims_y.size(), dims_y[0]));
-      } else {
-        PADDLE_THROW(platform::errors::InvalidArgument(
-            "The size of bias's dims is %d. TensorRT doesn't support "
-            "this shape for Elementwise op!",
-            dims_y.size()));
-      }
-    }
-    regist_eltwise_weight(scale_mode);
-  }
-
- protected:
-  std::string op_type_;
-};
-
 class ElementwiseTensorOpConverter : public OpConverter {
  public:
   ElementwiseTensorOpConverter() {}
-  void operator()(const framework::proto::OpDesc& op,
-                  const framework::Scope& scope, bool test_mode) override {
-    auto op_pair = ops.find(op_type_);
-    PADDLE_ENFORCE_NE(op_pair, ops.end(),
-                      platform::errors::InvalidArgument(
-                          "Elementwise op's type(%s) is not supported. Please "
-                          "check if the op_type is correct.",
-                          op_type_));
-    // Here the two nullptr looks strange, that's because the
-    // framework::OpDesc's constructor is strange.
-    framework::OpDesc op_desc(op, nullptr);
-    nvinfer1::ILayer* layer = nullptr;
-    auto* X = engine_->GetITensor(op_desc.Input("X").front());
-    auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
-    std::vector<nvinfer1::ITensor*> itensors;
-    itensors.push_back(X);
-    itensors.push_back(Y);
-    nvinfer1::Dims dims_x = X->getDimensions();
-    nvinfer1::Dims dims_y = Y->getDimensions();
-
-    int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
-    auto output_name = op_desc.Output("Out")[0];
-
-    auto common_func = [&](nvinfer1::ILayer* layer) {
-      RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
-    };
-    if (dims_x.nbDims == dims_y.nbDims) {
-      // The two input tensors should have the same dims
-      VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
-      nvinfer1::IElementWiseLayer* elet_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X, *Y, op_pair->second);
-      layer = elet_layer;
-    } else {
-      VLOG(3) << "Convert a fluid elementwise op to TensorRT "
-                 "ElementWisePluginLayer";
-      if (engine_->with_dynamic_shape()) {
-#if IS_TRT_VERSION_GE(6000)
-        plugin::ElementwisePluginDynamic* plugin =
-            new plugin::ElementwisePluginDynamic(op_type_, axis);
-        layer = engine_->AddDynamicPlugin(itensors.data(), 2, plugin);
-#else
-        PADDLE_THROW(platform::errors::Fatal(
-            "You are running the TRT Dynamic Shape mode, need to confirm that "
-            "your TRT version is no less than 6.0"));
-#endif
-      } else {
-        plugin::ElementWisePlugin* plugin =
-            new plugin::ElementWisePlugin(op_type_, dims_x, dims_y, axis);
-        std::vector<nvinfer1::ITensor*> inputs{X, Y};
-        auto* plugin_layer = engine_->AddPlugin(
-            inputs.data(), inputs.size(),
-            reinterpret_cast<plugin::PluginTensorRT*>(plugin));
-        layer = plugin_layer;
-      }
-    }
-    common_func(layer);
-  }
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope,
+                  bool test_mode) override {
+    VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
+    framework::OpDesc op_desc(op, nullptr);
+    auto* X = engine_->GetITensor(op_desc.Input("X").front());
+    nvinfer1::ITensor* Y = nullptr;
+    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
+    if (Y_v) {
+      // Y is weight
+      auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
+      float* weight_data =
+          engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
+      std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
+      TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
+                                      static_cast<void*>(weight_data),
+                                      static_cast<size_t>(Y_t->numel())};
+      nvinfer1::Dims trt_dims_y;
+      trt_dims_y.nbDims = dims_y.size();
+      for (int i = 0; i < trt_dims_y.nbDims; i++) {
+        trt_dims_y.d[i] = dims_y[i];
+      }
+      Y = TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims_y, y_weight.get())
+              ->getOutput(0);
+    } else {
+      Y = engine_->GetITensor(op_desc.Input("Y").front());
+    }
+    if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
+      auto* tmp = X;
+      X = Y;
+      Y = tmp;
+    }
+    nvinfer1::Dims dims_x = X->getDimensions();
+    nvinfer1::Dims dims_y = Y->getDimensions();
+    auto output_name = op_desc.Output("Out")[0];
+
+    // axis here is relative to explicit batch
+    int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
+    int real_x_rank = dims_x.nbDims;
+    int real_y_rank = dims_y.nbDims;
+    if (!engine_->with_dynamic_shape()) {
+      real_x_rank++;
+      real_y_rank++;
+      if (Y_v) real_y_rank--;
+    }
+    if (axis == -1) {
+      axis = real_x_rank - real_y_rank;
+    }
+    if (!engine_->with_dynamic_shape() && axis > 0) {
+      axis--;
+    }
+
+    // X: - -  - - - - -
+    //        axis
+    // Y:      - - -
+    // We need to expand Y's rank to X's rank.
+    int left_one_num = axis;
+    int right_one_num = dims_x.nbDims - axis - dims_y.nbDims;
+    nvinfer1::IShuffleLayer* reshape_layer;
+    nvinfer1::ITensor* reshape_y_tensor;
+    if (left_one_num > 0 || right_one_num > 0) {
+      if (engine_->with_dynamic_shape()) {
+        auto* y_shape_tensor = Shape(Y);
+        auto* new_y_shape_tensor = y_shape_tensor;
+        if (axis > 0) {
+          std::vector<int32_t> left_one(left_one_num, 1);
+          auto* left_one_tensor = Add1DConstantLayer(left_one);
+          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
+              left_one_tensor, new_y_shape_tensor});
+        }
+        if (right_one_num > 0) {
+          std::vector<int32_t> right_one(right_one_num, 1);
+          auto* right_one_tensor = Add1DConstantLayer(right_one);
+          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
+              new_y_shape_tensor, right_one_tensor});
+        }
+        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
+        reshape_layer->setInput(1, *new_y_shape_tensor);
+      } else {
+        nvinfer1::Dims new_y_dims;
+        new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num;
+        for (int i = 0; i < new_y_dims.nbDims; i++) new_y_dims.d[i] = 1;
+        for (int i = 0; i < dims_y.nbDims; i++)
+          new_y_dims.d[left_one_num + i] = dims_y.d[i];
+        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
+        reshape_layer->setReshapeDimensions(new_y_dims);
+      }
+      reshape_y_tensor = reshape_layer->getOutput(0);
+    } else {
+      // In fact, we could remove this `else`, but without it the
+      // rt_resnet50_test CI fails on TRT 6015.
+      reshape_y_tensor = Y;
+    }
+
+    auto op_pair = ops.find(op_type_);
+    PADDLE_ENFORCE_NE(op_pair,
+                      ops.end(),
+                      platform::errors::InvalidArgument(
+                          "Elementwise op's type(%s) is not supported. Please "
+                          "check if the op_type is correct.",
+                          op_type_));
+
+    auto* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second);
+    RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
+  }
 
  protected:
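To make the rank-expansion arithmetic above concrete, here is a minimal standalone sketch (not part of the commit) mirroring the static-shape branch, with made-up shapes X = [8, 16, 32, 64], Y = [16, 32], axis = 1:

#include <cstdio>

// Sketch of the static-shape padding above; shapes are invented for
// illustration. Y's shape is padded with 1s on both sides until it
// reaches X's rank, so TensorRT can broadcast it elementwise.
int main() {
  const int x_rank = 4, y_rank = 2;  // X = [8, 16, 32, 64], Y = [16, 32]
  const int axis = 1;                // Y aligns with X's dimensions 1..2
  const int dims_y[y_rank] = {16, 32};

  const int left_one_num = axis;                     // 1
  const int right_one_num = x_rank - axis - y_rank;  // 1
  const int n = left_one_num + y_rank + right_one_num;

  int new_y_dims[8];
  for (int i = 0; i < n; i++) new_y_dims[i] = 1;
  for (int i = 0; i < y_rank; i++) new_y_dims[left_one_num + i] = dims_y[i];

  for (int i = 0; i < n; i++) printf("%d ", new_y_dims[i]);  // 1 16 32 1
  printf("\n");
  return 0;
}

After this reshape a single IElementWiseLayer broadcasts Y against X, which is what lets the commit retire the old IScaleLayer and plugin paths.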
@@ -268,16 +147,6 @@ const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
     {"max", nvinfer1::ElementWiseOperation::kMAX},
 };
 
-class ElementwiseWeightAddOpConverter : public ElementwiseWeightOpConverter {
- public:
-  ElementwiseWeightAddOpConverter() { op_type_ = "add"; }
-};
-
-class ElementwiseWeightMulOpConverter : public ElementwiseWeightOpConverter {
- public:
-  ElementwiseWeightMulOpConverter() { op_type_ = "mul"; }
-};
-
 class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
  public:
   ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
@@ -318,9 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
 }  // namespace paddle
 
 REGISTER_TRT_OP_CONVERTER(elementwise_add_weight,
-                          ElementwiseWeightAddOpConverter);
+                          ElementwiseTensorAddOpConverter);
 REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight,
-                          ElementwiseWeightMulOpConverter);
+                          ElementwiseTensorMulOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight,
+                          ElementwiseTensorSubOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_div_weight,
+                          ElementwiseTensorDivOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight,
+                          ElementwiseTensorPowOpConverter);
 REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor,
                           ElementwiseTensorAddOpConverter);
...
@@ -18,6 +18,7 @@ limitations under the License. */
 #include <unordered_map>
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/scope.h"
@@ -46,14 +47,16 @@ class OpConverter {
   // test_mode: whether the instance executes in a unit test.
   void ConvertOp(const framework::proto::OpDesc& op,
                  const std::unordered_set<std::string>& parameters,
                  const framework::Scope& scope,
                  TensorRTEngine* engine,
                  bool test_mode = false) {
     framework::OpDesc op_desc(op, nullptr);
     OpConverter* it{nullptr};
 
     if (op_desc.Type() == "mul") {
       PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(),
                         1UL,
                         platform::errors::InvalidArgument(
                             "The input op mul's Input(\"Y\")."
                             "size() should equal to 1, but received "
@@ -67,11 +70,10 @@ class OpConverter {
     if (op_desc.Type().find("elementwise") != std::string::npos) {
       static std::unordered_set<std::string> add_tensor_op_set{
           "add", "mul", "sub", "div", "max", "min", "pow"};
-      // TODO(xingzhaolong): all mul, sub, div
-      // static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
-      // "sub", "div"};
-      static std::unordered_set<std::string> add_weight_op_set{"add", "mul"};
+      static std::unordered_set<std::string> add_weight_op_set{
+          "add", "mul", "sub", "div", "pow"};
       PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(),
                         1UL,
                         platform::errors::InvalidArgument(
                             "The input op's Input(\"Y\")."
                             "size() should equal to 1, but received "
@@ -82,64 +84,74 @@ class OpConverter {
       std::string Y = op_desc.Input("Y")[0];
       if (parameters.count(Y)) {
         PADDLE_ENFORCE_GT(
             add_weight_op_set.count(op_type),
             0,
             platform::errors::Unimplemented("Unsupported elementwise type %s",
                                             op_type.c_str()));
         it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
                                                     "_weight");
         PADDLE_ENFORCE_NOT_NULL(
             it,
             platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                             op_desc.Type()));
       } else {
         PADDLE_ENFORCE_GT(
             add_tensor_op_set.count(op_type),
             0,
             platform::errors::Unimplemented("Unsupported elementwise type %s",
                                             op_type.c_str()));
         it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
                                                     "_tensor");
       }
       PADDLE_ENFORCE_NOT_NULL(
           it,
           platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                           op_desc.Type()));
     }
 
     if (op_desc.Type() == "depthwise_conv2d") {
       it = Registry<OpConverter>::Global().Lookup("conv2d");
       PADDLE_ENFORCE_NOT_NULL(
           it,
           platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                           op_desc.Type()));
     }
     if (op_desc.Type() == "depthwise_conv2d_transpose") {
       it = Registry<OpConverter>::Global().Lookup("conv2d_transpose");
       PADDLE_ENFORCE_NOT_NULL(
           it,
           platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                           op_desc.Type()));
     }
     if (op_desc.Type() == "transpose2") {
       it = Registry<OpConverter>::Global().Lookup("transpose");
       PADDLE_ENFORCE_NOT_NULL(
           it,
           platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                           op_desc.Type()));
     }
     if (op_desc.Type() == "flatten2") {
       it = Registry<OpConverter>::Global().Lookup("flatten");
       PADDLE_ENFORCE_NOT_NULL(
           it,
           platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                           op_desc.Type()));
     }
     // reshape2 == reshape
     if (op_desc.Type() == "reshape2") {
       it = Registry<OpConverter>::Global().Lookup("reshape");
       PADDLE_ENFORCE_NOT_NULL(
           it,
           platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                           op_desc.Type()));
     }
     if (!it) {
       it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
     }
     PADDLE_ENFORCE_NOT_NULL(
         it,
         platform::errors::Unimplemented("no OpConverter for optype [%s]",
                                         op_desc.Type()));
 
     it->SetEngine(engine);
     (*it)(op, scope, test_mode);
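As an illustration of the dispatch above (a standalone sketch, not code from the commit): an elementwise_sub op whose Y input is a persistable parameter now resolves to the elementwise_sub_weight converter, a key this commit newly registers; the old add_weight_op_set{"add", "mul"} would have rejected it.

#include <iostream>
#include <string>

int main() {
  std::string op_type = "sub";  // from op type "elementwise_sub"
  bool y_is_parameter = true;   // Y was found in `parameters`
  std::string key =
      "elementwise_" + op_type + (y_is_parameter ? "_weight" : "_tensor");
  std::cout << key << std::endl;  // prints: elementwise_sub_weight
  return 0;
}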
@@ -215,7 +227,8 @@ class OpConverter {
   // the INetwork's inputs and outputs should be specified in some other modules.
   void ConvertBlock(const framework::proto::BlockDesc& block,
                     const std::unordered_set<std::string>& parameters,
                     const framework::Scope& scope,
                     TensorRTEngine* engine) {
     std::unique_lock<std::mutex> lk(mut_);
     for (int i = 0; i < block.ops_size(); i++) {
       const auto& op = block.ops(i);
@@ -225,20 +238,24 @@ class OpConverter {
   // The scope here should be initialized with the parameter vars.
   void ConvertBlockToTRTEngine(
       framework::BlockDesc* block_desc,
       const framework::Scope& scope,
       const std::vector<std::string>& inputs,
       const std::unordered_set<std::string>& parameters,
       const std::vector<std::string>& outputs,
       TensorRTEngine* engine) {
     engine->InitNetwork();
     bool all_dynamic_shape_set = true;
     for (auto& input : inputs) {
       if (parameters.count(input)) continue;
       auto* var = block_desc->FindVar(input);
       PADDLE_ENFORCE_NOT_NULL(
           var,
           platform::errors::NotFound("no variable called %s in block.",
                                      input.c_str()));
       PADDLE_ENFORCE_EQ(
           var->GetType(),
           FluidDT::VarType_Type_LOD_TENSOR,
           platform::errors::InvalidArgument("TensorRT engine only takes "
                                             "LoDTensor as input"));
       auto var_shape = var->GetShape();
@@ -263,25 +280,29 @@ class OpConverter {
         } else {
           input_shape.push_back(min_input_shape[i]);
           // the i dimension should be same.
           PADDLE_ENFORCE_EQ(min_input_shape[i],
                             optim_input_shape[i],
                             platform::errors::InvalidArgument(
                                 "The dim (%d) of the min_input_shape and "
                                 "optim_input_shape should be same."));
         }
       }
       engine->DeclareInput(
           input,
           FluidDataType2TRT(
               var->Proto()->type().lod_tensor().tensor().data_type()),
           Vec2TRT_Dims(input_shape, input, true));
 #endif
     } else {
       engine->DeclareInput(
           input,
           FluidDataType2TRT(
               var->Proto()->type().lod_tensor().tensor().data_type()),
           Vec2TRT_Dims(var_shape, input));
     }
   }
   PADDLE_ENFORCE_EQ(all_dynamic_shape_set,
                     true,
                     platform::errors::InvalidArgument(
                         "some trt inputs dynamic shape info not set, "
                         "check the INFO log above for more details."));
@@ -294,20 +315,221 @@ class OpConverter {
     engine->ClearWeights();
   }
// rank(result) = rank(input)
nvinfer1::ITensor* Gather(nvinfer1::ITensor* input,
const std::vector<int32_t> indices,
int axis = 0) {
auto* indices_tensor = Add1DConstantLayer(indices, " ");
auto* result =
TRT_ENGINE_ADD_LAYER(engine_, Gather, *input, *indices_tensor, axis)
->getOutput(0);
return result;
}
// paddle allows negative index
// for axis length = 5, paddle allows [-5, 4]
nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape,
nvinfer1::ITensor* indices) {
int rank = input_shape->getDimensions().nbDims;
std::vector<int32_t> zero = std::vector<int32_t>(rank, 0);
std::vector<int32_t> minus_one = std::vector<int32_t>(rank, -1);
nvinfer1::ITensor* zero_tensor = Add1DConstantLayer(zero);
nvinfer1::ITensor* minus_one_tensor = Add1DConstantLayer(minus_one);
    // sign is -1 where the index is negative and 0 otherwise
auto* sign = Max(Min(indices, zero_tensor), minus_one_tensor);
return Sub(indices, Prod(sign, input_shape));
}
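  // Worked example (values invented): for an axis length of 5 and an index
  // of -2, sign = max(min(-2, 0), -1) = -1, so the result is
  // -2 - (-1 * 5) = 3, the equivalent non-negative index.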
nvinfer1::ITensor* Shape(nvinfer1::ITensor* input) {
return TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);
}
  // Concat does not change the rank
nvinfer1::ITensor* Concat(const std::vector<nvinfer1::ITensor*>& inputs,
int axis = 0) {
auto* layer = TRT_ENGINE_ADD_LAYER(
engine_, Concatenation, inputs.data(), inputs.size());
if (axis != 0) layer->setAxis(axis);
nvinfer1::ITensor* c = layer->getOutput(0);
return c;
}
nvinfer1::ITensor* Sum(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kSUM)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Prod(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Min(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kMIN)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Max(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kMAX)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Sub(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kSUB)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Div(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kDIV)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
nvinfer1::ActivationType act_type) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, Activation, *a, act_type)->getOutput(0);
return c;
}
// Get element tensor of 1D shape tensor
nvinfer1::ITensor* GetEleTensorOfShape(nvinfer1::ITensor* shape_tensor,
int index,
bool is_scalar = false) {
auto* tensor =
TRT_ENGINE_ADD_LAYER(engine_,
Gather,
*shape_tensor,
*Add1DConstantLayer(index, " ", is_scalar),
0)
->getOutput(0);
return tensor;
}
// Create and add Multi-D constant float layer
nvinfer1::ITensor* AddConstantLayer(const float* data,
const std::vector<int32_t>& weight_dims,
const std::string& weight_name) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = std::accumulate(
weight_dims.begin(), weight_dims.end(), 1, std::multiplies<int>());
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims trt_dims;
trt_dims.nbDims = weight_dims.size();
for (size_t i = 0; i < weight_dims.size(); i++)
trt_dims.d[i] = weight_dims[i];
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims, weight.get());
return const_layer->getOutput(0);
}
// Create and add 1D constant float layer
nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
const std::string& weight_name = "",
bool scalar = false) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = data.size();
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims input_shape;
input_shape.nbDims = scalar ? 0 : 1;
input_shape.d[0] = data_size;
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
return const_layer->getOutput(0);
}
// Create and add 1D constant layer
nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
const std::string& weight_name = "",
bool scalar = false) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = data.size();
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims input_shape;
input_shape.nbDims = scalar ? 0 : 1;
input_shape.d[0] = data_size;
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
return const_layer->getOutput(0);
}
nvinfer1::ITensor* Add1DConstantLayer(nvinfer1::Dims data,
const std::string& weight_name = "",
bool scalar = false) {
std::vector<int> tmp_data;
for (int i = 0; i < data.nbDims; i++) tmp_data.push_back(data.d[i]);
return Add1DConstantLayer(tmp_data, weight_name, scalar);
}
nvinfer1::ITensor* Add1DConstantLayer(int32_t data,
const std::string& weight_name = "",
bool scalar = false) {
std::vector<int> tmp_data;
tmp_data.push_back(data);
return Add1DConstantLayer(tmp_data, weight_name, scalar);
}
   void RreplenishLayerAndOutput(
       nvinfer1::ILayer* layer,
       const std::string& layer_type,
       const std::vector<std::string>& output_tensor_names,
       bool test_mode = false) {
     size_t num_out = output_tensor_names.size();
+    std::string layer_name = layer_type + " (Output: ";
     for (size_t i = 0; i < num_out; i++) {
       layer->getOutput(i)->setName(output_tensor_names[i].c_str());
       engine_->SetITensor(output_tensor_names[i], layer->getOutput(i));
       if (test_mode) {
         engine_->DeclareOutput(output_tensor_names[i]);
       }
+      layer_name += output_tensor_names[i];
+      if (i != num_out - 1) layer_name += ", ";
     }
-    layer->setName(
-        (layer_type + " (Output: " + output_tensor_names[0] + ")").c_str());
+    layer->setName((layer_name + ")").c_str());
   }
 
   void SetEngine(TensorRTEngine* engine) { engine_ = engine; }
...
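The helpers added above are building blocks for shape-tensor arithmetic under dynamic shape. As a usage illustration (hypothetical code, not part of this commit; FlattenLastTwoDims and x are invented names), flattening the last two dimensions of a rank-3 tensor inside a converter could be composed like this:

// Hypothetical member of an OpConverter subclass; `x` has rank 3.
nvinfer1::ITensor* FlattenLastTwoDims(nvinfer1::ITensor* x) {
  auto* shape = Shape(x);                   // 1D shape tensor [N, H, W]
  auto* n = GetEleTensorOfShape(shape, 0);  // [N]
  auto* h = GetEleTensorOfShape(shape, 1);  // [H]
  auto* w = GetEleTensorOfShape(shape, 2);  // [W]
  auto* hw = Prod(h, w);                    // [H * W]
  auto* new_shape = Concat({n, hw});        // shape tensor [N, H*W]
  auto* reshape = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *x);
  reshape->setInput(1, *new_shape);         // dynamic reshape
  return reshape->getOutput(0);
}

This is the same pattern the new elementwise converter uses to build Y's expanded shape from Shape, Concat, and Add1DConstantLayer.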
@@ -66,13 +66,16 @@ TRT_DT FluidDataType2TRT(FluidDT type) {
 // The T can be int32 or int64 type.
 template <typename T>
 nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape,
                             std::string input,
                             bool with_dynamic_shape = false) {
   PADDLE_ENFORCE_GT(shape.size(),
                     0UL,
                     platform::errors::InvalidArgument(
                         "TensorRT's tensor input requires at least 1 "
                         "dimensions, but input %s has %d dims.",
                         input,
                         shape.size()));
 
   auto ShapeStr = [](const std::vector<T>& shape) {
     std::ostringstream os;
@@ -93,7 +96,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
       PADDLE_THROW(platform::errors::InvalidArgument(
           "The input [%s] shape of trt subgraph is %s, please enable "
           "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
           input,
           ShapeStr(shape)));
     }
     return nvinfer1::Dims3(shape[1], shape[2], shape[3]);
   } else if (shape.size() == 5UL) {
@@ -101,7 +105,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
       PADDLE_THROW(platform::errors::InvalidArgument(
           "The input [%s] shape of trt subgraph is %s, please enable "
           "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
           input,
           ShapeStr(shape)));
     }
     return nvinfer1::Dims4(shape[1], shape[2], shape[3], shape[4]);
   } else if (shape.size() == 3UL) {
@@ -109,7 +114,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
       PADDLE_THROW(platform::errors::InvalidArgument(
           "The input [%s] shape of trt subgraph is %s, please enable "
           "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
           input,
           ShapeStr(shape)));
     }
     return nvinfer1::Dims2(shape[1], shape[2]);
   } else if (shape.size() == 2UL) {
@@ -117,7 +123,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
       PADDLE_THROW(platform::errors::InvalidArgument(
           "The input [%s] shape of trt subgraph is %s, please enable "
           "trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
           input,
           ShapeStr(shape)));
     }
     nvinfer1::Dims dims;
     dims.nbDims = 1;
@@ -125,11 +132,13 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
     return dims;
   }
   // static shape doesn't support 1D op so far.
   PADDLE_ENFORCE_NE(shape.size(),
                     1UL,
                     platform::errors::InvalidArgument(
                         "The input [%s] shape of trt subgraph is %s."
                         "it's not supported by trt so far",
                         input,
                         ShapeStr(shape)));
 
   nvinfer1::Dims dims;
   dims.nbDims = shape.size() - 1;
@@ -151,7 +160,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
   return dims;
 }
 }
-}  // NOLINT
+}  // namespace
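For example (an illustrative call, not from the commit), in static-shape mode Vec2TRT_Dims drops the leading batch dimension before handing the shape to TensorRT:

// Illustrative only: implicit-batch (static shape) mode.
std::vector<int> shape = {1, 3, 224, 224};
nvinfer1::Dims dims = Vec2TRT_Dims(shape, "x");  // -> Dims3(3, 224, 224)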
class TRTInt8Calibrator;
@@ -184,9 +193,11 @@ class TensorRTEngine {
   };
 
   TensorRTEngine(
       int max_batch,
       int max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
       const ShapeMapType min_input_shape = {},
       const ShapeMapType max_input_shape = {},
       const ShapeMapType optim_input_shape = {},
@@ -205,17 +216,21 @@ class TensorRTEngine {
     if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
         optim_input_shape_.size() != 0) {
       PADDLE_ENFORCE_EQ(
           min_input_shape_.size(),
           max_input_shape_.size(),
           platform::errors::InvalidArgument(
               "The min_input_shape_'s size(%d) should be equal to the "
               "size(%d) of max_input_shape_",
               min_input_shape_.size(),
               max_input_shape_.size()));
       PADDLE_ENFORCE_EQ(
           min_input_shape_.size(),
           optim_input_shape_.size(),
           platform::errors::InvalidArgument(
               "The min_input_shape_'s size(%d) should be equal to the "
               "size(%d) of optim_input_shape_",
               min_input_shape_.size(),
               optim_input_shape_.size()));
 #if IS_TRT_VERSION_GE(6000)
       with_dynamic_shape_ = true;
 #else
@@ -242,7 +257,8 @@ class TensorRTEngine {
                      const nvinfer1::Dims& dim);
   // Set the offset-th output from a layer as the network's output, and set its
   // name.
   void DeclareOutput(const nvinfer1::ILayer* layer,
                      int offset,
                      const std::string& name);
   // Set the itensor_map_[name] as the network's output, and set its name.
   void DeclareOutput(const std::string& name);
@@ -374,7 +390,8 @@ class TensorRTEngine {
   int GetDeviceId() { return device_id_; }
 
   nvinfer1::IPluginV2Layer* AddPlugin(nvinfer1::ITensor* const* inputs,
                                       int num_inputs,
                                       plugin::PluginTensorRT*);
 
   nvinfer1::IPluginV2Layer* AddPluginV2Ext(nvinfer1::ITensor* const* inputs,
                                            int num_inputs,
@@ -431,7 +448,8 @@ class TensorRTEngine {
   // After finishing adding ops, freeze this network and create the execution
   // environment.
   void FreezeNetwork();
 
   void Execute(int batch_size,
                std::vector<void*>* buffers,
                cudaStream_t stream = nullptr);
 
   nvinfer1::INetworkDefinition* network() { return infer_network_.get(); }
@@ -448,15 +466,20 @@ class TensorRTEngine {
       auto name = it.first;
       auto input_shape = it.second;
       PADDLE_ENFORCE_EQ(
           min_input_shape_.count(name),
           true,
           platform::errors::InvalidArgument(
               "TRT dynamic_shape min_input_shape %s not found.", name));
       PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
                         input_shape.size(),
                         platform::errors::InvalidArgument(
                             "TRT dynamic_shape min_input_shape %s size not "
                             "equal, the min_input_shape[%s].size()=%d"
                             ", but the runtime_input_shape[%s].size()=%d.",
                             name,
                             name,
                             min_input_shape_[name].size(),
                             name,
                             input_shape.size()));
       auto bak_min_shape = min_input_shape_[name];
       auto bak_max_shape = max_input_shape_[name];
@@ -497,7 +520,8 @@ class TensorRTEngine {
 #if IS_TRT_VERSION_GE(6000)
   nvinfer1::IPluginV2Layer* AddDynamicPlugin(
       nvinfer1::ITensor* const* inputs,
       int num_inputs,
       plugin::DynamicPluginTensorRT* plugin) {
     owned_pluginv2_.emplace_back(plugin);
     return network()->addPluginV2(inputs, num_inputs, *plugin);
@@ -524,7 +548,8 @@ class TensorRTEngine {
   void Set(const std::string& attr_name, AttrType* attr) {
     if (attrs_.count(attr_name) == 0) {
       PADDLE_ENFORCE_EQ(
           attrs_.count(attr_name),
           0,
           platform::errors::AlreadyExists(
               "Attribute %s already set in trt engine.", attr_name));
     } else {
@@ -543,7 +568,8 @@ class TensorRTEngine {
   template <typename AttrType>
   void SetNotOwned(const std::string& attr_name, AttrType* attr) {
     PADDLE_ENFORCE_EQ(
         attrs_.count(attr_name),
         0,
         platform::errors::AlreadyExists(
             "Attribute %s already set in trt engine.", attr_name));
     attrs_[attr_name] = attr;
@@ -552,7 +578,8 @@ class TensorRTEngine {
   // Get a reference to the attribute previously set.
   template <typename AttrType>
   AttrType& Get(const std::string& attr_name) const {
     PADDLE_ENFORCE_NE(attrs_.find(attr_name),
                       attrs_.end(),
                       platform::errors::InvalidArgument(
                           "Attribute %s not found in trt engine.", attr_name));
     try {
@@ -574,7 +601,8 @@ class TensorRTEngine {
   };
   PADDLE_THROW(platform::errors::InvalidArgument(
       "Invalid type for attribute %s, expected: %s, actual: %s.",
       attr_name,
       TypeToString(typeid(AttrType*)),
       TypeToString(attrs_.at(attr_name).type())));
 }
@@ -672,7 +700,7 @@ class TensorRTEngine {
 // them, and a macro like this is more extensible when the underlying TensorRT
 // library adds support for new layers.
 #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
-  engine__->network()->add##layer__(__VA_ARGS__);
+  engine__->network()->add##layer__(__VA_ARGS__)
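The dropped semicolon matters because the new helper functions use the macro as an expression. A minimal sketch of the failure mode (engine_ and input stand for any converter's members):

// Old definition (note the trailing ';'):
//   #define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
//     engine__->network()->add##layer__(__VA_ARGS__);
//
// A chained use such as
//   auto* t = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);
// would expand to
//   auto* t = engine_->network()->addShape(*input);->getOutput(0);
// which does not compile. Without the trailing ';' the macro is a plain
// expression and callers terminate their own statements:
auto* t = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);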
 class TRTEngineManager {
  public:
@@ -687,18 +715,27 @@ class TRTEngineManager {
   }
 
   TensorRTEngine* Create(
       std::string name,
       int max_batch,
       int max_workspace,
       AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
       TRTInt8Calibrator* calibrator = nullptr,
       int device_id = 0,
       const std::map<std::string, std::vector<int>> min_input_shape = {},
       const std::map<std::string, std::vector<int>> max_input_shape = {},
       const std::map<std::string, std::vector<int>> optim_input_shape = {},
       bool disable_trt_plugin_fp16 = false,
       nvinfer1::ILogger& logger = NaiveLogger::Global()) {
     auto* p = new TensorRTEngine(max_batch,
                                  max_workspace,
                                  precision,
                                  calibrator,
                                  device_id,
                                  min_input_shape,
                                  max_input_shape,
                                  optim_input_shape,
                                  disable_trt_plugin_fp16,
                                  logger);
     engines_[name].reset(p);
     return p;
   }
...