Unverified commit 17a2003d authored by Z zhoutianzi666, committed by GitHub

[Inference TRT] elementwise layer support (#43851)

* elementwise support

* commit
Parent ff70a269
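In brief: all elementwise ops are now lowered to TensorRT's IElementWiseLayer, whether Y is a tensor or a weight; when Y has fewer dimensions than X, its shape is padded with 1s around `axis` until the ranks match. A minimal standalone sketch of that padding rule (hypothetical helper, not code from this diff):

    #include <vector>

    // Pad dims_y with 1s until it has dims_x.size() entries: `axis` ones on
    // the left, the remainder on the right. Mirrors the reshape of Y that the
    // converter builds.
    std::vector<int> ExpandYDims(const std::vector<int>& dims_x,
                                 const std::vector<int>& dims_y, int axis) {
      if (axis == -1) axis = static_cast<int>(dims_x.size() - dims_y.size());
      std::vector<int> out(dims_x.size(), 1);
      for (size_t i = 0; i < dims_y.size(); ++i) out[axis + i] = dims_y[i];
      return out;
    }

    // e.g. dims_x = {8, 64, 32, 32}, dims_y = {64}, axis = 1 -> {1, 64, 1, 1}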
@@ -19,236 +19,115 @@ namespace paddle {
namespace inference {
namespace tensorrt {
static bool CheckDims(const nvinfer1::Dims& dims_x,
const nvinfer1::Dims& dims_y) {
if (dims_x.nbDims != dims_y.nbDims) {
return false;
}
for (int i = 0; i < dims_x.nbDims; i++) {
if (dims_x.d[i] != dims_y.d[i]) {
return false;
}
}
return true;
}
class ElementwiseTensorOpConverter : public OpConverter {
 public:
  ElementwiseTensorOpConverter() {}
  void operator()(const framework::proto::OpDesc& op,
                  const framework::Scope& scope,
                  bool test_mode) override {
    VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
    framework::OpDesc op_desc(op, nullptr);
    auto* X = engine_->GetITensor(op_desc.Input("X").front());
    nvinfer1::ITensor* Y = nullptr;
    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
    if (Y_v) {
      // Y is weight
      auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
      float* weight_data =
          engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
      std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
      TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
                                      static_cast<void*>(weight_data),
                                      static_cast<size_t>(Y_t->numel())};
      nvinfer1::Dims trt_dims_y;
      trt_dims_y.nbDims = dims_y.size();
      for (int i = 0; i < trt_dims_y.nbDims; i++) {
        trt_dims_y.d[i] = dims_y[i];
      }
      Y = TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims_y, y_weight.get())
              ->getOutput(0);
    } else {
      Y = engine_->GetITensor(op_desc.Input("Y").front());
    }
    if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
      auto* tmp = X;
      X = Y;
      Y = tmp;
    }
    nvinfer1::Dims dims_x = X->getDimensions();
    nvinfer1::Dims dims_y = Y->getDimensions();
    auto output_name = op_desc.Output("Out")[0];
    // axis here is relative to explicit batch
    int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
    int real_x_rank = dims_x.nbDims;
    int real_y_rank = dims_y.nbDims;
    if (!engine_->with_dynamic_shape()) {
      real_x_rank++;
      real_y_rank++;
      if (Y_v) real_y_rank--;
    }
    if (axis == -1) {
      axis = real_x_rank - real_y_rank;
    }
    if (!engine_->with_dynamic_shape() && axis > 0) {
      axis--;
    }
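    // Worked example (hypothetical, static shape): X with real shape
    // [N, C, H, W] has dims_x.nbDims = 3 (TRT dims exclude the batch), and a
    // weight Y of shape [C, H, W] gives real_x_rank = 4, real_y_rank = 3;
    // axis = -1 becomes 4 - 3 = 1, and the `axis--` above maps it back to
    // axis = 0 relative to the TRT dims.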
    // X: - - -  - - - -
    //        axis
    // Y:        - - -
    // we need to expand Y's rank to match X's rank
    int left_one_num = axis;
    int right_one_num = dims_x.nbDims - axis - dims_y.nbDims;
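    // Example (hypothetical): dims_x.nbDims = 4, dims_y.nbDims = 2, axis = 1
    // -> left_one_num = 1, right_one_num = 1, so Y is reshaped from
    // [d0, d1] to [1, d0, d1, 1] before being fed to the elementwise layer.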
    nvinfer1::IShuffleLayer* reshape_layer;
    nvinfer1::ITensor* reshape_y_tensor;
    if (left_one_num > 0 || right_one_num > 0) {
      if (engine_->with_dynamic_shape()) {
        auto* y_shape_tensor = Shape(Y);
        auto* new_y_shape_tensor = y_shape_tensor;
        if (axis > 0) {
          std::vector<int32_t> left_one(left_one_num, 1);
          auto* left_one_tensor = Add1DConstantLayer(left_one);
          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
              left_one_tensor, new_y_shape_tensor});
        }
        if (right_one_num > 0) {
          std::vector<int32_t> right_one(right_one_num, 1);
          auto* right_one_tensor = Add1DConstantLayer(right_one);
          new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
              new_y_shape_tensor, right_one_tensor});
        }
        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
        reshape_layer->setInput(1, *new_y_shape_tensor);
      } else {
        nvinfer1::Dims new_y_dims;
        new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num;
        for (int i = 0; i < new_y_dims.nbDims; i++) new_y_dims.d[i] = 1;
        for (int i = 0; i < dims_y.nbDims; i++)
          new_y_dims.d[left_one_num + i] = dims_y.d[i];
        reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
        reshape_layer->setReshapeDimensions(new_y_dims);
      }
      reshape_y_tensor = reshape_layer->getOutput(0);
    } else {
      // In fact we could remove this `else`, but rt_resnet50_test in the CI
      // fails on TRT 6015 without it.
      reshape_y_tensor = Y;
    }
    auto op_pair = ops.find(op_type_);
    PADDLE_ENFORCE_NE(op_pair,
                      ops.end(),
                      platform::errors::InvalidArgument(
                          "Elementwise op's type(%s) is not supported. Please "
                          "check if the op_type is correct.",
                          op_type_));
    auto* layer = TRT_ENGINE_ADD_LAYER(
        engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second);
    RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
}
protected:
@@ -268,16 +147,6 @@ const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
{"max", nvinfer1::ElementWiseOperation::kMAX},
};
class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
public:
ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
@@ -318,9 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(elementwise_add_weight,
ElementwiseTensorAddOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight,
ElementwiseTensorMulOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight,
ElementwiseTensorSubOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_div_weight,
ElementwiseTensorDivOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight,
ElementwiseTensorPowOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor,
ElementwiseTensorAddOpConverter);
@@ -18,6 +18,7 @@ limitations under the License. */
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/scope.h"
@@ -46,14 +47,16 @@ class OpConverter {
// test_mode: whether the instance executes in an unit test.
void ConvertOp(const framework::proto::OpDesc& op,
const std::unordered_set<std::string>& parameters,
const framework::Scope& scope,
TensorRTEngine* engine,
bool test_mode = false) {
framework::OpDesc op_desc(op, nullptr);
OpConverter* it{nullptr};
if (op_desc.Type() == "mul") {
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL,
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(),
1UL,
platform::errors::InvalidArgument(
"The input op mul's Input(\"Y\")."
"size() should equal to 1, but reveceid "
@@ -67,11 +70,10 @@ class OpConverter {
if (op_desc.Type().find("elementwise") != std::string::npos) {
static std::unordered_set<std::string> add_tensor_op_set{
"add", "mul", "sub", "div", "max", "min", "pow"};
static std::unordered_set<std::string> add_weight_op_set{
"add", "mul", "sub", "div", "pow"};
PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(),
1UL,
platform::errors::InvalidArgument(
"The input op's Input(\"Y\")."
"size() should equal to 1, but reveceid "
@@ -82,64 +84,74 @@ class OpConverter {
std::string Y = op_desc.Input("Y")[0];
if (parameters.count(Y)) {
PADDLE_ENFORCE_GT(
add_weight_op_set.count(op_type),
0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_weight");
PADDLE_ENFORCE_NOT_NULL(
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
} else {
PADDLE_ENFORCE_GT(
add_tensor_op_set.count(op_type),
0,
platform::errors::Unimplemented("Unsupported elementwise type %s",
op_type.c_str()));
it = Registry<OpConverter>::Global().Lookup("elementwise_" + op_type +
"_tensor");
}
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "depthwise_conv2d") {
it = Registry<OpConverter>::Global().Lookup("conv2d");
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "depthwise_conv2d_transpose") {
it = Registry<OpConverter>::Global().Lookup("conv2d_transpose");
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "transpose2") {
it = Registry<OpConverter>::Global().Lookup("transpose");
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (op_desc.Type() == "flatten2") {
it = Registry<OpConverter>::Global().Lookup("flatten");
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
// reshape2 == reshape
if (op_desc.Type() == "reshape2") {
it = Registry<OpConverter>::Global().Lookup("reshape");
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
}
if (!it) {
it = Registry<OpConverter>::Global().Lookup(op_desc.Type());
}
PADDLE_ENFORCE_NOT_NULL(
it, platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it,
platform::errors::Unimplemented("no OpConverter for optype [%s]",
op_desc.Type()));
it->SetEngine(engine);
(*it)(op, scope, test_mode);
@@ -215,7 +227,8 @@ class OpConverter {
// the INetwork's inputs and outputs should specified in some other modules.
void ConvertBlock(const framework::proto::BlockDesc& block,
const std::unordered_set<std::string>& parameters,
const framework::Scope& scope,
TensorRTEngine* engine) {
std::unique_lock<std::mutex> lk(mut_);
for (int i = 0; i < block.ops_size(); i++) {
const auto& op = block.ops(i);
@@ -225,20 +238,24 @@ class OpConverter {
// The scope here should be inited with the parameter vars.
void ConvertBlockToTRTEngine(
framework::BlockDesc* block_desc,
const framework::Scope& scope,
const std::vector<std::string>& inputs,
const std::unordered_set<std::string>& parameters,
const std::vector<std::string>& outputs,
TensorRTEngine* engine) {
engine->InitNetwork();
bool all_dynamic_shape_set = true;
for (auto& input : inputs) {
if (parameters.count(input)) continue;
auto* var = block_desc->FindVar(input);
PADDLE_ENFORCE_NOT_NULL(
var,
platform::errors::NotFound("no variable called %s in block.",
input.c_str()));
PADDLE_ENFORCE_EQ(
var->GetType(),
FluidDT::VarType_Type_LOD_TENSOR,
platform::errors::InvalidArgument("TensorRT engine only takes "
"LoDTensor as input"));
auto var_shape = var->GetShape();
@@ -263,25 +280,29 @@ class OpConverter {
} else {
input_shape.push_back(min_input_shape[i]);
// the i dimension should be same.
PADDLE_ENFORCE_EQ(min_input_shape[i],
optim_input_shape[i],
platform::errors::InvalidArgument(
"The dim (%d) of the min_input_shape and "
"optim_input_shape should be same."));
}
}
engine->DeclareInput(
input,
FluidDataType2TRT(
var->Proto()->type().lod_tensor().tensor().data_type()),
Vec2TRT_Dims(input_shape, input, true));
#endif
} else {
engine->DeclareInput(
input,
FluidDataType2TRT(
var->Proto()->type().lod_tensor().tensor().data_type()),
Vec2TRT_Dims(var_shape, input));
}
}
PADDLE_ENFORCE_EQ(all_dynamic_shape_set,
true,
platform::errors::InvalidArgument(
"some trt inputs dynamic shape info not set, "
"check the INFO log above for more details."));
@@ -294,20 +315,221 @@ class OpConverter {
engine->ClearWeights();
}
// rank(result) = rank(input)
nvinfer1::ITensor* Gather(nvinfer1::ITensor* input,
const std::vector<int32_t> indices,
int axis = 0) {
auto* indices_tensor = Add1DConstantLayer(indices, " ");
auto* result =
TRT_ENGINE_ADD_LAYER(engine_, Gather, *input, *indices_tensor, axis)
->getOutput(0);
return result;
}
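// e.g. (hypothetical) Gather(shape_tensor, {0, 2}, 0) picks elements 0 and 2
// of a 1-D shape tensor; the result is again a 1-D tensor, of length 2.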
// paddle allows negative index
// for axis length = 5, paddle allows [-5, 4]
nvinfer1::ITensor* FixNegIndices(nvinfer1::ITensor* input_shape,
nvinfer1::ITensor* indices) {
int rank = input_shape->getDimensions().nbDims;
std::vector<int32_t> zero = std::vector<int32_t>(rank, 0);
std::vector<int32_t> minus_one = std::vector<int32_t>(rank, -1);
nvinfer1::ITensor* zero_tensor = Add1DConstantLayer(zero);
nvinfer1::ITensor* minus_one_tensor = Add1DConstantLayer(minus_one);
// -1, 0
auto* sign = Max(Min(indices, zero_tensor), minus_one_tensor);
return Sub(indices, Prod(sign, input_shape));
}
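// Worked example (hypothetical) for FixNegIndices: axis length 5, index -2:
// sign = Max(Min(-2, 0), -1) = -1, so the result is -2 - (-1) * 5 = 3;
// a non-negative index keeps sign = 0 and passes through unchanged.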
nvinfer1::ITensor* Shape(nvinfer1::ITensor* input) {
return TRT_ENGINE_ADD_LAYER(engine_, Shape, *input)->getOutput(0);
}
// Concat does not change the rank
nvinfer1::ITensor* Concat(const std::vector<nvinfer1::ITensor*>& inputs,
int axis = 0) {
auto* layer = TRT_ENGINE_ADD_LAYER(
engine_, Concatenation, inputs.data(), inputs.size());
if (axis != 0) layer->setAxis(axis);
nvinfer1::ITensor* c = layer->getOutput(0);
return c;
}
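// e.g. (hypothetical) concatenating 1-D shape tensors [1, 1] and [d0, d1]
// along axis 0 yields [1, 1, d0, d1]; the elementwise converter uses this
// to pad Y's shape in dynamic-shape mode.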
nvinfer1::ITensor* Sum(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kSUM)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Prod(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kPROD)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Min(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kMIN)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Max(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kMAX)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Sub(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kSUB)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Div(nvinfer1::ITensor* a, nvinfer1::ITensor* b) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *a, *b, nvinfer1::ElementWiseOperation::kDIV)
->getOutput(0);
return c;
}
nvinfer1::ITensor* Act(nvinfer1::ITensor* a,
nvinfer1::ActivationType act_type) {
nvinfer1::ITensor* c =
TRT_ENGINE_ADD_LAYER(engine_, Activation, *a, act_type)->getOutput(0);
return c;
}
// Get element tensor of 1D shape tensor
nvinfer1::ITensor* GetEleTensorOfShape(nvinfer1::ITensor* shape_tensor,
int index,
bool is_scalar = false) {
auto* tensor =
TRT_ENGINE_ADD_LAYER(engine_,
Gather,
*shape_tensor,
*Add1DConstantLayer(index, " ", is_scalar),
0)
->getOutput(0);
return tensor;
}
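// e.g. (hypothetical) GetEleTensorOfShape(Shape(x), 1) is a 1-D tensor
// holding x's dim 1; with is_scalar = true the result is 0-D instead.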
// Create and add Multi-D constant float layer
nvinfer1::ITensor* AddConstantLayer(const float* data,
const std::vector<int32_t>& weight_dims,
const std::string& weight_name) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = std::accumulate(
weight_dims.begin(), weight_dims.end(), 1, std::multiplies<int>());
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims trt_dims;
trt_dims.nbDims = weight_dims.size();
for (size_t i = 0; i < weight_dims.size(); i++)
trt_dims.d[i] = weight_dims[i];
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims, weight.get());
return const_layer->getOutput(0);
}
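// Usage sketch for AddConstantLayer (hypothetical names): given float data[6],
// AddConstantLayer(data, {2, 3}, "eltwise_y_w") adds a [2, 3] FP32 constant
// to the network and returns its output tensor.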
// Create and add 1D constant float layer
nvinfer1::ITensor* Add1DConstantLayer(const std::vector<float>& data,
const std::string& weight_name = "",
bool scalar = false) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = data.size();
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<float>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims input_shape;
input_shape.nbDims = scalar ? 0 : 1;
input_shape.d[0] = data_size;
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
return const_layer->getOutput(0);
}
// Create and add 1D constant layer
nvinfer1::ITensor* Add1DConstantLayer(const std::vector<int>& data,
const std::string& weight_name = "",
bool scalar = false) {
std::unique_ptr<framework::Tensor> tmp_tensor(new framework::Tensor());
int data_size = data.size();
tmp_tensor->Resize({data_size});
auto* tmp_data = tmp_tensor->mutable_data<int>(platform::CPUPlace());
for (int i = 0; i < data_size; i++) {
tmp_data[i] = data[i];
}
engine_->SetWeights(weight_name, std::move(tmp_tensor));
TensorRTEngine::Weight weight{nvinfer1::DataType::kINT32,
static_cast<void*>(tmp_data),
static_cast<size_t>(data_size)};
nvinfer1::Dims input_shape;
input_shape.nbDims = scalar ? 0 : 1;
input_shape.d[0] = data_size;
auto const_layer =
TRT_ENGINE_ADD_LAYER(engine_, Constant, input_shape, weight.get());
return const_layer->getOutput(0);
}
nvinfer1::ITensor* Add1DConstantLayer(nvinfer1::Dims data,
const std::string& weight_name = "",
bool scalar = false) {
std::vector<int> tmp_data;
for (int i = 0; i < data.nbDims; i++) tmp_data.push_back(data.d[i]);
return Add1DConstantLayer(tmp_data, weight_name, scalar);
}
nvinfer1::ITensor* Add1DConstantLayer(int32_t data,
const std::string& weight_name = "",
bool scalar = false) {
std::vector<int> tmp_data;
tmp_data.push_back(data);
return Add1DConstantLayer(tmp_data, weight_name, scalar);
}
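// Usage sketch (hypothetical): Add1DConstantLayer(std::vector<int>{1, 1})
// builds a 1-D INT32 constant [1, 1]; the Dims and int32_t overloads just
// repack their argument and forward to the vector overloads above.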
void RreplenishLayerAndOutput(
nvinfer1::ILayer* layer,
const std::string& layer_type,
const std::vector<std::string>& output_tensor_names,
bool test_mode = false) {
size_t num_out = output_tensor_names.size();
std::string layer_name = layer_type + " (Output: ";
for (size_t i = 0; i < num_out; i++) {
layer->getOutput(i)->setName(output_tensor_names[i].c_str());
engine_->SetITensor(output_tensor_names[i], layer->getOutput(i));
if (test_mode) {
engine_->DeclareOutput(output_tensor_names[i]);
}
layer_name += output_tensor_names[i];
if (i != num_out - 1) layer_name += ", ";
}
layer->setName((layer_name + ")").c_str());
}
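// e.g. RreplenishLayerAndOutput(layer, "elementwise", {"out"}, true) names
// the layer "elementwise (Output: out)" and, in test mode, also declares
// "out" as an engine output.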
void SetEngine(TensorRTEngine* engine) { engine_ = engine; }
@@ -66,13 +66,16 @@ TRT_DT FluidDataType2TRT(FluidDT type) {
// The T can be int32 or int64 type.
template <typename T>
nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape,
std::string input,
bool with_dynamic_shape = false) {
PADDLE_ENFORCE_GT(shape.size(),
0UL,
platform::errors::InvalidArgument(
"TensorRT's tensor input requires at least 1 "
"dimensions, but input %s has %d dims.",
input,
shape.size()));
auto ShapeStr = [](const std::vector<T>& shape) {
std::ostringstream os;
@@ -93,7 +96,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
PADDLE_THROW(platform::errors::InvalidArgument(
"The input [%s] shape of trt subgraph is %s, please enable "
"trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
input,
ShapeStr(shape)));
}
return nvinfer1::Dims3(shape[1], shape[2], shape[3]);
} else if (shape.size() == 5UL) {
@@ -101,7 +105,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
PADDLE_THROW(platform::errors::InvalidArgument(
"The input [%s] shape of trt subgraph is %s, please enable "
"trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
input,
ShapeStr(shape)));
}
return nvinfer1::Dims4(shape[1], shape[2], shape[3], shape[4]);
} else if (shape.size() == 3UL) {
@@ -109,7 +114,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
PADDLE_THROW(platform::errors::InvalidArgument(
"The input [%s] shape of trt subgraph is %s, please enable "
"trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
input,
ShapeStr(shape)));
}
return nvinfer1::Dims2(shape[1], shape[2]);
} else if (shape.size() == 2UL) {
@@ -117,7 +123,8 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
PADDLE_THROW(platform::errors::InvalidArgument(
"The input [%s] shape of trt subgraph is %s, please enable "
"trt dynamic_shape mode by SetTRTDynamicShapeInfo.",
input,
ShapeStr(shape)));
}
nvinfer1::Dims dims;
dims.nbDims = 1;
@@ -125,11 +132,13 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
return dims;
}
// static shape doesn't support 1D op so far.
PADDLE_ENFORCE_NE(shape.size(),
1UL,
platform::errors::InvalidArgument(
"The input [%s] shape of trt subgraph is %s."
"it's not supported by trt so far",
input,
ShapeStr(shape)));
nvinfer1::Dims dims;
dims.nbDims = shape.size() - 1;
@@ -151,7 +160,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, std::string input,
return dims;
}
}
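// e.g. in static-shape mode a 4-D paddle shape [N, C, H, W] maps to
// Dims3(C, H, W): the batch dimension is dropped because implicit-batch
// TensorRT networks carry it separately.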
} // namespace
class TRTInt8Calibrator;
@@ -184,9 +193,11 @@ class TensorRTEngine {
};
TensorRTEngine(
int max_batch,
int max_workspace,
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
const ShapeMapType min_input_shape = {},
const ShapeMapType max_input_shape = {},
const ShapeMapType optim_input_shape = {},
@@ -205,17 +216,21 @@ class TensorRTEngine {
if (min_input_shape_.size() != 0 && max_input_shape_.size() != 0 &&
optim_input_shape_.size() != 0) {
PADDLE_ENFORCE_EQ(
min_input_shape_.size(),
max_input_shape_.size(),
platform::errors::InvalidArgument(
"The min_input_shape_'s size(%d) should be equal to the "
"size(%d) of max_input_shape_",
min_input_shape_.size(),
max_input_shape_.size()));
PADDLE_ENFORCE_EQ(
min_input_shape_.size(),
optim_input_shape_.size(),
platform::errors::InvalidArgument(
"The min_input_shape_'s size(%d) should be equal to the "
"size(%d) of optim_input_shape_",
min_input_shape_.size(),
optim_input_shape_.size()));
#if IS_TRT_VERSION_GE(6000)
with_dynamic_shape_ = true;
#else
@@ -242,7 +257,8 @@ class TensorRTEngine {
const nvinfer1::Dims& dim);
// Set the offset-th output from a layer as the network's output, and set its
// name.
void DeclareOutput(const nvinfer1::ILayer* layer,
int offset,
const std::string& name);
// Set the itensor_map_[name] as the network's output, and set its name.
void DeclareOutput(const std::string& name);
@@ -374,7 +390,8 @@ class TensorRTEngine {
int GetDeviceId() { return device_id_; }
nvinfer1::IPluginV2Layer* AddPlugin(nvinfer1::ITensor* const* inputs,
int num_inputs,
plugin::PluginTensorRT*);
nvinfer1::IPluginV2Layer* AddPluginV2Ext(nvinfer1::ITensor* const* inputs,
int num_inputs,
@@ -431,7 +448,8 @@ class TensorRTEngine {
// After finishing adding ops, freeze this network and creates the execution
// environment.
void FreezeNetwork();
void Execute(int batch_size,
std::vector<void*>* buffers,
cudaStream_t stream = nullptr);
nvinfer1::INetworkDefinition* network() { return infer_network_.get(); }
@@ -448,15 +466,20 @@ class TensorRTEngine {
auto name = it.first;
auto input_shape = it.second;
PADDLE_ENFORCE_EQ(
min_input_shape_.count(name),
true,
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s not found.", name));
PADDLE_ENFORCE_EQ(min_input_shape_[name].size(),
input_shape.size(),
platform::errors::InvalidArgument(
"TRT dynamic_shape min_input_shape %s size not "
"equal, the min_input_shape[%s].size()=%d"
", but the runtime_input_shape[%s].size()=%d.",
name,
name,
min_input_shape_[name].size(),
name,
input_shape.size()));
auto bak_min_shape = min_input_shape_[name];
auto bak_max_shape = max_input_shape_[name];
@@ -497,7 +520,8 @@ class TensorRTEngine {
#if IS_TRT_VERSION_GE(6000)
nvinfer1::IPluginV2Layer* AddDynamicPlugin(
nvinfer1::ITensor* const* inputs,
int num_inputs,
plugin::DynamicPluginTensorRT* plugin) {
owned_pluginv2_.emplace_back(plugin);
return network()->addPluginV2(inputs, num_inputs, *plugin);
@@ -524,7 +548,8 @@ class TensorRTEngine {
void Set(const std::string& attr_name, AttrType* attr) {
if (attrs_.count(attr_name) == 0) {
PADDLE_ENFORCE_EQ(
attrs_.count(attr_name),
0,
platform::errors::AlreadyExists(
"Attribute %s already set in trt engine.", attr_name));
} else {
@@ -543,7 +568,8 @@ class TensorRTEngine {
template <typename AttrType>
void SetNotOwned(const std::string& attr_name, AttrType* attr) {
PADDLE_ENFORCE_EQ(
attrs_.count(attr_name),
0,
platform::errors::AlreadyExists(
"Attribute %s already set in trt engine.", attr_name));
attrs_[attr_name] = attr;
@@ -552,7 +578,8 @@ class TensorRTEngine {
// Get a reference to the attributed previously set.
template <typename AttrType>
AttrType& Get(const std::string& attr_name) const {
PADDLE_ENFORCE_NE(attrs_.find(attr_name),
attrs_.end(),
platform::errors::InvalidArgument(
"Attribute %s not found in trt engine.", attr_name));
try {
@@ -574,7 +601,8 @@ class TensorRTEngine {
};
PADDLE_THROW(platform::errors::InvalidArgument(
"Invalid type for attritube %s, expected: %s, actual: %s.", attr_name,
"Invalid type for attritube %s, expected: %s, actual: %s.",
attr_name,
TypeToString(typeid(AttrType*)),
TypeToString(attrs_.at(attr_name).type())));
}
@@ -672,7 +700,7 @@ class TensorRTEngine {
// them, and a macro like this is more extensible when the underlying
// TensorRT library adds new layer support.
#define TRT_ENGINE_ADD_LAYER(engine__, layer__, ...) \
engine__->network()->add##layer__(__VA_ARGS__)
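// Usage sketch: with the trailing semicolon gone the macro can be used as an
// expression, e.g.
//   auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
// expands to engine_->network()->addShuffle(*Y).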
class TRTEngineManager {
public:
@@ -687,18 +715,27 @@ class TRTEngineManager {
}
TensorRTEngine* Create(
std::string name,
int max_batch,
int max_workspace,
AnalysisConfig::Precision precision = AnalysisConfig::Precision::kFloat32,
TRTInt8Calibrator* calibrator = nullptr,
int device_id = 0,
const std::map<std::string, std::vector<int>> min_input_shape = {},
const std::map<std::string, std::vector<int>> max_input_shape = {},
const std::map<std::string, std::vector<int>> optim_input_shape = {},
bool disable_trt_plugin_fp16 = false,
nvinfer1::ILogger& logger = NaiveLogger::Global()) {
auto* p = new TensorRTEngine(max_batch,
max_workspace,
precision,
calibrator,
device_id,
min_input_shape,
max_input_shape,
optim_input_shape,
disable_trt_plugin_fp16,
logger);
engines_[name].reset(p);
return p;
}