未验证 提交 d77c4955 编写于 作者: Z zhoutianzi666 提交者: GitHub

[Inference] rewrite elementwise trt layer (#43615)

* rewrite elementwise
上级 2bef8a48
......@@ -19,228 +19,115 @@ namespace paddle {
namespace inference {
namespace tensorrt {
static bool CheckDims(const nvinfer1::Dims& dims_x,
const nvinfer1::Dims& dims_y) {
if (dims_x.nbDims != dims_y.nbDims) {
return false;
}
for (int i = 0; i < dims_x.nbDims; i++) {
if (dims_x.d[i] != dims_y.d[i]) {
return false;
}
}
return true;
}
class ElementwiseWeightOpConverter : public OpConverter {
class ElementwiseTensorOpConverter : public OpConverter {
public:
ElementwiseWeightOpConverter() {}
ElementwiseTensorOpConverter() {}
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
// Here the two nullptr looks strange, that's because the
// framework::OpDesc's constructor is strange.
nvinfer1::ILayer* layer = nullptr;
const framework::Scope& scope,
bool test_mode) override {
VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
framework::OpDesc op_desc(op, nullptr);
VLOG(3) << "Convert a fluid elementwise op to TensorRT IScaleLayer";
auto* X = engine_->GetITensor(op_desc.Input("X").front());
nvinfer1::ITensor* Y = nullptr;
auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
PADDLE_ENFORCE_NOT_NULL(
Y_v, platform::errors::NotFound("Variable %s not found in scope.",
op_desc.Input("Y").front().c_str()));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data = nullptr;
auto output_name = op_desc.Output("Out")[0];
weight_data = engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
if (Y_v) {
// Y is weight
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
float* weight_data =
engine_->GetWeightCPUData(op_desc.Input("Y").front(), Y_t);
std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
TensorRTEngine::Weight y_weight{nvinfer1::DataType::kFLOAT,
static_cast<void*>(weight_data),
static_cast<size_t>(Y_t->numel())};
nvinfer1::Dims trt_dims_y;
trt_dims_y.nbDims = dims_y.size();
for (int i = 0; i < trt_dims_y.nbDims; i++) {
trt_dims_y.d[i] = dims_y[i];
}
Y = TRT_ENGINE_ADD_LAYER(engine_, Constant, trt_dims_y, y_weight.get())
->getOutput(0);
} else {
Y = engine_->GetITensor(op_desc.Input("Y").front());
}
if (X->getDimensions().nbDims < Y->getDimensions().nbDims) {
auto* tmp = X;
X = Y;
Y = tmp;
}
nvinfer1::Dims dims_x = X->getDimensions();
std::vector<int> dims_y = phi::vectorize<int>(Y_t->dims());
nvinfer1::Dims dims_y = Y->getDimensions();
auto output_name = op_desc.Output("Out")[0];
auto regist_eltwise_weight = [&](nvinfer1::ScaleMode scale_mode) {
nvinfer1::IShuffleLayer* expand_layer = nullptr;
nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
auto input_dim = X->getDimensions();
// reshape
if (input_dim.nbDims < 3 + dynamic_shape_offset) {
nvinfer1::Dims expand_shape;
expand_shape.nbDims = 3 + dynamic_shape_offset;
for (int i = 0; i < expand_shape.nbDims; i++) {
if (i < input_dim.nbDims) {
expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
} else {
expand_shape.d[i] = 1;
}
}
expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
expand_layer->setReshapeDimensions(expand_shape);
X = expand_layer->getOutput(0);
expand_layer->getOutput(0)->setName(
("elementwise_reshape_out: " + output_name).c_str());
expand_layer->setName(
("Elewise: Shuffle: (Output: " + output_name + ")").c_str());
}
// eltwise_ops
TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT, nullptr,
0};
TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr,
0};
TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
0};
if (op_type_ == "add") {
shift_weights = TensorRTEngine::Weight(
nvinfer1::DataType::kFLOAT, static_cast<void*>(weight_data),
static_cast<size_t>(Y_t->numel()));
} else if (op_type_ == "sub") {
for (int i = 0; i < Y_t->numel(); i++) {
weight_data[i] = -weight_data[i];
}
shift_weights = TensorRTEngine::Weight(
nvinfer1::DataType::kFLOAT, static_cast<void*>(weight_data),
static_cast<size_t>(Y_t->numel()));
} else if (op_type_ == "mul") {
scale_weights = TensorRTEngine::Weight(
nvinfer1::DataType::kFLOAT, static_cast<void*>(weight_data),
static_cast<size_t>(Y_t->numel()));
} else if (op_type_ == "div") {
for (int i = 0; i < Y_t->numel(); i++) {
weight_data[i] = 1.f / weight_data[i];
// axis here is relative to explicit batch
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
int real_x_rank = dims_x.nbDims;
int real_y_rank = dims_y.nbDims;
if (!engine_->with_dynamic_shape()) {
real_x_rank++;
real_y_rank++;
if (Y_v) real_y_rank--;
}
if (axis == -1) {
axis = real_x_rank - real_y_rank;
}
if (!engine_->with_dynamic_shape() && axis > 0) {
axis--;
}
// X: - - - - - - -
// axis
// Y: - - -
// we need expand Y's rank = X's rank
int left_one_num = axis;
int right_one_num = dims_x.nbDims - axis - dims_y.nbDims;
nvinfer1::IShuffleLayer* reshape_layer;
nvinfer1::ITensor* reshape_y_tensor;
if (left_one_num > 0 || right_one_num > 0) {
if (engine_->with_dynamic_shape()) {
auto* y_shape_tensor = Shape(Y);
auto* new_y_shape_tensor = y_shape_tensor;
if (axis > 0) {
std::vector<int32_t> left_one(left_one_num, 1);
auto* left_one_tensor = Add1DConstantLayer(left_one);
new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
left_one_tensor, new_y_shape_tensor});
}
scale_weights = TensorRTEngine::Weight(
nvinfer1::DataType::kFLOAT, static_cast<void*>(weight_data),
static_cast<size_t>(Y_t->numel()));
} else if (op_type_ == "pow") {
power_weights = TensorRTEngine::Weight(
nvinfer1::DataType::kFLOAT, static_cast<void*>(weight_data),
static_cast<size_t>(Y_t->numel()));
}
nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
engine_, ScaleNd, *X, scale_mode, shift_weights.get(),
scale_weights.get(), power_weights.get(), dynamic_shape_offset);
layer = scale_layer;
// reshape
if (input_dim.nbDims < 3 + dynamic_shape_offset) {
nvinfer1::Dims squeeze_shape;
squeeze_shape.nbDims = input_dim.nbDims;
for (int i = 0; i < squeeze_shape.nbDims; i++) {
squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
if (right_one_num > 0) {
std::vector<int32_t> right_one(right_one_num, 1);
auto* right_one_tensor = Add1DConstantLayer(right_one);
new_y_shape_tensor = Concat(std::vector<nvinfer1::ITensor*>{
new_y_shape_tensor, right_one_tensor});
}
squeeze_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
squeeze_layer->setReshapeDimensions(squeeze_shape);
RreplenishLayerAndOutput(squeeze_layer, "elementwise_" + op_type_,
{output_name}, test_mode);
reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
reshape_layer->setInput(1, *new_y_shape_tensor);
} else {
RreplenishLayerAndOutput(layer, "elementwise_" + op_type_,
{output_name}, test_mode);
nvinfer1::Dims new_y_dims;
new_y_dims.nbDims = left_one_num + dims_y.nbDims + right_one_num;
for (int i = 0; i < new_y_dims.nbDims; i++) new_y_dims.d[i] = 1;
for (int i = 0; i < dims_y.nbDims; i++)
new_y_dims.d[left_one_num + i] = dims_y.d[i];
reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *Y);
reshape_layer->setReshapeDimensions(new_y_dims);
}
};
// dynamic shape
if (engine_->with_dynamic_shape()) {
if (dims_y.size() == 1 && dims_y[0] == dims_x.d[1]) {
regist_eltwise_weight(nvinfer1::ScaleMode::kCHANNEL);
} else if (dims_y.size() == 1 && dims_y[0] == 1) {
regist_eltwise_weight(nvinfer1::ScaleMode::kUNIFORM);
} else if (dims_y.size() == static_cast<size_t>(dims_x.nbDims)) {
regist_eltwise_weight(nvinfer1::ScaleMode::kELEMENTWISE);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"The size of input_y's dims is %d, but TensorRT dynamic shape "
"only support size = 1 or size = input_x.size() for Elementwise "
"op!",
dims_y.size()));
}
return;
}
// static shape with dynamic batch
std::vector<int> no_batch_dims;
int start_index = 0;
for (; start_index < dims_x.nbDims; start_index++) {
no_batch_dims.push_back(dims_x.d[start_index]);
}
if (dims_y.size() == 1 && dims_y[0] == no_batch_dims[0]) {
regist_eltwise_weight(nvinfer1::ScaleMode::kCHANNEL);
} else if (dims_y.size() == 1 && dims_y[0] == 1) {
regist_eltwise_weight(nvinfer1::ScaleMode::kUNIFORM);
} else if (dims_y.size() == no_batch_dims.size() + 1) {
regist_eltwise_weight(nvinfer1::ScaleMode::kELEMENTWISE);
reshape_y_tensor = reshape_layer->getOutput(0);
} else {
PADDLE_THROW(platform::errors::InvalidArgument(
"The size of input_y's dims is %d, but TensorRT dynamic shape "
"only support size = 1 or size = input_x.size() for Elementwise "
"op!",
dims_y.size()));
// In fact , we can remove this `else`, but -> rt_resnet50_test CI in trt
// 6015 faling, how ridiculous!
reshape_y_tensor = Y;
}
}
protected:
std::string op_type_;
};
class ElementwiseTensorOpConverter : public OpConverter {
public:
ElementwiseTensorOpConverter() {}
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope, bool test_mode) override {
auto op_pair = ops.find(op_type_);
PADDLE_ENFORCE_NE(op_pair, ops.end(),
PADDLE_ENFORCE_NE(op_pair,
ops.end(),
platform::errors::InvalidArgument(
"Elementwise op's type(%s) is not supported. Please "
"check if the op_type is correct.",
op_type_));
// Here the two nullptr looks strange, that's because the
// framework::OpDesc's constructor is strange.
framework::OpDesc op_desc(op, nullptr);
nvinfer1::ILayer* layer = nullptr;
auto* X = engine_->GetITensor(op_desc.Input("X").front());
auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
std::vector<nvinfer1::ITensor*> itensors;
itensors.push_back(X);
itensors.push_back(Y);
nvinfer1::Dims dims_x = X->getDimensions();
nvinfer1::Dims dims_y = Y->getDimensions();
int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis"));
auto output_name = op_desc.Output("Out")[0];
auto common_func = [&](nvinfer1::ILayer* layer) {
RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
};
if (dims_x.nbDims == dims_y.nbDims) {
// The two input tensor should have the same dims
VLOG(3) << "Convert a fluid elementwise op to TensorRT IElementWiseLayer";
nvinfer1::IElementWiseLayer* elet_layer =
TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *X, *Y, op_pair->second);
layer = elet_layer;
} else {
VLOG(3) << "Convert a fluid elementwise op to TensorRT "
"ElementWisePluginLayer";
if (engine_->with_dynamic_shape()) {
#if IS_TRT_VERSION_GE(6000)
plugin::ElementwisePluginDynamic* plugin =
new plugin::ElementwisePluginDynamic(op_type_, axis);
layer = engine_->AddDynamicPlugin(itensors.data(), 2, plugin);
#else
PADDLE_THROW(platform::errors::Fatal(
"You are running the TRT Dynamic Shape mode, need to confirm that "
"your TRT version is no less than 6.0"));
#endif
} else {
plugin::ElementWisePlugin* plugin =
new plugin::ElementWisePlugin(op_type_, dims_x, dims_y, axis);
std::vector<nvinfer1::ITensor*> inputs{X, Y};
auto* plugin_layer = engine_->AddPlugin(
inputs.data(), inputs.size(),
reinterpret_cast<plugin::PluginTensorRT*>(plugin));
layer = plugin_layer;
}
}
common_func(layer);
auto* layer = TRT_ENGINE_ADD_LAYER(
engine_, ElementWise, *X, *reshape_y_tensor, op_pair->second);
RreplenishLayerAndOutput(layer, "elementwise", {output_name}, test_mode);
}
protected:
......@@ -260,31 +147,6 @@ const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
{"max", nvinfer1::ElementWiseOperation::kMAX},
};
class ElementwiseWeightAddOpConverter : public ElementwiseWeightOpConverter {
public:
ElementwiseWeightAddOpConverter() { op_type_ = "add"; }
};
class ElementwiseWeightMulOpConverter : public ElementwiseWeightOpConverter {
public:
ElementwiseWeightMulOpConverter() { op_type_ = "mul"; }
};
class ElementwiseWeightSubOpConverter : public ElementwiseWeightOpConverter {
public:
ElementwiseWeightSubOpConverter() { op_type_ = "sub"; }
};
class ElementwiseWeightDivOpConverter : public ElementwiseWeightOpConverter {
public:
ElementwiseWeightDivOpConverter() { op_type_ = "div"; }
};
class ElementwiseWeightPowOpConverter : public ElementwiseWeightOpConverter {
public:
ElementwiseWeightPowOpConverter() { op_type_ = "pow"; }
};
class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
public:
ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
......@@ -325,15 +187,15 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
} // namespace paddle
REGISTER_TRT_OP_CONVERTER(elementwise_add_weight,
ElementwiseWeightAddOpConverter);
ElementwiseTensorAddOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_mul_weight,
ElementwiseWeightMulOpConverter);
ElementwiseTensorMulOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_sub_weight,
ElementwiseWeightSubOpConverter);
ElementwiseTensorSubOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_div_weight,
ElementwiseWeightDivOpConverter);
ElementwiseTensorDivOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight,
ElementwiseWeightPowOpConverter);
ElementwiseTensorPowOpConverter);
REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor,
ElementwiseTensorAddOpConverter);
......
......@@ -1256,6 +1256,28 @@ bool OpTeller::Tell(const framework::ir::Node* node,
}
}
}
// not support following four inputs for slice in paddle-trt
auto slice_inputs = desc.Inputs(); // its size == 5
if (slice_inputs.find("StartsTensor") != slice_inputs.end()) {
if (desc.Input("StartsTensor").size()) {
return false;
}
}
if (slice_inputs.find("EndsTensor") != slice_inputs.end()) {
if (desc.Input("EndsTensor").size()) {
return false;
}
}
if (slice_inputs.find("StartsTensorList") != slice_inputs.end()) {
if (desc.Input("StartsTensorList").size()) {
return false;
}
}
if (slice_inputs.find("EndsTensorList") != slice_inputs.end()) {
if (desc.Input("EndsTensorList").size()) {
return false;
}
}
}
if (op_type == "elementwise_add" || op_type == "elementwise_mul" ||
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册