未验证 提交 5defefd6 编写于 作者: Z Zhang Jun 提交者: GitHub

[inference][trt] Upgrade expand_v2, cast, and nearest_interp for Stable Diffusion (#48998)

* update nearest_interp, expand_v2, cast for stable diffusion

* update nearest_interp, expand_v2, cast for stable diffusion

* correct shape rank

* Update expand_v2_op.cc
上级 351d37d9
......@@ -33,21 +33,41 @@ class ExpandV2OpConverter : public OpConverter {
void operator()(const framework::proto::OpDesc& op,
const framework::Scope& scope,
bool test_mode) override {
VLOG(3) << "convert a paddle expand_v2 op to trt expand layer.";
framework::OpDesc op_desc(op, nullptr);
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
auto inputs = op_desc.Inputs();
auto input_dims = input->getDimensions();
auto output_name = op_desc.Output("Out")[0];
auto rank = input_dims.nbDims;
nvinfer1::ITensor* shape_tensor = nullptr;
int32_t shape_rank = 0;
if (inputs.find("Shape") != inputs.end() &&
op_desc.Input("Shape").size() >= 1) {
shape_tensor = engine_->GetITensor(op_desc.Input("Shape")[0]);
shape_rank = shape_tensor->getDimensions().d[0];
} else if (inputs.find("expand_shapes_tensor") != inputs.end() &&
op_desc.Input("expand_shapes_tensor").size() >= 1) {
int shape_size = op_desc.Input("expand_shapes_tensor").size();
std::vector<nvinfer1::ITensor*> shape_tensors;
for (int i = 0; i < shape_size; ++i) {
shape_tensors.push_back(
engine_->GetITensor(op_desc.Input("expand_shapes_tensor")[i]));
}
shape_tensor = Concat(shape_tensors);
shape_rank = shape_size;
} else {
std::vector<int32_t> shape =
PADDLE_GET_CONST(std::vector<int32_t>, op_desc.GetAttr("shape"));
int32_t nbDims_num = shape.size();
shape_tensor = Add1DConstantLayer(shape, output_name + "_shape_tensor_");
shape_rank = shape.size();
}
auto* shape_tensor =
Add1DConstantLayer(shape, output_name + "_shape_tensor_");
nvinfer1::ITensor* input_shape_tensor;
if (rank < nbDims_num) {
if (rank < shape_rank) {
auto* one_rank_tensor =
Add1DConstantLayer(std::vector<int32_t>(nbDims_num - rank, 1),
Add1DConstantLayer(std::vector<int32_t>(shape_rank - rank, 1),
output_name + "_one_rank_tensor_");
auto in_shape_tensor = Shape(input);
std::vector<nvinfer1::ITensor*> itensors;
......@@ -61,16 +81,16 @@ class ExpandV2OpConverter : public OpConverter {
auto* shuffle = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
shuffle->setInput(1, *input_shape_tensor);
std::vector<int32_t> start_vec(nbDims_num, 0);
std::vector<int32_t> start_vec(shape_rank, 0);
nvinfer1::Dims start;
start.nbDims = nbDims_num;
for (int32_t i = 0; i < nbDims_num; ++i) {
start.nbDims = shape_rank;
for (int32_t i = 0; i < shape_rank; ++i) {
start.d[i] = start_vec[i];
}
nvinfer1::Dims size;
size.nbDims = nbDims_num;
size.nbDims = shape_rank;
nvinfer1::Dims stride;
stride.nbDims = nbDims_num;
stride.nbDims = shape_rank;
auto starts_tensor =
Add1DConstantLayer(start_vec, output_name + "_start_tensor_");
......
......@@ -12,15 +12,6 @@ limitations under the License. */
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
namespace paddle {
namespace framework {
class Scope;
namespace proto {
class OpDesc;
} // namespace proto
} // namespace framework
} // namespace paddle
namespace paddle {
namespace inference {
namespace tensorrt {
......@@ -34,6 +25,7 @@ class NearestInterpolateOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
auto inputs = op_desc.Inputs();
std::string input_name = op_desc.Input("X").front();
std::string output_name = op_desc.Output("Out").front();
......
......@@ -38,6 +38,7 @@ class NearestInterpolateV2OpConverter : public OpConverter {
std::string output_name = op_desc.Output("Out").front();
auto input = engine_->GetITensor(input_name);
auto inputs = op_desc.Inputs();
auto data_layout = phi::StringToDataLayout(
PADDLE_GET_CONST(std::string, op_desc.GetAttr("data_layout")));
......@@ -73,9 +74,23 @@ class NearestInterpolateV2OpConverter : public OpConverter {
scale_w =
static_cast<float>(out_w) / static_cast<float>(in_dim.d[w_axis]);
} else {
if (scale.size() >= 2) {
scale_h = scale[0];
scale_w = scale[1];
}
}
// Priority: Input(SizeTensor) > attr(out_h/out_w) > attr(scale)
nvinfer1::ITensor* outsize_tensor = nullptr;
if (engine_->with_dynamic_shape() &&
inputs.find("SizeTensor") != inputs.end()) {
if (op_desc.Input("SizeTensor").size() >= 2) {
auto* outsize_h = engine_->GetITensor(op_desc.Input("SizeTensor")[0]);
auto* outsize_w = engine_->GetITensor(op_desc.Input("SizeTensor")[1]);
outsize_tensor =
Concat(std::vector<nvinfer1::ITensor*>{outsize_h, outsize_w});
}
}
if (engine_->with_dynamic_shape()) {
scales.push_back(1.f);
......@@ -94,7 +109,27 @@ class NearestInterpolateV2OpConverter : public OpConverter {
PADDLE_THROW(platform::errors::InvalidArgument(
"Data layout must be NCHW or NHWC."));
}
if (engine_->with_dynamic_shape()) {
if (outsize_tensor != nullptr) {
std::vector<nvinfer1::ITensor*> outsize_itensors;
auto* input_shape = Shape(input);
outsize_itensors.push_back(GetEleTensorOfShape(input_shape, 0));
if (data_layout == phi::DataLayout::kNCHW) {
outsize_itensors.push_back(GetEleTensorOfShape(input_shape, 1));
outsize_itensors.push_back(outsize_tensor);
} else if (data_layout == phi::DataLayout::kNHWC) {
outsize_itensors.push_back(outsize_tensor);
outsize_itensors.push_back(GetEleTensorOfShape(input_shape, 3));
}
layer->setInput(1, *Concat(outsize_itensors));
} else {
layer->setScales(scales.data(), scales.size());
}
} else {
layer->setScales(scales.data(), scales.size());
}
RreplenishLayerAndOutput(
layer, "nearest_interp_v2", {output_name}, test_mode);
......
......@@ -676,7 +676,7 @@ struct SimpleOpTypeSetTeller : public Teller {
if (!has_attrs) return false;
}
if (op_type == "arg_max") {
if (op_type == "arg_max" || op_type == "arg_min") {
if (!desc.HasAttr("axis", /*with_attr_var=*/false)) {
VLOG(3) << "Skip to convert into TRT while found Attribute('axis') is "
"Variable type in arg_max.";
......@@ -691,21 +691,6 @@ struct SimpleOpTypeSetTeller : public Teller {
if (axis == 0 || flatten || (dtype != 2 && dtype != 3)) return false;
}
if (op_type == "arg_min") {
if (!desc.HasAttr("axis", /*with_attr_var=*/false)) {
VLOG(3) << "Skip to convert into TRT while found Attribute('axis') is "
"Variable type in arg_min.";
return false;
}
int axis = desc.HasAttr("axis")
? PADDLE_GET_CONST(int64_t, desc.GetAttr("axis"))
: -1;
bool flatten = PADDLE_GET_CONST(bool, desc.GetAttr("flatten"));
int dtype = PADDLE_GET_CONST(int, desc.GetAttr("dtype"));
if (axis == 0 || flatten || dtype != 2) return false;
}
if (op_type == "affine_channel") {
if (!desc.HasAttr("data_layout")) return false;
auto data_layout = phi::StringToDataLayout(
......@@ -836,6 +821,14 @@ struct SimpleOpTypeSetTeller : public Teller {
auto interp_method =
PADDLE_GET_CONST(std::string, desc.GetAttr("interp_method"));
if (interp_method != "nearest") return false;
auto resize_inputs = desc.Inputs();
if (with_dynamic_shape &&
resize_inputs.find("SizeTensor") != resize_inputs.end() &&
desc.Input("SizeTensor").size() == 2) {
return true;
}
auto scale = PADDLE_GET_CONST(std::vector<float>, desc.GetAttr("scale"));
auto out_h = PADDLE_GET_CONST(int, desc.GetAttr("out_h"));
auto out_w = PADDLE_GET_CONST(int, desc.GetAttr("out_w"));
......@@ -2292,7 +2285,8 @@ struct SimpleOpTypeSetTeller : public Teller {
}
}
#endif
if (!((in_dtype == 5 || in_dtype == 4 || in_dtype == 2) &&
if (!((in_dtype == 5 || in_dtype == 4 || in_dtype == 3 ||
in_dtype == 2) &&
(out_dtype == 5 || out_dtype == 4 || out_dtype == 2))) {
VLOG(3) << "only valid conversions are: "
"(kFLOAT | kHALF | kINT32) -> (kFLOAT | kHALF | kINT32)";
......@@ -2411,18 +2405,6 @@ struct SimpleOpTypeSetTeller : public Teller {
if (!desc.HasAttr("shape")) {
return false;
}
auto expand_v2_inputs = desc.Inputs();
if (expand_v2_inputs.find("Shape") != expand_v2_inputs.end()) {
if (desc.Input("Shape").size() >= 1) {
return false;
}
}
if (expand_v2_inputs.find("expand_shapes_tensor") !=
expand_v2_inputs.end()) {
if (desc.Input("expand_shapes_tensor").size() >= 1) {
return false;
}
}
}
if (use_no_calib_int8) {
......
......@@ -249,9 +249,9 @@ class TrtConvertExpandV2Test2(TrtLayerAutoScanTest):
generate_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32
# fill_constant will be folded by the constant folding pass!
yield self.create_inference_config(), (0, 3), 1e-5
yield self.create_inference_config(), (1, 2), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), (0, 3), 1e-3
yield self.create_inference_config(), (1, 2), 1e-3
def add_skip_trt_case(self):
pass
......@@ -393,9 +393,9 @@ class TrtConvertExpandV2Test3(TrtLayerAutoScanTest):
generate_dynamic_shape()
self.trt_param.precision = paddle_infer.PrecisionType.Float32
# fill_constant will be folded by the constant folding pass!
yield self.create_inference_config(), (0, 3), 1e-5
yield self.create_inference_config(), (1, 2), 1e-5
self.trt_param.precision = paddle_infer.PrecisionType.Half
yield self.create_inference_config(), (0, 3), 1e-3
yield self.create_inference_config(), (1, 2), 1e-3
def add_skip_trt_case(self):
pass
......
......@@ -30,10 +30,16 @@ class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest):
# Data generator for the "input_data" tensor: returns an all-ones float32
# array of shape [1, 3, 32, 32].
# NOTE(review): presumably laid out as NCHW to match the op's "data_layout"
# attribute -- confirm against the full test.
def generate_input():
return np.ones([1, 3, 32, 32]).astype(np.float32)
# Data generator shared by the "size_tensor_data0" and "size_tensor_data1"
# weights: returns a one-element int32 array holding the value 64.
# NOTE(review): these feed the op's "SizeTensor" input, so 64 is presumably
# the requested output height/width -- confirm against the op definition.
def generate_weight():
return np.array([64]).astype(np.int32)
ops_config = [
{
"op_type": "nearest_interp_v2",
"op_inputs": {"X": ["input_data"]},
"op_inputs": {
"X": ["input_data"],
"SizeTensor": ["size_tensor_data0", "size_tensor_data1"],
},
"op_outputs": {"Out": ["interp_output_data"]},
"op_attrs": {
"data_layout": "NCHW",
......@@ -51,7 +57,10 @@ class TrtConvertNearestInterpV2Test(TrtLayerAutoScanTest):
ops = self.generate_op_config(ops_config)
program_config = ProgramConfig(
ops=ops,
weights={},
weights={
"size_tensor_data0": TensorConfig(data_gen=generate_weight),
"size_tensor_data1": TensorConfig(data_gen=generate_weight),
},
inputs={"input_data": TensorConfig(data_gen=generate_input)},
outputs=["interp_output_data"],
)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册