未验证 提交 c36c22fe 编写于 作者: S Shang Zhizhou 提交者: GitHub

Scale 1.8 (#31940)

* add n-d input support for trt scale converter (#31316)

* add n-d input support for trt scale converter

* add flatten for ut

* fix dims

* fix batchnorm when input dims < 3 (#31933)

* fix batchnorm when input dims < 3

* add unittest for batchnorm dims = 2

* fix unittest
Co-authored-by: NPei Yang <peiyang@baidu.com>
上级 ba82757e
...@@ -146,17 +146,49 @@ class BatchNormOpConverter : public OpConverter { ...@@ -146,17 +146,49 @@ class BatchNormOpConverter : public OpConverter {
TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
0}; 0};
nvinfer1::IScaleLayer* layer = int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
TRT_ENGINE_ADD_LAYER(engine_, Scale, *const_cast<nvinfer1::ITensor*>(X), nvinfer1::ILayer* layer = nullptr;
nvinfer1::ScaleMode::kCHANNEL, shift_weights.get(), nvinfer1::IShuffleLayer* expand_layer = nullptr;
scale_weights.get(), power_weights.get()); nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
auto x_dim = X->getDimensions();
if (x_dim.nbDims < 3 + dynamic_shape_offset) {
nvinfer1::Dims expand_shape;
expand_shape.nbDims = 3 + dynamic_shape_offset;
for (int i = 0; i < 3 + dynamic_shape_offset; i++) {
if (i < x_dim.nbDims) {
expand_shape.d[i] = x_dim.d[i] < 0 ? 0 : x_dim.d[i];
} else {
expand_shape.d[i] = 1;
}
}
expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
expand_layer->setReshapeDimensions(expand_shape);
X = expand_layer->getOutput(0);
}
layer = TRT_ENGINE_ADD_LAYER(
engine_, Scale, *X, nvinfer1::ScaleMode::kCHANNEL, shift_weights.get(),
scale_weights.get(), power_weights.get());
auto output_name = op_desc.Output("Y").front(); auto output_name = op_desc.Output("Y").front();
engine_->SetWeights(op_desc.Input("Bias").front(), engine_->SetWeights(op_desc.Input("Bias").front(),
std::move(combile_bias_tensor)); std::move(combile_bias_tensor));
engine_->SetWeights(op_desc.Input("Scale").front(), engine_->SetWeights(op_desc.Input("Scale").front(),
std::move(combile_scale_tensor)); std::move(combile_scale_tensor));
RreplenishLayerAndOutput(layer, "pool2d", {output_name}, test_mode); if (x_dim.nbDims < 3 + dynamic_shape_offset) {
nvinfer1::Dims squeeze_shape;
squeeze_shape.nbDims = x_dim.nbDims;
for (int i = 0; i < squeeze_shape.nbDims; i++) {
squeeze_shape.d[i] = x_dim.d[i] < 0 ? 0 : x_dim.d[i];
}
squeeze_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
squeeze_layer->setReshapeDimensions(squeeze_shape);
layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
}
RreplenishLayerAndOutput(layer, "batchnorm_add_scale", {output_name},
test_mode);
} }
}; };
......
...@@ -45,5 +45,31 @@ class TRTScaleTest(InferencePassTest): ...@@ -45,5 +45,31 @@ class TRTScaleTest(InferencePassTest):
self.check_output_with_option(use_gpu, flatten=True) self.check_output_with_option(use_gpu, flatten=True)
class TRTScaleShape2Test(InferencePassTest):
    """Inference-pass test that chains a scale op into batch_norm with TRT enabled.

    The network input is declared with shape [-1, 512, 512] and fed a single
    random [1, 512, 512] float32 sample.
    """

    def setUp(self):
        """Build the scale -> batch_norm program and set the test configuration."""
        with fluid.program_guard(self.main_program, self.startup_program):
            input_var = fluid.data(
                name="data", shape=[-1, 512, 512], dtype="float32")
            scaled = self.append_scale(input_var)
            bn_out = fluid.layers.batch_norm(scaled, is_test=True)

        random_input = np.random.random([1, 512, 512]).astype("float32")
        self.feeds = {"data": random_input}
        self.enable_trt = True
        # NOTE(review): positional TensorRTParam arguments are defined by
        # InferencePassTest, which is outside this view - confirm their meaning
        # there before changing them.
        self.trt_parameters = TRTScaleShape2Test.TensorRTParam(
            1 << 30, 32, 1, AnalysisConfig.Precision.Float32, False, False)
        self.fetch_list = [bn_out]

    def append_scale(self, data):
        """Return `data` passed through a scale op (scale=2.0, bias=-1.0, bias applied before scaling)."""
        scaled = fluid.layers.scale(
            x=data, scale=2.0, bias=-1.0, bias_after_scale=False)
        return scaled

    def test_check_output(self):
        """Run the output check on GPU; do nothing when CUDA is not compiled in."""
        if not core.is_compiled_with_cuda():
            return
        use_gpu = True
        self.check_output_with_option(use_gpu, flatten=True)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册