diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index d2b0ba0a5fcf86d8abd16ff91088edaa68a65ceb..5e19ae32bd81387bccbf622cb29320d6a13d3121 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2227,6 +2227,7 @@ USE_TRT_CONVERTER(elementwise_div_weight); USE_TRT_CONVERTER(elementwise_min_weight); USE_TRT_CONVERTER(elementwise_max_weight); USE_TRT_CONVERTER(elementwise_pow_weight); +USE_TRT_CONVERTER(elementwise_floordiv_weight); USE_TRT_CONVERTER(elementwise_add_tensor); USE_TRT_CONVERTER(elementwise_sub_tensor); USE_TRT_CONVERTER(elementwise_div_tensor); @@ -2234,6 +2235,7 @@ USE_TRT_CONVERTER(elementwise_mul_tensor); USE_TRT_CONVERTER(elementwise_max_tensor); USE_TRT_CONVERTER(elementwise_min_tensor); USE_TRT_CONVERTER(elementwise_pow_tensor); +USE_TRT_CONVERTER(elementwise_floordiv_tensor); USE_TRT_CONVERTER(transpose); USE_TRT_CONVERTER(transpose2); USE_TRT_CONVERTER(flatten); diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index 82fd1e016119fb71db2a5b60cc46270e6082e648..53cb2da285afae4b8346f5fc6adb3afb9adec354 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -167,6 +167,7 @@ const std::unordered_map {"min", nvinfer1::ElementWiseOperation::kMIN}, {"pow", nvinfer1::ElementWiseOperation::kPOW}, {"max", nvinfer1::ElementWiseOperation::kMAX}, + {"floordiv", nvinfer1::ElementWiseOperation::kFLOOR_DIV}, }; class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter { @@ -204,6 +205,12 @@ class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter { ElementwiseTensorPowOpConverter() { op_type_ = "pow"; } }; +class ElementwiseTensorFloorDivOpConverter + : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorFloorDivOpConverter() { op_type_ = "floordiv"; } +}; + } // namespace tensorrt } // namespace inference } // namespace paddle @@ -222,6 +229,8 @@ REGISTER_TRT_OP_CONVERTER(elementwise_min_weight, ElementwiseTensorMinOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_pow_weight, ElementwiseTensorPowOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_floordiv_weight, + ElementwiseTensorFloorDivOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor, ElementwiseTensorAddOpConverter); @@ -237,3 +246,5 @@ REGISTER_TRT_OP_CONVERTER(elementwise_min_tensor, ElementwiseTensorMinOpConverter); REGISTER_TRT_OP_CONVERTER(elementwise_pow_tensor, ElementwiseTensorPowOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_floordiv_tensor, + ElementwiseTensorFloorDivOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index fd21e70780bd0023f891928d95f71f70a9275e8a..d9d9be1241bde94f352f2d6bb0924133b827bea0 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1244,7 +1244,7 @@ struct SimpleOpTypeSetTeller : public Teller { if (op_type == "elementwise_add" || op_type == "elementwise_mul" || op_type == "elementwise_sub" || op_type == "elementwise_div" || op_type == "elementwise_pow" || op_type == "elementwise_min" || - op_type == "elementwise_max") { + op_type == "elementwise_max" || op_type == "elementwise_floordiv") { if (desc.Input("X").size() != 1) { VLOG(3) << "The input op's Input(\"X\").size() " "should equal to 1, but received Input(\"X\").size() = " @@ -2288,6 +2288,7 @@ struct SimpleOpTypeSetTeller : public Teller { "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", "equal", "dropout", "prelu", @@ -2413,6 +2414,7 @@ struct SimpleOpTypeSetTeller : public Teller { "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", "equal", "dropout", "prelu", diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py index 8420c9cdaae46618e08bd6ae3fab98842648766e..3c0230e84b52ede9d24ff1136abe03a1fce22a51 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py @@ -28,11 +28,22 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest): return True def sample_program_configs(self): - def generate_input(shape): - return np.random.random(shape).astype(np.float32) - - def generate_weight(): - return np.random.randn(1, 32, 1, 1).astype(np.float32) + def generate_input(shape, op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=shape, dtype=np.int32 + ) + else: + return np.random.random(shape).astype(np.float32) + + def generate_weight(op_type): + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=[1, 32, 1, 1], dtype=np.int32 + ) + else: + return np.random.randn(1, 32, 1, 1).astype(np.float32) for batch in [1, 4]: for shape in [[batch, 32, 16, 32]]: @@ -44,6 +55,7 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest): "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", ]: for axis in [-1]: self.dims = len(shape) @@ -65,12 +77,14 @@ class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest): ops=ops, weights={ "weight": TensorConfig( - data_gen=partial(generate_weight) + data_gen=partial(generate_weight, op_type) ) }, inputs={ "input_data": TensorConfig( - data_gen=partial(generate_input, shape) + data_gen=partial( + generate_input, shape, op_type + ) ), }, outputs=["output_data"], @@ -142,11 +156,23 @@ class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest): return True def sample_program_configs(self): - def generate_input(shape): - return np.random.random(shape).astype(np.float32) - - def generate_weight(): - return np.random.randn(1).astype(np.float32) + def generate_input(shape, op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=shape, dtype=np.int32 + ) + else: + return np.random.random(shape).astype(np.float32) + + def generate_weight(op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=[1], dtype=np.int32 + ) + else: + return np.random.randn(1).astype(np.float32) for shape in [[32]]: for op_type in [ @@ -157,6 +183,7 @@ class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest): "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", ]: for axis in [-1]: self.dims = len(shape) @@ -175,12 +202,12 @@ class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest): ops=ops, weights={ "weight": TensorConfig( - data_gen=partial(generate_weight) + data_gen=partial(generate_weight, op_type) ) }, inputs={ "input_data": TensorConfig( - data_gen=partial(generate_input, shape) + data_gen=partial(generate_input, shape, op_type) ), }, outputs=["output_data"], @@ -245,11 +272,23 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): return True def sample_program_configs(self): - def generate_input(shape): - return np.random.random(shape).astype(np.float32) - - def generate_weight(): - return np.random.randn(32).astype(np.float32) + def generate_input(shape, op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=shape, dtype=np.int32 + ) + else: + return np.random.random(shape).astype(np.float32) + + def generate_weight(op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=[32], dtype=np.int32 + ) + else: + return np.random.randn(32).astype(np.float32) for batch in [1, 4]: for shape in [ @@ -266,6 +305,7 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", ]: for axis in [-1 if len(shape) == 1 else 1]: self.dims = len(shape) @@ -287,12 +327,14 @@ class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): ops=ops, weights={ "weight": TensorConfig( - data_gen=partial(generate_weight) + data_gen=partial(generate_weight, op_type) ) }, inputs={ "input_data": TensorConfig( - data_gen=partial(generate_input, shape) + data_gen=partial( + generate_input, shape, op_type + ) ), }, outputs=["output_data"], @@ -379,8 +421,14 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( return True def sample_program_configs(self): - def generate_input(shape): - return np.random.random(shape).astype(np.float32) + def generate_input(shape, op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=shape, dtype=np.int32 + ) + else: + return np.random.random(shape).astype(np.float32) for shape in [[4], [4, 32], [2, 32, 16], [1, 8, 16, 32]]: for op_type in [ @@ -391,6 +439,7 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", ]: for axis in [0, -1]: self.dims = len(shape) @@ -413,10 +462,10 @@ class TrtConvertElementwiseTest_two_input_without_broadcast( weights={}, inputs={ "input_data1": TensorConfig( - data_gen=partial(generate_input, shape) + data_gen=partial(generate_input, shape, op_type) ), "input_data2": TensorConfig( - data_gen=partial(generate_input, shape) + data_gen=partial(generate_input, shape, op_type) ), }, outputs=["output_data"], @@ -530,8 +579,14 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): return True def sample_program_configs(self): - def generate_input(shape): - return np.random.random(shape).astype(np.float32) + def generate_input(shape, op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=shape, dtype=np.int32 + ) + else: + return np.random.random(shape).astype(np.float32) input1_shape_list = [[4, 32], [2, 4, 32], [4, 2, 4, 32]] input2_shape1_list = [[32], [4, 32], [2, 4, 32]] @@ -575,6 +630,7 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", ]: for axis in axis_list[j][i]: self.shape1 = input1_shape @@ -599,12 +655,12 @@ class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): inputs={ "input_data1": TensorConfig( data_gen=partial( - generate_input, input1_shape + generate_input, input1_shape, op_type ) ), "input_data2": TensorConfig( data_gen=partial( - generate_input, input2_shape + generate_input, input2_shape, op_type ) ), }, @@ -676,12 +732,23 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): return True def sample_program_configs(self): - def generate_input(shape): - return np.random.random(shape).astype(np.float32) + def generate_input(shape, op_type): + # elementwise_floordiv is integer only + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=shape, dtype=np.int32 + ) + else: + return np.random.random(shape).astype(np.float32) # use rand not randn to avoiding pow producing `NAN` - def generate_weight(): - return np.random.rand(32).astype(np.float32) + def generate_weight(op_type): + if op_type == "elementwise_floordiv": + return np.random.randint( + low=1, high=10000, size=[32], dtype=np.int32 + ) + else: + return np.random.rand(32).astype(np.float32) for batch in [1, 2, 4]: for shape in [ @@ -698,6 +765,7 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): "elementwise_pow", "elementwise_min", "elementwise_max", + "elementwise_floordiv", ]: self.op_type = op_type for axis in [-1 if len(shape) == 1 else 1]: @@ -720,12 +788,14 @@ class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): ops=ops, weights={ "weight": TensorConfig( - data_gen=partial(generate_weight) + data_gen=partial(generate_weight, op_type) ) }, inputs={ "input_data": TensorConfig( - data_gen=partial(generate_input, shape) + data_gen=partial( + generate_input, shape, op_type + ) ), }, outputs=["output_data"],