diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 7f5e5a622b9e8578f9b39aa9024d653d7b17138f..d86c046f2e5b08a4c00cf6cad19627e6a196c798 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,6 +1,6 @@ # Add TRT tests nv_library(tensorrt_converter - SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc + SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc DEPS tensorrt_engine operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS @@ -17,3 +17,6 @@ nv_test(test_trt_conv_op SRCS test_conv2d_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine conv_op SERIAL) nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL) + +nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc + DEPS ${FLUID_CORE_MODULES} tensorrt_engine elementwise_add_op SERIAL) diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..3744550f60a1696aedd8a3ecd24f1b21d22325b9 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -0,0 +1,210 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +class ElementwiseWeightOpConverter : public OpConverter { + public: + ElementwiseWeightOpConverter() {} + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + // Here the two nullptr looks strange, that's because the + // framework::OpDesc's constructor is strange. + framework::OpDesc op_desc(op, nullptr); + LOG(INFO) << "convert a fluid elementwise op to tensorrt IScaleLayer"; + + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto* X = engine_->GetITensor(op_desc.Input("X").front()); + nvinfer1::Dims dims_x = X->getDimensions(); + PADDLE_ENFORCE(dims_x.nbDims >= 3); + + auto* Y_v = scope.FindVar(op_desc.Input("Y").front()); + PADDLE_ENFORCE_NOT_NULL(Y_v); + auto* Y_t = Y_v->GetMutable(); + auto* weight_data = Y_t->mutable_data(platform::CPUPlace()); + auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + + std::vector dims_y = framework::vectorize2int(Y_t->dims()); + if (static_cast(dims_y.size()) == dims_x.nbDims + 1) { + if (dims_y[0] == 1) dims_y.erase(dims_y.begin()); + } + + if (static_cast(dims_y.size()) == 1 && dims_y[0] == dims_x.d[0]) { + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + } else if (static_cast(dims_y.size()) == dims_x.nbDims && + dims_y[0] == dims_x.d[0]) { + scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + for (int i = 1; i < dims_x.nbDims; i++) { + if (dims_y[i] != dims_x.d[i]) { + scale_mode = nvinfer1::ScaleMode::kCHANNEL; + break; + } + } + if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) { + for (int i = 1; i < dims_x.nbDims; i++) { + if (dims_y[i] != 1) + PADDLE_THROW( + "TensorRT unsupported weight shape for Elementwise op!"); + } + } + } else { + PADDLE_THROW("TensorRT unsupported weight Shape for Elementwise op!"); + } + + TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT, + static_cast(weight_data), + Y_t->memory_size() / sizeof(float)}; + TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr, + 0}; + TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, + 0}; + + nvinfer1::IScaleLayer* layer = TRT_ENGINE_ADD_LAYER( + engine_, Scale, *const_cast(X), scale_mode, + shift_weights.get(), scale_weights.get(), power_weights.get()); + auto output_name = op_desc.Output("Out")[0]; + engine_->SetITensor(output_name, layer->getOutput(0)); + if (test_mode) { // the test framework can not determine which is the + // output, so place the declaration inside. + engine_->DeclareOutput(output_name); + } + } +}; + +class ElementwiseTensorOpConverter : public OpConverter { + public: + ElementwiseTensorOpConverter() {} + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + // Here the two nullptr looks strange, that's because the + // framework::OpDesc's constructor is strange. + framework::OpDesc op_desc(op, nullptr); + LOG(INFO) << "convert a fluid elementwise op to tensorrt IScaleLayer"; + + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1); // Y is a weight + PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1); + + auto* X = engine_->GetITensor(op_desc.Input("X").front()); + auto* Y = engine_->GetITensor(op_desc.Input("Y").front()); + nvinfer1::Dims dims_x = X->getDimensions(); + nvinfer1::Dims dims_y = Y->getDimensions(); + + // The two input tensor should have the same dims + PADDLE_ENFORCE(dims_x.nbDims >= 3); + if (dims_x.nbDims == dims_y.nbDims) { + for (int i = 0; i < dims_x.nbDims; i++) { + if (dims_x.d[i] != dims_y.d[i]) + PADDLE_THROW("TensorRT unsupported tensor shape for Elementwise op!"); + } + } else { + PADDLE_THROW("TensorRT unsupported tensor shape for Elementwise op!"); + } + + auto op_pair = ops.find(op_type_); + if (op_pair == ops.end()) { + PADDLE_THROW("Wrong elementwise op type!"); + } + nvinfer1::IElementWiseLayer* layer = TRT_ENGINE_ADD_LAYER( + engine_, ElementWise, *const_cast(X), + *const_cast(Y), op_pair->second); + + auto output_name = op_desc.Output("Out")[0]; + engine_->SetITensor(output_name, layer->getOutput(0)); + if (test_mode) { // the test framework can not determine which is the + // output, so place the declaration inside. + engine_->DeclareOutput(output_name); + } + } + + protected: + static const std::unordered_map + ops; + std::string op_type_; +}; + +const std::unordered_map + ElementwiseTensorOpConverter::ops = { + {"add", nvinfer1::ElementWiseOperation::kSUM}, + {"mul", nvinfer1::ElementWiseOperation::kPROD}, + {"sub", nvinfer1::ElementWiseOperation::kSUB}, + {"div", nvinfer1::ElementWiseOperation::kDIV}, + {"min", nvinfer1::ElementWiseOperation::kMIN}, + {"pow", nvinfer1::ElementWiseOperation::kPOW}, + {"max", nvinfer1::ElementWiseOperation::kMAX}, +}; + +class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorAddOpConverter() { op_type_ = "add"; } +}; + +class ElementwiseTensorMulOpConverter : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorMulOpConverter() { op_type_ = "mul"; } +}; + +class ElementwiseTensorSubOpConverter : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorSubOpConverter() { op_type_ = "sub"; } +}; + +class ElementwiseTensorDivOpConverter : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorDivOpConverter() { op_type_ = "div"; } +}; + +class ElementwiseTensorMinOpConverter : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorMinOpConverter() { op_type_ = "min"; } +}; + +class ElementwiseTensorMaxOpConverter : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorMaxOpConverter() { op_type_ = "max"; } +}; + +class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter { + public: + ElementwiseTensorPowOpConverter() { op_type_ = "pow"; } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(elementwise_add_weight, ElementwiseWeightOpConverter); + +REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor, + ElementwiseTensorAddOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_sub_tensor, + ElementwiseTensorSubOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_div_tensor, + ElementwiseTensorDivOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_mul_tensor, + ElementwiseTensorMulOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_max_tensor, + ElementwiseTensorMaxOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_min_tensor, + ElementwiseTensorMinOpConverter); +REGISTER_TRT_OP_CONVERTER(elementwise_pow_tensor, + ElementwiseTensorPowOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 968f7eb99ce8519edaa585fd3cb642bd80cc63cc..1b6a0ad82f3ceb00cec15c28c8121adc22271b7a 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -55,6 +55,31 @@ class OpConverter { it = Registry::Lookup("fc"); } } + + if (op_desc.Type().find("elementwise") != std::string::npos) { + static std::unordered_set add_tensor_op_set{ + "add", "mul", "sub", "div", "max", "min", "pow"}; + // TODO(xingzhaolong): all mul, sub, div + // static std::unordered_set add_weight_op_set {"add", "mul", + // "sub", "div"}; + static std::unordered_set add_weight_op_set{"add"}; + PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL); + int op_type_len = op_desc.Type().size(); + std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len); + std::string Y = op_desc.Input("Y")[0]; + if (parameters.count(Y)) { + PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0, + "Unsupported elementwise type" + op_type); + it = + Registry::Lookup("elementwise_" + op_type + "_weight"); + } else { + PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0, + "Unsupported elementwise type" + op_type); + it = + Registry::Lookup("elementwise_" + op_type + "_tensor"); + } + } + if (!it) { it = Registry::Lookup(op_desc.Type()); } diff --git a/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..7537d02a35b66a41c158cd8eb1b1e5d4107e7d84 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc @@ -0,0 +1,73 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +TEST(elementwise_op, add_weight_test) { + std::unordered_set parameters({"elementwise_add-Y"}); + framework::Scope scope; + TRTConvertValidation validator(10, parameters, scope, 1 << 15); + validator.DeclInputVar("elementwise_add-X", nvinfer1::DimsCHW(10, 3, 3)); + validator.DeclParamVar("elementwise_add-Y", nvinfer1::Dims3(10, 1, 1)); + // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2)); + validator.DeclOutputVar("elementwise_add-Out", nvinfer1::DimsCHW(10, 3, 3)); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("elementwise_add"); + desc.SetInput("X", {"elementwise_add-X"}); + desc.SetInput("Y", {"elementwise_add-Y"}); + desc.SetOutput("Out", {"elementwise_add-Out"}); + + int axis = 1; + desc.SetAttr("axis", axis); + + validator.SetOp(*desc.Proto()); + + validator.Execute(8); +} + +TEST(elementwise_op, add_tensor_test) { + std::unordered_set parameters; + framework::Scope scope; + TRTConvertValidation validator(8, parameters, scope, 1 << 15); + validator.DeclInputVar("elementwise_add-X", nvinfer1::DimsCHW(10, 3, 3)); + validator.DeclInputVar("elementwise_add-Y", nvinfer1::Dims3(10, 3, 3)); + // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2)); + validator.DeclOutputVar("elementwise_add-Out", nvinfer1::DimsCHW(10, 3, 3)); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("elementwise_add"); + desc.SetInput("X", {"elementwise_add-X"}); + desc.SetInput("Y", {"elementwise_add-Y"}); + desc.SetOutput("Out", {"elementwise_add-Out"}); + + // the defalut axis of elementwise op is -1 + + validator.SetOp(*desc.Proto()); + + validator.Execute(8); +} + +} // namespace tensorrt +} // namespace inference +} // namespace paddle +USE_OP(elementwise_add); diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h index 39529cc2c799212f91107b1b86dd2c8c3642b6da..63c2f978f253df11100ecca83acae5eab6a0337d 100644 --- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h +++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h @@ -149,7 +149,7 @@ class TRTConvertValidation { cudaStreamSynchronize(*engine_->stream()); ASSERT_FALSE(op_desc_->OutputArgumentNames().empty()); - const size_t output_space_size = 2000; + const size_t output_space_size = 3000; for (const auto& output : op_desc_->OutputArgumentNames()) { std::vector fluid_out; std::vector trt_out(output_space_size);