From 830aa12c1ab0e1d9f001c2569bc9d214a219fd90 Mon Sep 17 00:00:00 2001
From: nhzlx
Date: Fri, 27 Jul 2018 08:44:38 +0000
Subject: [PATCH] add elementwise init code

---
 .../inference/tensorrt/convert/CMakeLists.txt |   5 +-
 .../tensorrt/convert/elementwise_op.cc        | 210 ++++++++++++++++++
 .../inference/tensorrt/convert/op_converter.h |  25 +++
 .../tensorrt/convert/test_elementwise_op.cc   |  74 ++++++
 4 files changed, 313 insertions(+), 1 deletion(-)
 create mode 100644 paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
 create mode 100644 paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc

diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index c841510ae6..7e95622799 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -1,6 +1,6 @@
 # Add TRT tests
 nv_library(tensorrt_converter
-  SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc
+  SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc
   DEPS tensorrt_engine mul_op)
 
 nv_test(test_op_converter SRCS test_op_converter.cc DEPS
@@ -16,3 +16,6 @@ nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
 
 nv_test(test_trt_pool2d_op SRCS test_pool2d_op.cc pool2d_op.cc
         DEPS ${FLUID_CORE_MODULES} tensorrt_engine pool_op SERIAL)
+
+nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc
+        DEPS ${FLUID_CORE_MODULES} tensorrt_engine elementwise_add_op SERIAL)
diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
new file mode 100644
index 0000000000..2fb924225f
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc
@@ -0,0 +1,210 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+class ElementwiseWeightOpConverter : public OpConverter {
+ public:
+  ElementwiseWeightOpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    // The nullptr here looks strange; that's because the
+    // framework::OpDesc constructor is strange.
+    framework::OpDesc op_desc(op, nullptr);
+    LOG(INFO) << "convert a fluid elementwise op to tensorrt IScaleLayer";
+
+    PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+    PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);  // Y is a weight
+    PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+
+    auto* X = engine_->GetITensor(op_desc.Input("X").front());
+    nvinfer1::Dims dims_x = X->getDimensions();
+    PADDLE_ENFORCE(dims_x.nbDims >= 3);
+
+    auto* Y_v = scope.FindVar(op_desc.Input("Y").front());
+    PADDLE_ENFORCE_NOT_NULL(Y_v);
+    auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
+    auto* weight_data = Y_t->mutable_data<float>(platform::CPUPlace());
+    auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
+
+    std::vector<int> dims_y = framework::vectorize2int(Y_t->dims());
+    if (static_cast<int>(dims_y.size()) == dims_x.nbDims + 1) {
+      if (dims_y[0] == 1) dims_y.erase(dims_y.begin());
+    }
+
+    if (static_cast<int>(dims_y.size()) == 1 && dims_y[0] == dims_x.d[0]) {
+      scale_mode = nvinfer1::ScaleMode::kCHANNEL;
+    } else if (static_cast<int>(dims_y.size()) == dims_x.nbDims &&
+               dims_y[0] == dims_x.d[0]) {
+      scale_mode = nvinfer1::ScaleMode::kELEMENTWISE;
+      for (int i = 1; i < dims_x.nbDims; i++) {
+        if (dims_y[i] != dims_x.d[i]) {
+          scale_mode = nvinfer1::ScaleMode::kCHANNEL;
+          break;
+        }
+      }
+      if (scale_mode == nvinfer1::ScaleMode::kCHANNEL) {
+        for (int i = 1; i < dims_x.nbDims; i++) {
+          if (dims_y[i] != 1)
+            PADDLE_THROW(
+                "TensorRT unsupported weight shape for Elementwise op!");
+        }
+      }
+    } else {
+      PADDLE_THROW("TensorRT unsupported weight shape for Elementwise op!");
+    }
+
+    TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT,
+                                         static_cast<void*>(weight_data),
+                                         Y_t->memory_size() / sizeof(float)};
+    TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, nullptr,
+                                         0};
+    TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
+                                         0};
+
+    nvinfer1::IScaleLayer* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, Scale, *const_cast<nvinfer1::ITensor*>(X), scale_mode,
+        shift_weights.get(), scale_weights.get(), power_weights.get());
+    auto output_name = op_desc.Output("Out")[0];
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {  // the test framework can not determine which is the
+                      // output, so place the declaration inside.
+      engine_->DeclareOutput(output_name);
+    }
+  }
+};
+
+class ElementwiseTensorOpConverter : public OpConverter {
+ public:
+  ElementwiseTensorOpConverter() {}
+  void operator()(const framework::proto::OpDesc& op,
+                  const framework::Scope& scope, bool test_mode) override {
+    // The nullptr here looks strange; that's because the
+    // framework::OpDesc constructor is strange.
+    framework::OpDesc op_desc(op, nullptr);
+    LOG(INFO) << "convert a fluid elementwise op to tensorrt IElementWiseLayer";
+
+    PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1);
+    PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1);  // Y is a tensor
+    PADDLE_ENFORCE_EQ(op_desc.Output("Out").size(), 1);
+
+    auto* X = engine_->GetITensor(op_desc.Input("X").front());
+    auto* Y = engine_->GetITensor(op_desc.Input("Y").front());
+    nvinfer1::Dims dims_x = X->getDimensions();
+    nvinfer1::Dims dims_y = Y->getDimensions();
+
+    // only support the C * H * W input format
+    PADDLE_ENFORCE(dims_x.nbDims >= 3);
+    if (dims_x.nbDims == dims_y.nbDims) {
+      for (int i = 0; i < dims_x.nbDims; i++) {
+        if (dims_x.d[i] != dims_y.d[i])
+          PADDLE_THROW("TensorRT unsupported tensor shape for Elementwise op!");
+      }
+    } else {
+      PADDLE_THROW("TensorRT unsupported tensor shape for Elementwise op!");
+    }
+
+    auto op_pair = ops.find(op_type_);
+    if (op_pair == ops.end()) {
+      PADDLE_THROW("Wrong elementwise op type!");
+    }
+    nvinfer1::IElementWiseLayer* layer = TRT_ENGINE_ADD_LAYER(
+        engine_, ElementWise, *const_cast<nvinfer1::ITensor*>(X),
+        *const_cast<nvinfer1::ITensor*>(Y), op_pair->second);
+
+    auto output_name = op_desc.Output("Out")[0];
+    engine_->SetITensor(output_name, layer->getOutput(0));
+    if (test_mode) {  // the test framework can not determine which is the
+                      // output, so place the declaration inside.
+      engine_->DeclareOutput(output_name);
+    }
+  }
+
+ protected:
+  static const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
+      ops;
+  std::string op_type_;
+};
+
+const std::unordered_map<std::string, nvinfer1::ElementWiseOperation>
+    ElementwiseTensorOpConverter::ops = {
+        {"add", nvinfer1::ElementWiseOperation::kSUM},
+        {"mul", nvinfer1::ElementWiseOperation::kPROD},
+        {"sub", nvinfer1::ElementWiseOperation::kSUB},
+        {"div", nvinfer1::ElementWiseOperation::kDIV},
+        {"min", nvinfer1::ElementWiseOperation::kMIN},
+        {"pow", nvinfer1::ElementWiseOperation::kPOW},
+        {"max", nvinfer1::ElementWiseOperation::kMAX},
+};
+
+class ElementwiseTensorAddOpConverter : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorAddOpConverter() { op_type_ = "add"; }
+};
+
+class ElementwiseTensorMulOpConverter : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorMulOpConverter() { op_type_ = "mul"; }
+};
+
+class ElementwiseTensorSubOpConverter : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorSubOpConverter() { op_type_ = "sub"; }
+};
+
+class ElementwiseTensorDivOpConverter : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorDivOpConverter() { op_type_ = "div"; }
+};
+
+class ElementwiseTensorMinOpConverter : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorMinOpConverter() { op_type_ = "min"; }
+};
+
+class ElementwiseTensorMaxOpConverter : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorMaxOpConverter() { op_type_ = "max"; }
+};
+
+class ElementwiseTensorPowOpConverter : public ElementwiseTensorOpConverter {
+ public:
+  ElementwiseTensorPowOpConverter() { op_type_ = "pow"; }
+};
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+
+REGISTER_TRT_OP_CONVERTER(elementwise_add_weight, ElementwiseWeightOpConverter);
+
+REGISTER_TRT_OP_CONVERTER(elementwise_add_tensor,
+                          ElementwiseTensorAddOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_sub_tensor,
+                          ElementwiseTensorSubOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_div_tensor,
+                          ElementwiseTensorDivOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_mul_tensor,
+                          ElementwiseTensorMulOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_max_tensor,
+                          ElementwiseTensorMaxOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_min_tensor,
+                          ElementwiseTensorMinOpConverter);
+REGISTER_TRT_OP_CONVERTER(elementwise_pow_tensor,
+                          ElementwiseTensorPowOpConverter);
diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h
index 968f7eb99c..1b6a0ad82f 100644
--- a/paddle/fluid/inference/tensorrt/convert/op_converter.h
+++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h
@@ -55,6 +55,31 @@ class OpConverter {
         it = Registry<OpConverter>::Lookup("fc");
       }
     }
+
+    if (op_desc.Type().find("elementwise") != std::string::npos) {
+      static std::unordered_set<std::string> add_tensor_op_set{
+          "add", "mul", "sub", "div", "max", "min", "pow"};
+      // TODO(xingzhaolong): all mul, sub, div
+      // static std::unordered_set<std::string> add_weight_op_set {"add", "mul",
+      // "sub", "div"};
+      static std::unordered_set<std::string> add_weight_op_set{"add"};
+      PADDLE_ENFORCE_EQ(op_desc.Input("Y").size(), 1UL);
+      int op_type_len = op_desc.Type().size();
+      std::string op_type = op_desc.Type().substr(op_type_len - 3, op_type_len);
+      std::string Y = op_desc.Input("Y")[0];
+      if (parameters.count(Y)) {
+        PADDLE_ENFORCE(add_weight_op_set.count(op_type) > 0,
+                       "Unsupported elementwise type " + op_type);
+        it =
+            Registry<OpConverter>::Lookup("elementwise_" + op_type + "_weight");
+      } else {
+        PADDLE_ENFORCE(add_tensor_op_set.count(op_type) > 0,
+                       "Unsupported elementwise type " + op_type);
+        it =
+            Registry<OpConverter>::Lookup("elementwise_" + op_type + "_tensor");
+      }
+    }
+
     if (!it) {
       it = Registry<OpConverter>::Lookup(op_desc.Type());
     }
diff --git a/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
new file mode 100644
index 0000000000..7496ce54e9
--- /dev/null
+++ b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
@@ -0,0 +1,74 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
+#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
+
+namespace paddle {
+namespace inference {
+namespace tensorrt {
+
+TEST(elementwise_op, add_weight_test) {
+  std::unordered_set<std::string> parameters({"elementwise_add-Y"});
+  framework::Scope scope;
+  TRTConvertValidation validator(1, parameters, scope, 1 << 15);
+  validator.DeclInputVar("elementwise_add-X", nvinfer1::DimsCHW(10, 3, 3));
+  validator.DeclParamVar("elementwise_add-Y", nvinfer1::Dims3(10, 1, 1));
+  // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
+  validator.DeclOutputVar("elementwise_add-Out", nvinfer1::DimsCHW(10, 3, 3));
+
+  // Prepare Op description
+  framework::OpDesc desc;
+  desc.SetType("elementwise_add");
+  desc.SetInput("X", {"elementwise_add-X"});
+  desc.SetInput("Y", {"elementwise_add-Y"});
+  desc.SetOutput("Out", {"elementwise_add-Out"});
+
+  int axis = 1;
+  desc.SetAttr("axis", axis);
+
+  validator.SetOp(*desc.Proto());
+
+  validator.Execute(1);
+}
+
+TEST(elementwise_op, add_tensor_test) {
+  std::unordered_set<std::string> parameters;
+  framework::Scope scope;
+  TRTConvertValidation validator(1, parameters, scope, 1 << 15);
+  validator.DeclInputVar("elementwise_add-X", nvinfer1::DimsCHW(10, 3, 3));
+  validator.DeclInputVar("elementwise_add-Y", nvinfer1::Dims3(10, 3, 3));
+  // validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8, 2));
+  validator.DeclOutputVar("elementwise_add-Out", nvinfer1::DimsCHW(10, 3, 3));
+
+  // Prepare Op description
+  framework::OpDesc desc;
+  desc.SetType("elementwise_add");
+  desc.SetInput("X", {"elementwise_add-X"});
+  desc.SetInput("Y", {"elementwise_add-Y"});
+  desc.SetOutput("Out", {"elementwise_add-Out"});
+
+  int axis = 1;
+  desc.SetAttr("axis", axis);
+
+  validator.SetOp(*desc.Proto());
+
+  validator.Execute(1);
+}
+
+}  // namespace tensorrt
+}  // namespace inference
+}  // namespace paddle
+USE_OP(elementwise_add);
--
GitLab