diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake index 729bdcb3dc5324df0a5272402ef203012be0072a..7355b67ab1020f58760f23b1a20ca189591db35e 100644 --- a/cmake/inference_lib.cmake +++ b/cmake/inference_lib.cmake @@ -166,8 +166,8 @@ copy(framework_lib DEPS ${framework_lib_deps} set(module "memory") copy(memory_lib - SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h - DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail + SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/detail/*.h ${src_dir}/${module}/allocation/*.h + DSTS ${dst_dir}/${module} ${dst_dir}/${module}/detail ${dst_dir}/${module}/allocation ) set(inference_deps paddle_fluid_shared paddle_fluid) diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc index 267737e95cb51f9a2d72b70b171be779d6f52c8d..108cb6f74b1208395a4faabdf6184152c300d244 100644 --- a/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_compose_pass.cc @@ -46,7 +46,7 @@ void IrAnalysisComposePass::InitTensorRTAttrs(Argument *argument) { {"mul", "conv2d", "pool2d", "relu", "softmax", "sigmoid", "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad", "elementwise_add", "elementwise_mul", "dropout", "split", "prelu", - "conv2d_transpose"}); + "conv2d_transpose", "leaky_relu"}); if (!node->IsOp()) return false; if (teller_set.count(node->Op()->Type())) { diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index d19505877bbc1110fcf5787fffc1436d242a7cdc..ee1d1d839cbd52ecf6010046056e97c86f3e141c 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -551,4 +551,5 @@ USE_TRT_CONVERTER(pad); USE_TRT_CONVERTER(split); USE_TRT_CONVERTER(prelu); USE_TRT_CONVERTER(conv2d_transpose); +USE_TRT_CONVERTER(leaky_relu); #endif diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 8dd6e8453f971b980a3dcb95c10734537e8333dd..27fb41d16ead65a1ec075399bcda135e2238c7ba 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -2,7 +2,7 @@ nv_library(tensorrt_converter SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc - pad_op.cc split_op.cc prelu_op.cc + pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc DEPS tensorrt_engine tensorrt_plugin operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS @@ -38,3 +38,5 @@ nv_test(test_trt_split_op SRCS test_split_op.cc split_op.cc nv_test(test_trt_prelu_op SRCS test_prelu_op.cc prelu_op.cc DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine tensorrt_plugin prelu_op SERIAL) +nv_test(test_trt_leaky_relu_op SRCS test_leaky_relu_op.cc leaky_relu_op.cc + DEPS ${FLUID_CORE_MODULES} ${GLOB_OPERATOR_DEPS} tensorrt_engine activation_op SERIAL) diff --git a/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..3f6ed04c46d70b1ab68b4c01ef0c908a1a8d1a19 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/leaky_relu_op.cc @@ -0,0 +1,95 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +// LeakyRelu converter from fluid to tensorRT +class LeakyReluOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + VLOG(4) << "convert fluid leaky_relu op to tensorrt layer"; + + framework::OpDesc op_desc(op, nullptr); + // Declare inputs + int input_num = op_desc.Input("X").size(); + PADDLE_ENFORCE(input_num == 1); + auto* input = engine_->GetITensor(op_desc.Input("X")[0]); + // Get output + size_t output_num = op_desc.Output("Out").size(); + PADDLE_ENFORCE(output_num == 1); + // Get attrs + float alpha = boost::get(op_desc.GetAttr("alpha")); + + platform::CPUPlace place; + std::unique_ptr alpha_tensor( + new framework::LoDTensor()); + alpha_tensor->Resize(framework::make_ddim({2})); + float* alpha_data = alpha_tensor->mutable_data(place); + alpha_data[0] = alpha; + alpha_data[1] = 1.f - alpha; + // the leaky relu formula y = (x > 0) ? x : alpha * x is equal to + // y = alpha * x + (x > 0) ? (1 - alpha) * x : 0 + TensorRTEngine::Weight scale{nvinfer1::DataType::kFLOAT, &alpha_data[0], 1}; + TensorRTEngine::Weight shift{nvinfer1::DataType::kFLOAT, nullptr, 0}; + TensorRTEngine::Weight power{nvinfer1::DataType::kFLOAT, nullptr, 0}; + // y_scale = alpha * x + auto* scale_layer = TRT_ENGINE_ADD_LAYER( + engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM, shift.get(), + scale.get(), power.get()); + PADDLE_ENFORCE(nullptr != scale_layer); + // y_relu = (x > 0) : x : 0 + auto* relu_layer = TRT_ENGINE_ADD_LAYER(engine_, Activation, *input, + nvinfer1::ActivationType::kRELU); + PADDLE_ENFORCE(nullptr != relu_layer); + // + TensorRTEngine::Weight sub_scale{nvinfer1::DataType::kFLOAT, &alpha_data[1], + 1}; + auto* scale_relu_layer = + TRT_ENGINE_ADD_LAYER(engine_, Scale, *(relu_layer->getOutput(0)), + nvinfer1::ScaleMode::kUNIFORM, shift.get(), + sub_scale.get(), power.get()); + PADDLE_ENFORCE(nullptr != scale_relu_layer); + auto* output_layer = + TRT_ENGINE_ADD_LAYER(engine_, ElementWise, *(scale_layer->getOutput(0)), + *(scale_relu_layer->getOutput(0)), + nvinfer1::ElementWiseOperation::kSUM); + PADDLE_ENFORCE(nullptr != output_layer); + // keep alpha tensor to avoid release it's memory + std::string alpha_name = op_desc.Output("Out")[0] + "_alpha"; + PADDLE_ENFORCE(engine_->weight_map.find(alpha_name) == + engine_->weight_map.end()); + engine_->weight_map[alpha_name] = std::move(alpha_tensor); + + std::string layer_name = "leaky_relu (Output: "; + auto output_name = op_desc.Output("Out")[0]; + output_layer->getOutput(0)->setName(output_name.c_str()); + engine_->SetITensor(output_name, output_layer->getOutput(0)); + layer_name += output_name; + if (test_mode) { + engine_->DeclareOutput(output_name); + } + output_layer->setName((layer_name + ")").c_str()); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(leaky_relu, LeakyReluOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/test_leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/test_leaky_relu_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..d00826af075159004d3727a7519e7c319dbddb02 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/test_leaky_relu_op.cc @@ -0,0 +1,48 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +TEST(leaky_relu_op, test_leaky_relu) { + std::unordered_set parameters; + framework::Scope scope; + TRTConvertValidation validator(10, parameters, scope, 1000); + validator.DeclInputVar("leaky_relu_input", nvinfer1::DimsCHW(3, 2, 2)); + validator.DeclOutputVar("leaky_relu_out", nvinfer1::DimsCHW(3, 2, 2)); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("leaky_relu"); + desc.SetInput("X", {"leaky_relu_input"}); + desc.SetOutput("Out", {"leaky_relu_out"}); + + desc.SetAttr("alpha", 0.1f); + + validator.SetOp(*desc.Proto()); + + validator.Execute(1); +} + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +// USE_OP(leaky_relu); +USE_OP(leaky_relu); diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index 40902694995f487343e3cfc727ad95daafa6b415..a0329325bea19bd9cdd3fcd39724cf05664b505a 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -1,3 +1,3 @@ nv_library(tensorrt_plugin SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu prelu_op_plugin.cu - DEPS enforce device_context) + DEPS enforce tensorrt_engine) diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 16a9b50e6fb174374d23cd021e47e52921871a8a..cf2a61ea61b6bb4566e5057a5c17630298d3f88e 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -27,14 +27,14 @@ function(inference_analysis_api_test_with_fake_data target install_dir filename endfunction() # RNN1 -if(NOT APPLE) +if(NOT APPLE AND WITH_MKLML) set(RNN1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/rnn1") download_model_and_data(${RNN1_INSTALL_DIR} "rnn1%2Fmodel.tar.gz" "rnn1%2Fdata.txt.tar.gz") inference_analysis_api_test(test_analyzer_rnn1 ${RNN1_INSTALL_DIR} analyzer_rnn1_tester.cc) else() - # TODO: fix this test on MACOS, the reason is that - # fusion_seqexpand_concat_fc_op is not supported on MACOS - message(WARNING "These tests has been disabled in OSX before being fixed: \n test_analyzer_rnn1") + # TODO: fix this test on MACOS and OPENBLAS, the reason is that + # fusion_seqexpand_concat_fc_op is not supported on MACOS and OPENBLAS + message(WARNING "These tests has been disabled in OSX or WITH_MKL=OFF before being fixed: \n test_analyzer_rnn1") endif() # RNN2 diff --git a/paddle/fluid/operators/stack_op.h b/paddle/fluid/operators/stack_op.h index d236c5b943704683c27b9b155c11ca9113edf514..3d132e4397e837442d406e1668126da9163129ef 100644 --- a/paddle/fluid/operators/stack_op.h +++ b/paddle/fluid/operators/stack_op.h @@ -147,20 +147,32 @@ class StackKernel : public framework::OpKernel { auto &dim = x[0]->dims(); for (auto i = 0; i < axis; ++i) pre *= dim[i]; for (auto i = axis; i < dim.size(); ++i) post *= dim[i]; - int total_num = pre * n * post; - auto &dev_ctx = ctx.template device_context(); #ifdef __NVCC__ + int total_num = pre * n * post; + auto &dev_ctx = ctx.template device_context(); + thrust::device_vector device_x_vec(x_datas); auto x_data_arr = device_x_vec.data().get(); -#else - auto x_data_arr = x_datas.data(); -#endif + StackFunctorForRange(dev_ctx, x_data_arr, y_data, total_num, n, post); -#ifdef __NVCC__ + // Wait() must be called because device_x_vec may be destructed before // kernel ends dev_ctx.Wait(); +#else + auto x_data_arr = x_datas.data(); + + size_t x_offset = 0; + size_t y_offset = 0; + for (int i = 0; i < pre; i++) { + for (int j = 0; j < n; j++) { + std::memcpy(y_data + y_offset, x_data_arr[j] + x_offset, + post * sizeof(T)); + y_offset += post; + } + x_offset += post; + } #endif } }; diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 9f7aa556988e8ab0ca87d0e7212fe27a209f6a32..e07e9d3825243017159698c1959e626ef3e66dd7 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -38,6 +38,7 @@ std::once_flag p2p_init_flag; void InitGflags(std::vector argv) { std::call_once(gflags_init_flag, [&]() { + FLAGS_logtostderr = true; argv.insert(argv.begin(), "dummy"); int argc = argv.size(); char **arr = new char *[argv.size()]; diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 89f8449124af3d794d928ebb8a2353fa0ee22ea6..99acd7e30884b46cb14e27ac4569af82af311a3a 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -5788,7 +5788,7 @@ def image_resize(input, Examples: .. code-block:: python - out = fluid.layers.image_resize(input, out_shape=[12, 12]) + out = fluid.layers.image_resize(input, out_shape=[12, 12], resample="NEAREST") """ resample_methods = { 'BILINEAR': 'bilinear', @@ -5891,6 +5891,11 @@ def resize_bilinear(input, Returns: ${out_comment}. + + Examples: + .. code-block:: python + + out = fluid.layers.resize_bilinear(input, out_shape=[12, 12]) """ return image_resize(input, out_shape, scale, name, 'BILINEAR', actual_shape) @@ -5937,6 +5942,11 @@ def resize_nearest(input, Returns: ${out_comment}. + + Examples: + .. code-block:: python + + out = fluid.layers.resize_nearest(input, out_shape=[12, 12]) """ return image_resize(input, out_shape, scale, name, 'NEAREST', actual_shape) diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 1513eca51439288acac35729300bcbe4e71e4205..29e4ca04a7fbb2eae870fcf15763310b849c8b53 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -45,6 +45,10 @@ if(APPLE) list(REMOVE_ITEM TEST_OPS test_dist_se_resnext) list(REMOVE_ITEM TEST_OPS test_fuse_elewise_add_act_pass) endif() +if(NOT WITH_MKLML) + # this op is not support on openblas + list(REMOVE_ITEM TEST_OPS test_fusion_seqexpand_concat_fc_op) +endif() function(py_test_modules TARGET_NAME) if(WITH_TESTING)