diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 6863b035d8cd9dfb21aed3947226a796778912a4..2a449eb95e3537a11962912a6a3f29e89958fbd8 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,7 +1,7 @@ # Add TRT tests nv_library(tensorrt_converter SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc -activation_op.cc softmax_op.cc +batch_norm_op.cc activation_op.cc softmax_op.cc DEPS tensorrt_engine operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS @@ -24,3 +24,6 @@ nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine softmax_op SERIAL) + +nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc + DEPS ${FLUID_CORE_MODULES} tensorrt_engine batch_norm_op SERIAL) diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..94f8b0ae5606d39a722ffe28501645c9b6fc5d2e --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc @@ -0,0 +1,136 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +class BatchNormOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + LOG(INFO) << "convert a fluid batch norm op to tensorrt batch_norm"; + + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1); // Bias is a weight + PADDLE_ENFORCE_EQ(op_desc.Input("Mean").size(), 1); // Mean is a weight + PADDLE_ENFORCE_EQ(op_desc.Input("Scale").size(), 1); // Scale is a weight + PADDLE_ENFORCE_EQ(op_desc.Input("Variance").size(), + 1); // Variance is a weight + PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1); + + auto* X = engine_->GetITensor(op_desc.Input("X").front()); + // Declare weights + auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front()); + auto* Mean_v = scope.FindVar(op_desc.Input("Mean").front()); + auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front()); + auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front()); + const float eps = boost::get(op_desc.GetAttr("epsilon")); + + PADDLE_ENFORCE_NOT_NULL(Bias_v); + PADDLE_ENFORCE_NOT_NULL(Mean_v); + PADDLE_ENFORCE_NOT_NULL(Scale_v); + PADDLE_ENFORCE_NOT_NULL(Variance_v); + + // get tensor + auto* Bias_t = Bias_v->GetMutable(); + auto* Mean_t = Mean_v->GetMutable(); + auto* Scale_t = Scale_v->GetMutable(); + auto* Variance_t = Variance_v->GetMutable(); + + // create temp tensor for weights + framework::LoDTensor bias_tensor; + framework::LoDTensor mean_tensor; + framework::LoDTensor scale_tensor; + framework::LoDTensor variance_tensor; + + bias_tensor.Resize(Bias_t->dims()); + mean_tensor.Resize(Mean_t->dims()); + scale_tensor.Resize(Scale_t->dims()); + variance_tensor.Resize(Variance_t->dims()); + + platform::CPUPlace cpu_place; + // copy data from gpu to cpu + TensorCopySync((*Bias_t), cpu_place, &bias_tensor); + TensorCopySync((*Mean_t), cpu_place, &mean_tensor); + TensorCopySync((*Scale_t), cpu_place, &scale_tensor); + TensorCopySync((*Variance_t), cpu_place, &variance_tensor); + + auto* bias_data = bias_tensor.mutable_data(platform::CPUPlace()); + auto* mean_data = mean_tensor.mutable_data(platform::CPUPlace()); + auto* scale_data = scale_tensor.mutable_data(platform::CPUPlace()); + auto* variance_data = + variance_tensor.mutable_data(platform::CPUPlace()); + + std::unique_ptr combile_scale_tensor( + new framework::LoDTensor()); + std::unique_ptr combile_bias_tensor( + new framework::LoDTensor()); + + combile_scale_tensor->Resize(scale_tensor.dims()); + combile_bias_tensor->Resize(bias_tensor.dims()); + + auto* combile_scale_data = + combile_scale_tensor->mutable_data(platform::CPUPlace()); + auto* combile_bias_data = + combile_bias_tensor->mutable_data(platform::CPUPlace()); + + size_t ele_num = combile_scale_tensor->memory_size() / sizeof(float); + + for (size_t i = 0; i < ele_num; i++) { + float scale = scale_data[i]; + float bias = bias_data[i]; + float mean = mean_data[i]; + float variance = variance_data[i]; + combile_scale_data[i] = scale / sqrtf(variance + eps); + combile_bias_data[i] = bias - mean * combile_scale_data[i]; + } + + TensorRTEngine::Weight scale_weights{ + nvinfer1::DataType::kFLOAT, static_cast(combile_scale_data), + combile_scale_tensor->memory_size() / sizeof(float)}; + TensorRTEngine::Weight shift_weights{ + nvinfer1::DataType::kFLOAT, static_cast(combile_bias_data), + combile_bias_tensor->memory_size() / sizeof(float)}; + TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, + 0}; + + nvinfer1::IScaleLayer* layer = + TRT_ENGINE_ADD_LAYER(engine_, Scale, *const_cast(X), + nvinfer1::ScaleMode::kCHANNEL, shift_weights.get(), + scale_weights.get(), power_weights.get()); + + auto output_name = op_desc.Output("Y").front(); + engine_->weight_map[op_desc.Input("Bias").front()] = + std::move(combile_bias_tensor); + engine_->weight_map[op_desc.Input("Scale").front()] = + std::move(combile_scale_tensor); + + engine_->SetITensor(output_name, layer->getOutput(0)); + + if (test_mode) { + engine_->DeclareOutput(output_name); + } + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(batch_norm, BatchNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..41412cb079540da72760558379b158b6538aa6a8 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc @@ -0,0 +1,71 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +TEST(batch_norm_op, test) { + std::unordered_set parameters( + {"batch_norm_scale", "batch_norm_bias", "batch_norm_mean", + "batch_norm_variance"}); + framework::Scope scope; + TRTConvertValidation validator(5, parameters, scope, 1 << 15); + std::vector param_shape{2}; + + validator.DeclInputVar("batch_norm_X", nvinfer1::DimsCHW(2, 5, 5)); + validator.DeclParamVar("batch_norm_scale", param_shape); + validator.DeclParamVar("batch_norm_bias", param_shape); + validator.DeclParamVar("batch_norm_mean", param_shape); + validator.DeclParamVar("batch_norm_variance", param_shape); + validator.DeclOutputVar("batch_norm_Y", nvinfer1::DimsCHW(2, 5, 5)); + validator.DeclOutputVar("batch_norm_save_mean", param_shape); + validator.DeclOutputVar("batch_norm_save_variance", param_shape); + + // Prepare Op description + framework::OpDesc desc; + + desc.SetType("batch_norm"); + desc.SetInput("X", {"batch_norm_X"}); + desc.SetInput("Scale", {"batch_norm_scale"}); + desc.SetInput("Bias", {"batch_norm_bias"}); + desc.SetInput("Mean", {"batch_norm_mean"}); + desc.SetInput("Variance", {"batch_norm_variance"}); + desc.SetOutput("Y", {"batch_norm_Y"}); + desc.SetOutput("MeanOut", {"batch_norm_mean"}); + desc.SetOutput("VarianceOut", {"batch_norm_variance"}); + desc.SetOutput("SavedMean", {"batch_norm_save_mean"}); + desc.SetOutput("SavedVariance", {"batch_norm_save_variance"}); + + float eps = 1e-5f; + bool is_test = true; + desc.SetAttr("epsilon", eps); + desc.SetAttr("is_test", is_test); + + validator.SetOp(*desc.Proto()); + + std::unordered_set neglected_output = { + "batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean", + "batch_norm_variance"}; + validator.Execute(3, neglected_output); +} + +} // namespace tensorrt +} // namespace inference +} // namespace paddle +USE_OP(batch_norm); diff --git a/paddle/fluid/inference/tensorrt/convert/ut_helper.h b/paddle/fluid/inference/tensorrt/convert/ut_helper.h index 35ecfd02f43ca0715793a9b6b3997dbc943b5769..0a6f171fc40a838fd81d6a51aca0430d5526f188 100644 --- a/paddle/fluid/inference/tensorrt/convert/ut_helper.h +++ b/paddle/fluid/inference/tensorrt/convert/ut_helper.h @@ -98,11 +98,19 @@ class TRTConvertValidation { engine_->DeclareInput(name, nvinfer1::DataType::kFLOAT, dims); } + void DeclParamVar(const std::string& name, const std::vector dim_vec) { + DeclVar(name, dim_vec); + } + // Declare a parameter varaible in the scope. void DeclParamVar(const std::string& name, const nvinfer1::Dims& dims) { DeclVar(name, dims, true); } + void DeclOutputVar(const std::string& name, const std::vector dim_vec) { + DeclVar(name, dim_vec); + } + void DeclOutputVar(const std::string& name, const nvinfer1::Dims& dims) { DeclVar(name, dims); } @@ -155,7 +163,11 @@ class TRTConvertValidation { } } - void Execute(int batch_size) { + // We use the set 'neglected_output' here, because some Ops like batch norm, + // the outputs specified in the op des are only used during training, + // so we should neglect those output during inference. + void Execute(int batch_size, + std::unordered_set neglected_output = {}) { // Execute Fluid Op PADDLE_ENFORCE_LE(batch_size, max_batch_size_); platform::CUDAPlace place; @@ -168,6 +180,7 @@ class TRTConvertValidation { ASSERT_FALSE(op_desc_->OutputArgumentNames().empty()); const size_t output_space_size = 3000; for (const auto& output : op_desc_->OutputArgumentNames()) { + if (neglected_output.count(output)) continue; std::vector fluid_out; std::vector trt_out(output_space_size); engine_->GetOutputInCPU(output, &trt_out[0], output_space_size);