diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 6863b035d8cd9dfb21aed3947226a796778912a4..2a449eb95e3537a11962912a6a3f29e89958fbd8 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -1,7 +1,7 @@ # Add TRT tests nv_library(tensorrt_converter SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc -activation_op.cc softmax_op.cc +batch_norm_op.cc activation_op.cc softmax_op.cc DEPS tensorrt_engine operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS @@ -24,3 +24,6 @@ nv_test(test_trt_elementwise_op SRCS test_elementwise_op.cc elementwise_op.cc nv_test(test_trt_softmax_op SRCS test_softmax_op.cc softmax_op.cc DEPS ${FLUID_CORE_MODULES} tensorrt_engine softmax_op SERIAL) + +nv_test(test_trt_batch_norm_op SRCS test_batch_norm_op.cc batch_norm_op.cc + DEPS ${FLUID_CORE_MODULES} tensorrt_engine batch_norm_op SERIAL) diff --git a/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..922c638299adda6a43e27eb9526f18203627dec6 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/batch_norm_op.cc @@ -0,0 +1,131 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include + +namespace paddle { +namespace inference { +namespace tensorrt { + +class BatchNormOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + LOG(INFO) + << "convert a fluid batch norm op to tensorrt batch_norm"; + + framework::OpDesc op_desc(op, nullptr); + PADDLE_ENFORCE_EQ(op_desc.Input("X").size(), 1); + PADDLE_ENFORCE_EQ(op_desc.Input("Bias").size(), 1); // Bias is a weight + PADDLE_ENFORCE_EQ(op_desc.Input("Mean").size(), 1); // Mean is a weight + PADDLE_ENFORCE_EQ(op_desc.Input("Scale").size(), 1); // Scale is a weight + PADDLE_ENFORCE_EQ(op_desc.Input("Variance").size(), 1); // Variance is a weight + PADDLE_ENFORCE_EQ(op_desc.Output("Y").size(), 1); + + auto* X = engine_->GetITensor(op_desc.Input("X").front()); + // Declare weights + auto* Bias_v = scope.FindVar(op_desc.Input("Bias").front()); + auto* Mean_v = scope.FindVar(op_desc.Input("Mean").front()); + auto* Scale_v = scope.FindVar(op_desc.Input("Scale").front()); + auto* Variance_v = scope.FindVar(op_desc.Input("Variance").front()); + const float eps = boost::get(op_desc.GetAttr("epsilon")); + + PADDLE_ENFORCE_NOT_NULL(Bias_v); + PADDLE_ENFORCE_NOT_NULL(Mean_v); + PADDLE_ENFORCE_NOT_NULL(Scale_v); + PADDLE_ENFORCE_NOT_NULL(Variance_v); + + // get tensor + auto* Bias_t = Bias_v->GetMutable(); + auto* Mean_t = Mean_v->GetMutable(); + auto* Scale_t = Scale_v->GetMutable(); + auto* Variance_t = Variance_v->GetMutable(); + + + // create temp tensor for weights + framework::LoDTensor bias_tensor; + framework::LoDTensor mean_tensor; + framework::LoDTensor scale_tensor; + framework::LoDTensor variance_tensor; + + bias_tensor.Resize(Bias_t->dims()); + mean_tensor.Resize(Mean_t->dims()); + scale_tensor.Resize(Scale_t->dims()); + variance_tensor.Resize(Variance_t->dims()); + + platform::CPUPlace cpu_place; + // copy data from gpu to cpu + TensorCopySync((*Bias_t), cpu_place, &bias_tensor); + TensorCopySync((*Mean_t), cpu_place, &mean_tensor); + TensorCopySync((*Scale_t), cpu_place, &scale_tensor); + TensorCopySync((*Variance_t), cpu_place, &variance_tensor); + + auto* bias_data = bias_tensor.mutable_data(platform::CPUPlace()); + auto* mean_data = mean_tensor.mutable_data(platform::CPUPlace()); + auto* scale_data = scale_tensor.mutable_data(platform::CPUPlace()); + auto* variance_data = variance_tensor.mutable_data(platform::CPUPlace()); + + framework::LoDTensor *combile_scale_tensor = new framework::LoDTensor(); + framework::LoDTensor *combile_bias_tensor = new framework::LoDTensor(); + + combile_scale_tensor->Resize(scale_tensor.dims()); + combile_bias_tensor->Resize(bias_tensor.dims()); + + auto* combile_scale_data = combile_scale_tensor->mutable_data(platform::CPUPlace()); + auto* combile_bias_data = combile_bias_tensor->mutable_data(platform::CPUPlace()); + + engine_->weight_map_[op_desc.Input("Bias").front()] = std::move(std::unique_ptr(combile_bias_tensor)); + engine_->weight_map_[op_desc.Input("Scale").front()] = std::move(std::unique_ptr(combile_scale_tensor)); + + size_t ele_num = combile_scale_tensor->memory_size()/sizeof(float); + + for (size_t i = 0; i < ele_num; i++) { + float scale = scale_data[i]; + float bias = bias_data[i]; + float mean = mean_data[i]; + float variance = variance_data[i]; + combile_scale_data[i] = scale / sqrtf(variance + eps); + combile_bias_data[i] = bias - mean * combile_scale_data[i]; + } + + + TensorRTEngine::Weight scale_weights{nvinfer1::DataType::kFLOAT, + static_cast(combile_scale_data), + combile_scale_tensor->memory_size() / sizeof(float)}; + TensorRTEngine::Weight shift_weights{nvinfer1::DataType::kFLOAT, + static_cast(combile_bias_data), + combile_bias_tensor->memory_size()/ sizeof(float)}; + TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, + 0}; + + + nvinfer1::IScaleLayer* layer = TRT_ENGINE_ADD_LAYER( + engine_, Scale, *const_cast(X), nvinfer1::ScaleMode::kCHANNEL, + shift_weights.get(), scale_weights.get(), power_weights.get()); + + auto output_name = op_desc.Output("Y").front(); + engine_->SetITensor(output_name, layer->getOutput(0)); + + if (test_mode) { + engine_->DeclareOutput(output_name); + } + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(batch_norm, BatchNormOpConverter); diff --git a/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..7c9cde464a616318cbb98d519f5a8e2381c4a5a7 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc @@ -0,0 +1,67 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +TEST(batch_norm_op, test) { + std::unordered_set parameters({"batch_norm_scale", + "batch_norm_bias", "batch_norm_mean", "batch_norm_variance" }); + framework::Scope scope; + TRTConvertValidation validator(5, parameters, scope, 1 << 15); + std::vector param_shape{2}; + + validator.DeclInputVar("batch_norm_X", nvinfer1::DimsCHW(2, 5, 5)); + validator.DeclParamVar("batch_norm_scale", param_shape); + validator.DeclParamVar("batch_norm_bias", param_shape); + validator.DeclParamVar("batch_norm_mean", param_shape); + validator.DeclParamVar("batch_norm_variance", param_shape); + validator.DeclOutputVar("batch_norm_Y", nvinfer1::DimsCHW(2, 5, 5)); + validator.DeclOutputVar("batch_norm_save_mean", param_shape); + validator.DeclOutputVar("batch_norm_save_variance", param_shape); + + // Prepare Op description + framework::OpDesc desc; + desc.SetType("batch_norm"); + desc.SetInput("X", {"batch_norm_X"}); + desc.SetInput("Scale", {"batch_norm_scale"}); + desc.SetInput("Bias", {"batch_norm_bias"}); + desc.SetInput("Mean", {"batch_norm_mean"}); + desc.SetInput("Variance", {"batch_norm_variance"}); + desc.SetOutput("Y", {"batch_norm_Y"}); + desc.SetOutput("MeanOut", {"batch_norm_mean"}); + desc.SetOutput("VarianceOut", {"batch_norm_variance"}); + desc.SetOutput("SavedMean", {"batch_norm_save_mean"}); + desc.SetOutput("SavedVariance", {"batch_norm_save_variance"}); + + float eps = 1e-5f; + bool is_test = true; + desc.SetAttr("epsilon", eps); + desc.SetAttr("is_test", is_test); + + validator.SetOp(*desc.Proto()); + + std::unordered_set neglected_output = {"batch_norm_save_mean", "batch_norm_save_variance", "batch_norm_mean", "batch_norm_variance"}; + validator.Execute(3, neglected_output); +} + +} // namespace tensorrt +} // namespace inference +} // namespace paddle +USE_OP(batch_norm);