api_tensorrt_subgraph_engine.cc 6.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/inference/analysis/analyzer.h"
L
Luo Tao 已提交
16
#include "paddle/fluid/inference/api/api_impl.h"
17
#include "paddle/fluid/inference/api/paddle_inference_api.h"
18
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
19
#include "paddle/fluid/inference/utils/singleton.h"
20
#include "paddle/fluid/operators/tensorrt_engine_op.h"
21 22 23 24 25 26 27

namespace paddle {

using inference::analysis::Argument;
using inference::Singleton;
using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
Y
Yan Chunwei 已提交
28
using paddle::contrib::MixedRTConfig;
29 30 31

class TensorRTSubgraphPredictor : public NativePaddlePredictor {
 public:
Y
Yan Chunwei 已提交
32
  explicit TensorRTSubgraphPredictor(const MixedRTConfig& config)
33 34 35
      : NativePaddlePredictor(config), config_(config) {}

  bool Init(const std::shared_ptr<framework::Scope>& parent_scope) {
N
nhzlx 已提交
36
    FLAGS_IA_enable_tensorrt_subgraph_engine = true;
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
    VLOG(3) << "Predictor::init()";
    if (config_.use_gpu) {
      place_ = paddle::platform::CUDAPlace(config_.device);
    } else {
      place_ = paddle::platform::CPUPlace();
    }
    if (parent_scope) {
      scope_ = parent_scope;
      sub_scope_ = &(parent_scope->NewScope());
    } else {
      paddle::framework::InitDevices(false);
      scope_.reset(new paddle::framework::Scope());
    }

    executor_.reset(new paddle::framework::Executor(place_));

    // Initialize the inference program
    if (!config_.model_dir.empty()) {
      // Parameters are saved in separate files sited in
      // the specified `dirname`.
      inference_program_ = paddle::inference::Load(
          executor_.get(), scope_.get(), config_.model_dir);
    } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
      // All parameters are saved in a single file.
      // The file names should be consistent with that used
      // in Python API `fluid.io.save_inference_model`.
      inference_program_ = paddle::inference::Load(
          executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
    } else {
      LOG(ERROR) << "fail to load inference model.";
      return false;
    }

70
    OptimizeInferenceProgram();
71 72 73
    ctx_ = executor_->Prepare(*inference_program_, 0);

    VLOG(5) << "to create variables";
74 75
    executor_->CreateVariables(*inference_program_,
                               sub_scope_ ? sub_scope_ : scope_.get(), 0);
76
    // Get the feed_target_names and fetch_target_names
77
    PrepareFeedFetch();
78 79 80
    return true;
  }

81 82 83 84 85 86 87 88 89 90 91 92
  bool Run(const std::vector<PaddleTensor>& inputs,
           std::vector<PaddleTensor>* output_data,
           int batch_size = -1) override {
    PADDLE_ENFORCE_GT(batch_size, 0,
                      "TensorRT engine needs the argument batch_size set");
    FLAGS_tensorrt_engine_batch_size = batch_size;
    return NativePaddlePredictor::Run(inputs, output_data, batch_size);
  }

  void OptimizeInferenceProgram() {
    // Analyze inference_program
    Argument argument;
N
nhzlx 已提交
93

N
nhzlx 已提交
94 95 96 97 98 99
    argument.Set<int>("minimum_subgraph_size",
                      new int(config_.minimum_subgraph_size));
    argument.Set<int>("max_batch_size", new int(config_.max_batch_size));
    argument.Set<int>("workspace_size", new int(config_.workspace_size));
    argument.Set<std::string>("precision_mode",
                              new std::string(config_.precision_mode));
N
nhzlx 已提交
100

Y
Yan Chunwei 已提交
101 102 103 104 105 106 107 108 109 110 111 112
    if (!config_.model_dir.empty()) {
      argument.fluid_model_dir.reset(new std::string(config_.model_dir));
    } else {
      PADDLE_ENFORCE(
          !config_.param_file.empty(),
          "Either model_dir or (param_file, prog_file) should be set.");
      PADDLE_ENFORCE(!config_.prog_file.empty());
      argument.fluid_model_program_path.reset(
          new std::string(config_.prog_file));
      argument.fluid_model_param_path.reset(
          new std::string(config_.param_file));
    }
113 114 115 116 117 118 119 120 121 122 123
    argument.origin_program_desc.reset(
        new ProgramDesc(*inference_program_->Proto()));
    Singleton<Analyzer>::Global().Run(&argument);
    CHECK(argument.transformed_program_desc);
    VLOG(5) << "transformed program:\n"
            << argument.transformed_program_desc->SerializeAsString();
    VLOG(5) << "to prepare executor";
    inference_program_.reset(
        new framework::ProgramDesc(*argument.transformed_program_desc));
  }

124
 private:
Y
Yan Chunwei 已提交
125
  MixedRTConfig config_;
126 127 128 129
};

template <>
std::unique_ptr<PaddlePredictor>
Y
Yan Chunwei 已提交
130 131
CreatePaddlePredictor<MixedRTConfig, PaddleEngineKind::kAutoMixedTensorRT>(
    const MixedRTConfig& config) {
132 133 134 135
  VLOG(3) << "create TensorRTSubgraphPredictor";
  if (config.use_gpu) {
    // 1. GPU memeroy
    PADDLE_ENFORCE_GT(
136
        config.fraction_of_gpu_memory, 0.f,
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159
        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f ||
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummpy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         std::to_string(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(
      new TensorRTSubgraphPredictor(config));
  if (!dynamic_cast<TensorRTSubgraphPredictor*>(predictor.get())
           ->Init(nullptr)) {
    return nullptr;
  }
  return std::move(predictor);
}

Y
Yan Chunwei 已提交
160 161 162 163 164 165 166
// Single-template-argument overload for MixedRTConfig: forwards to the
// PaddleEngineKind::kAutoMixedTensorRT specialization.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<MixedRTConfig>(
    const MixedRTConfig& config) {
  constexpr PaddleEngineKind kEngine = PaddleEngineKind::kAutoMixedTensorRT;
  return CreatePaddlePredictor<MixedRTConfig, kEngine>(config);
}

167
}  // namespace paddle
168 169

// TensorRT op converters referenced by this translation unit, one macro
// invocation per converter name.
// NOTE(review): USE_TRT_CONVERTER is defined outside this file (presumably in
// op_converter.h); the original order is kept because its expansion is not
// visible here.

// Element-wise binary operators.
USE_TRT_CONVERTER(elementwise_add_weight);
USE_TRT_CONVERTER(elementwise_add_tensor);
USE_TRT_CONVERTER(elementwise_sub_tensor);
USE_TRT_CONVERTER(elementwise_div_tensor);
USE_TRT_CONVERTER(elementwise_mul_tensor);
USE_TRT_CONVERTER(elementwise_max_tensor);
USE_TRT_CONVERTER(elementwise_min_tensor);
USE_TRT_CONVERTER(elementwise_pow_tensor);

// Remaining operators.
USE_TRT_CONVERTER(mul);
USE_TRT_CONVERTER(conv2d);
USE_TRT_CONVERTER(relu);
USE_TRT_CONVERTER(sigmoid);
USE_TRT_CONVERTER(tanh);
USE_TRT_CONVERTER(fc);
USE_TRT_CONVERTER(pool2d);
USE_TRT_CONVERTER(softmax);
USE_TRT_CONVERTER(batch_norm);
USE_TRT_CONVERTER(concat);
USE_TRT_CONVERTER(dropout);