// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <dirent.h>
#include <pthread.h>
#include <unistd.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h"  // NOLINT

namespace baidu {
namespace paddle_serving {
namespace inference {

using paddle_infer::Config;
Z
zhangjun 已提交
36
using paddle_infer::PrecisionType;
Z
zhangjun 已提交
37 38 39 40
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;

Z
zhangjun 已提交
41
DECLARE_int32(gpuid);
Z
fix  
zhangjun 已提交
42 43
DECLARE_string(precision);
DECLARE_bool(use_calib);
Z
zhangjun 已提交
44

Z
zhangjun 已提交
45 46
static const int max_batch = 32;
static const int min_subgraph_size = 3;
Z
fix  
zhangjun 已提交
47 48
static PrecisionType precision_type;

// Builds a single-element batch of warm-up input: one FLOAT32 tensor named
// "image" with shape {2, 3, 300, 300}. The tensor's data buffer is resized to
// hold that many floats but no values are written into it here.
std::shared_ptr<std::vector<paddle::PaddleTensor>> PrepareWarmupData() {
  paddle::PaddleTensor input;
  input.name = "image";
  input.shape = {2, 3, 300, 300};
  input.dtype = paddle::PaddleDType::FLOAT32;
  // Byte size of the buffer: one float per element of the shape above.
  input.data.Resize(sizeof(float) * 2 * 3 * 300 * 300);

  auto batch = std::make_shared<std::vector<paddle::PaddleTensor>>(1);
  batch->front() = std::move(input);
  return batch;
}

// Translates a precision name into a PrecisionType. The name is lower-cased
// with predictor::ToLower() before comparison.
//   "int8" -> kInt8
//   "fp16" -> kHalf
//   "fp32" or any other value -> kFloat32
PrecisionType GetPrecision(const std::string& precision_data) {
  const std::string lowered = predictor::ToLower(precision_data);
  if (lowered == "int8") {
    return PrecisionType::kInt8;
  }
  if (lowered == "fp16") {
    return PrecisionType::kHalf;
  }
  // "fp32" and every unrecognized name resolve to single precision.
  return PrecisionType::kFloat32;
}

// Returns the name (not the full path) of the first regular file in `path`
// whose name contains any entry of `suffixVector`. Returns an empty string
// when the directory cannot be opened or no entry matches.
//
// The result is returned by value. The previous `const std::string&` return
// type referred to a function-local string, so every caller read a dangling
// reference. Existing callers that copy the result or bind it to a const
// reference keep compiling unchanged.
//
// Despite the parameter name, matching is a substring search: an entry of
// `suffixVector` may appear anywhere in the file name. Entries are visited in
// readdir() order, and entries whose d_type is not DT_REG are skipped.
//
// NOTE(review): this function is defined (not just declared) in a header
// without `inline`; confirm the header is included from a single translation
// unit only.
std::string getFileBySuffix(const std::string& path,
                            const std::vector<std::string>& suffixVector) {
  DIR* dp = opendir(path.c_str());
  if (dp == nullptr) {
    return std::string();
  }

  std::string fileName;
  struct dirent* dirp = nullptr;
  // Stop at the first match so the directory is not scanned further.
  while (fileName.empty() && (dirp = readdir(dp)) != nullptr) {
    if (dirp->d_type != DT_REG) {
      continue;
    }
    const std::string entryName(dirp->d_name);
    for (const std::string& suffix : suffixVector) {
      if (entryName.find(suffix) != std::string::npos) {
        fileName = entryName;
        break;
      }
    }
  }
  closedir(dp);
  return fileName;
}

// Engine Base
H
HexToString 已提交
98
class EngineCore {
Z
zhangjun 已提交
99
 public:
H
HexToString 已提交
100
  virtual ~EngineCore() {}
Z
zhangjun 已提交
101
  virtual std::vector<std::string> GetInputNames() {
Z
zhangjun 已提交
102
    return _predictor->GetInputNames();
Z
zhangjun 已提交
103 104 105
  }

  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
Z
zhangjun 已提交
106
    return _predictor->GetInputHandle(name);
Z
zhangjun 已提交
107 108 109
  }

  virtual std::vector<std::string> GetOutputNames() {
Z
zhangjun 已提交
110
    return _predictor->GetOutputNames();
Z
zhangjun 已提交
111 112 113
  }

  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
Z
zhangjun 已提交
114
    return _predictor->GetOutputHandle(name);
Z
zhangjun 已提交
115 116 117
  }

  virtual bool Run() {
Z
zhangjun 已提交
118
    if (!_predictor->Run()) {
Z
zhangjun 已提交
119 120 121 122 123 124
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }

125
  virtual int create(const configure::EngineDesc& conf, int gpu_id) = 0;
Z
zhangjun 已提交
126

Z
update  
zhangjun 已提交
127 128
  virtual int clone(void* predictor) {
    if (predictor == NULL) {
Z
zhangjun 已提交
129 130 131
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
Z
zhangjun 已提交
132 133
    Predictor* prep = static_cast<Predictor*>(predictor);
    _predictor = prep->Clone();
Z
update  
zhangjun 已提交
134 135
    if (_predictor.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
Z
zhangjun 已提交
136 137 138 139 140
      return -1;
    }
    return 0;
  }

Z
update  
zhangjun 已提交
141
  virtual void* get() { return _predictor.get(); }
Z
zhangjun 已提交
142 143

 protected:
Z
update  
zhangjun 已提交
144
  std::shared_ptr<Predictor> _predictor;
Z
zhangjun 已提交
145 146
};

// Paddle Inference Engine
H
HexToString 已提交
148
class PaddleInferenceEngine : public EngineCore {
Z
zhangjun 已提交
149
 public:
150
  int create(const configure::EngineDesc& engine_conf, int gpu_id) {
Z
update  
zhangjun 已提交
151 152
    std::string model_path = engine_conf.model_dir();
    if (access(model_path.c_str(), F_OK) == -1) {
Z
zhangjun 已提交
153
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
Z
update  
zhangjun 已提交
154
                 << model_path;
Z
zhangjun 已提交
155 156 157 158
      return -1;
    }

    Config config;
Z
update  
zhangjun 已提交
159
    // todo, auto config(zhangjun)
Z
zhangjun 已提交
160 161 162
    if (engine_conf.has_encrypted_model() && engine_conf.encrypted_model()) {
      // decrypt model
      std::string model_buffer, params_buffer, key_buffer;
H
HexToString 已提交
163 164 165
      predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
      predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
      predictor::ReadBinaryFile(model_path + "/key", &key_buffer);
Z
zhangjun 已提交
166 167 168 169 170 171 172 173 174

      auto cipher = paddle::MakeCipher("");
      std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
      std::string real_params_buffer =
          cipher->Decrypt(params_buffer, key_buffer);
      config.SetModelBuffer(&real_model_buffer[0],
                            real_model_buffer.size(),
                            &real_params_buffer[0],
                            real_params_buffer.size());
H
HexToString 已提交
175 176 177
    } else if (engine_conf.has_combined_model() &&
               (!engine_conf.combined_model())) {
      config.SetModel(model_path);
Z
update  
zhangjun 已提交
178
    } else {
H
HexToString 已提交
179 180 181 182 183 184 185 186 187
      std::vector<std::string> suffixParaVector = {".pdiparams", "__params__"};
      std::vector<std::string> suffixModelVector = {".pdmodel", "__model__"};
      std::string paraFileName = getFileBySuffix(model_path, suffixParaVector);
      std::string modelFileName =
          getFileBySuffix(model_path, suffixModelVector);
      if (paraFileName.length() != 0 && modelFileName.length() != 0) {
        config.SetParamsFile(model_path + "/" + paraFileName);
        config.SetProgFile(model_path + "/" + modelFileName);
      }
Z
zhangjun 已提交
188
    }
Z
zhangjun 已提交
189

Z
zhangjun 已提交
190
    config.SwitchSpecifyInputNames(true);
Z
update  
zhangjun 已提交
191 192 193
    config.SetCpuMathLibraryNumThreads(1);
    if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
      // 2000MB GPU memory
194 195 196 197 198
      config.EnableUseGpu(50, gpu_id);
      if (engine_conf.has_gpu_multi_stream() &&
          engine_conf.gpu_multi_stream()) {
        config.EnableGpuMultiStream();
      }
Z
zhangjun 已提交
199
    }
Z
fix  
zhangjun 已提交
200
    precision_type = GetPrecision(FLAGS_precision);
Z
zhangjun 已提交
201

Z
update  
zhangjun 已提交
202 203 204 205 206 207 208
    if (engine_conf.has_enable_ir_optimization() &&
        !engine_conf.enable_ir_optimization()) {
      config.SwitchIrOptim(false);
    } else {
      config.SwitchIrOptim(true);
    }

Z
update  
zhangjun 已提交
209
    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
210
      config.SwitchIrOptim(true);
Z
zhangjun 已提交
211
      if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
212 213 214 215 216
        config.EnableUseGpu(50, gpu_id);
        if (engine_conf.has_gpu_multi_stream() &&
            engine_conf.gpu_multi_stream()) {
          config.EnableGpuMultiStream();
        }
Z
zhangjun 已提交
217
      }
Z
update  
zhangjun 已提交
218 219 220
      config.EnableTensorRtEngine(1 << 20,
                                  max_batch,
                                  min_subgraph_size,
221
                                  precision_type,
Z
update  
zhangjun 已提交
222
                                  false,
Z
fix  
zhangjun 已提交
223
                                  FLAGS_use_calib);
Z
update  
zhangjun 已提交
224
      LOG(INFO) << "create TensorRT predictor";
Z
zhangjun 已提交
225 226
    }

Z
zhangjun 已提交
227
    if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
228 229 230 231 232 233
      config.EnableLiteEngine(precision_type, true);
    }

    if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
        (engine_conf.has_use_lite() && !engine_conf.use_lite() &&
         engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
Z
zhangjun 已提交
234
#ifdef WITH_MKLML
Z
update  
zhangjun 已提交
235 236 237 238 239 240
#ifdef WITH_MKLDNN
      config.EnableMKLDNN();
      config.SwitchIrOptim(true);
      config.DisableGpu();
      // config.SetCpuMathLibraryNumThreads(2);

Z
fix  
zhangjun 已提交
241
      if (precision_type == PrecisionType::kInt8) {
242
        config.EnableMkldnnQuantizer();
Z
update  
zhangjun 已提交
243
        auto quantizer_config = config.mkldnn_quantizer_config();
244
        // TODO(somebody): warmup data
Z
update  
zhangjun 已提交
245 246 247
        // quantizer_config -> SetWarmupData();
        // quantizer_config -> SetWarmupBatchSize();
        // quantizer_config -> SetEnabledOpTypes(4);
Z
fix  
zhangjun 已提交
248
      } else if (precision_type == PrecisionType::kHalf) {
249 250
        config.EnableMkldnnBfloat16();
      }
Z
update  
zhangjun 已提交
251
#endif
Z
zhangjun 已提交
252
#endif
Z
zhangjun 已提交
253 254
    }

Z
zhangjun 已提交
255
    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
Z
update  
zhangjun 已提交
256 257 258
      // 2 MB l3 cache
      config.EnableXpu(2 * 1024 * 1024);
    }
Z
zhangjun 已提交
259

Z
zhangjun 已提交
260 261
    if (engine_conf.has_enable_memory_optimization() &&
        engine_conf.enable_memory_optimization()) {
Z
update  
zhangjun 已提交
262
      config.EnableMemoryOptim();
Z
zhangjun 已提交
263
    }
Z
zhangjun 已提交
264

Z
zhangjun 已提交
265
    predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
Z
update  
zhangjun 已提交
266 267
    _predictor = CreatePredictor(config);
    if (NULL == _predictor.get()) {
Z
zhangjun 已提交
268
      LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
Z
zhangjun 已提交
269 270
      return -1;
    }
Z
update  
zhangjun 已提交
271

Z
zhangjun 已提交
272
    VLOG(2) << "create paddle predictor sucess, path: " << model_path;
Z
zhangjun 已提交
273 274 275 276
    return 0;
  }
};

}  // namespace inference
}  // namespace paddle_serving
}  // namespace baidu