// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

H
HexToString 已提交
17
#include <dirent.h>
#include <pthread.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fstream>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h"  // NOLINT

namespace baidu {
namespace paddle_serving {
namespace inference {

using paddle_infer::Config;
Z
zhangjun 已提交
36
using paddle_infer::PrecisionType;
Z
zhangjun 已提交
37 38 39
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
40
using paddle_infer::DistConfig;
Z
zhangjun 已提交
41

Z
zhangjun 已提交
42
DECLARE_int32(gpuid);
Z
fix  
zhangjun 已提交
43 44
DECLARE_string(precision);
DECLARE_bool(use_calib);
45 46 47
DECLARE_string(nnadapter_device_names);
DECLARE_string(nnadapter_context_properties);
DECLARE_string(nnadapter_model_cache_dir);
Z
zhangjun 已提交
48

Z
zhangjun 已提交
49 50
static const int max_batch = 32;
static const int min_subgraph_size = 3;
Z
fix  
zhangjun 已提交
51 52
static PrecisionType precision_type;

Z
update  
zhangjun 已提交
53 54 55
// Builds a one-element batch of placeholder input data.
//
// The returned vector holds a single FLOAT32 tensor named "image" with shape
// {2, 3, 300, 300}. Its buffer is resized to fit that shape; this function
// does not write any values into it.
// NOTE(review): presumably meant as MKLDNN quantizer warm-up data (see the
// TODO in PaddleInferenceEngine::create) -- confirm before relying on it.
//
// Declared inline because it is defined in a header: without it, including
// this header from more than one translation unit is a multiple-definition
// link error.
inline std::shared_ptr<std::vector<paddle::PaddleTensor>> PrepareWarmupData() {
  paddle::PaddleTensor images;
  images.name = "image";
  images.shape = {2, 3, 300, 300};
  images.dtype = paddle::PaddleDType::FLOAT32;

  // Derive the byte size from the shape so the two can never drift apart.
  size_t element_count = 1;
  for (const int dim : images.shape) {
    element_count *= static_cast<size_t>(dim);
  }
  images.data.Resize(sizeof(float) * element_count);

  auto warmup_data = std::make_shared<std::vector<paddle::PaddleTensor>>(1);
  (*warmup_data)[0] = std::move(images);
  return warmup_data;
}

Z
fix  
zhangjun 已提交
65 66 67 68 69 70 71 72 73 74 75
// Maps a precision name to a Paddle Inference PrecisionType.
//
// precision_data is first normalized through predictor::ToLower and then
// compared against "fp32", "int8" and "fp16":
//   "int8" -> PrecisionType::kInt8
//   "fp16" -> PrecisionType::kHalf
//   "fp32" or anything else -> PrecisionType::kFloat32
//
// Declared inline because it is defined in a header (avoids multiple
// definitions when the header is included from several translation units).
inline PrecisionType GetPrecision(const std::string& precision_data) {
  // Deliberately not called `precision_type`: that name belongs to the
  // file-level variable and the original local shadowed it.
  const std::string normalized = predictor::ToLower(precision_data);
  if (normalized == "int8") {
    return PrecisionType::kInt8;
  }
  if (normalized == "fp16") {
    return PrecisionType::kHalf;
  }
  // "fp32" and every unrecognized value use single precision.
  return PrecisionType::kFloat32;
}
Z
zhangjun 已提交
76

H
HexToString 已提交
77
// Finds a regular file in directory `path` whose name ends with one of the
// strings in `suffixVector`.
//
// Entries are visited in readdir() order and the first matching entry wins;
// the order of `suffixVector` only matters between suffixes that match the
// same entry, not between different files.
//
// Returns the bare file name (no directory prefix), or an empty string when
// the directory cannot be opened or nothing matches.
//
// Declared inline because it is defined in a header (avoids multiple
// definitions when the header is included from several translation units).
inline const std::string getFileBySuffix(
    const std::string& path, const std::vector<std::string>& suffixVector) {
  std::string fileName;
  DIR* dir = opendir(path.c_str());
  if (dir == nullptr) {
    return fileName;
  }

  struct dirent* entry = nullptr;
  while (fileName.empty() && (entry = readdir(dir)) != nullptr) {
    const std::string entryName(entry->d_name);

    // Cheap check first: does the name end with any requested suffix?
    bool suffixMatches = false;
    for (const std::string& suffix : suffixVector) {
      if (entryName.length() >= suffix.length() &&
          entryName.compare(entryName.length() - suffix.length(),
                            suffix.length(),
                            suffix) == 0) {
        suffixMatches = true;
        break;
      }
    }
    if (!suffixMatches) {
      continue;
    }

    // Not every filesystem fills in d_type (it may be DT_UNKNOWN), and a
    // symlink reports DT_LNK even when it points at a regular file. In both
    // cases ask stat(), which follows symlinks, for the real type.
    bool isRegularFile = (entry->d_type == DT_REG);
    if (entry->d_type == DT_UNKNOWN || entry->d_type == DT_LNK) {
      struct stat info;
      const std::string fullPath = path + "/" + entryName;
      isRegularFile =
          stat(fullPath.c_str(), &info) == 0 && S_ISREG(info.st_mode);
    }
    if (isRegularFile) {
      fileName = entryName;
    }
  }
  closedir(dir);
  return fileName;
}

T
TeslaZhao 已提交
104 105 106
// Engine Core is the base class of inference engines, which can be derived from
// paddle Inference Engine, or inference engines of other machine learning
// platforms
H
HexToString 已提交
107
class EngineCore {
Z
zhangjun 已提交
108
 public:
H
HexToString 已提交
109
  virtual ~EngineCore() {}
Z
zhangjun 已提交
110
  virtual std::vector<std::string> GetInputNames() {
Z
zhangjun 已提交
111
    return _predictor->GetInputNames();
Z
zhangjun 已提交
112 113 114
  }

  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
Z
zhangjun 已提交
115
    return _predictor->GetInputHandle(name);
Z
zhangjun 已提交
116 117 118
  }

  virtual std::vector<std::string> GetOutputNames() {
Z
zhangjun 已提交
119
    return _predictor->GetOutputNames();
Z
zhangjun 已提交
120 121 122
  }

  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
Z
zhangjun 已提交
123
    return _predictor->GetOutputHandle(name);
Z
zhangjun 已提交
124 125 126
  }

  virtual bool Run() {
Z
zhangjun 已提交
127
    if (!_predictor->Run()) {
Z
zhangjun 已提交
128 129 130 131 132 133
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }

134
  virtual int create(const configure::EngineDesc& conf, int gpu_id) = 0;
Z
zhangjun 已提交
135

Z
update  
zhangjun 已提交
136 137
  virtual int clone(void* predictor) {
    if (predictor == NULL) {
Z
zhangjun 已提交
138 139 140
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
Z
zhangjun 已提交
141 142
    Predictor* prep = static_cast<Predictor*>(predictor);
    _predictor = prep->Clone();
Z
update  
zhangjun 已提交
143 144
    if (_predictor.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
Z
zhangjun 已提交
145 146 147 148 149
      return -1;
    }
    return 0;
  }

Z
update  
zhangjun 已提交
150
  virtual void* get() { return _predictor.get(); }
Z
zhangjun 已提交
151 152

 protected:
T
TeslaZhao 已提交
153 154 155 156 157
  // _predictor is a prediction instance of Paddle Inference.
  // when inferring on the CPU, _predictor is bound to a model.
  // when inferring on the GPU, _predictor is bound to a model and a GPU card.
  // Therefore, when using GPU multi-card inference, you need to create multiple
  // EngineCore.
Z
update  
zhangjun 已提交
158
  std::shared_ptr<Predictor> _predictor;
Z
zhangjun 已提交
159 160
};

Z
update  
zhangjun 已提交
161
// Paddle Inference Engine
H
HexToString 已提交
162
class PaddleInferenceEngine : public EngineCore {
Z
zhangjun 已提交
163
 public:
164
  int create(const configure::EngineDesc& engine_conf, int gpu_id) {
Z
update  
zhangjun 已提交
165 166
    std::string model_path = engine_conf.model_dir();
    if (access(model_path.c_str(), F_OK) == -1) {
Z
zhangjun 已提交
167
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
Z
update  
zhangjun 已提交
168
                 << model_path;
Z
zhangjun 已提交
169 170 171 172
      return -1;
    }

    Config config;
H
fix bug  
HexToString 已提交
173 174 175 176
    std::vector<std::string> suffixParaVector = {
        ".pdiparams", "__params__", "params"};
    std::vector<std::string> suffixModelVector = {
        ".pdmodel", "__model__", "model"};
H
HexToString 已提交
177 178 179 180 181 182 183 184 185 186 187
    std::string paraFileName = getFileBySuffix(model_path, suffixParaVector);
    std::string modelFileName = getFileBySuffix(model_path, suffixModelVector);

    std::string encryParaPath = model_path + "/encrypt_model";
    std::string encryModelPath = model_path + "/encrypt_params";
    std::string encryKeyPath = model_path + "/key";

    // encrypt model
    if (access(encryParaPath.c_str(), F_OK) != -1 &&
        access(encryModelPath.c_str(), F_OK) != -1 &&
        access(encryKeyPath.c_str(), F_OK) != -1) {
Z
zhangjun 已提交
188
      // decrypt model
H
HexToString 已提交
189

Z
zhangjun 已提交
190
      std::string model_buffer, params_buffer, key_buffer;
H
HexToString 已提交
191 192 193
      predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
      predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
      predictor::ReadBinaryFile(model_path + "/key", &key_buffer);
Z
zhangjun 已提交
194 195 196 197 198 199 200 201 202

      auto cipher = paddle::MakeCipher("");
      std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
      std::string real_params_buffer =
          cipher->Decrypt(params_buffer, key_buffer);
      config.SetModelBuffer(&real_model_buffer[0],
                            real_model_buffer.size(),
                            &real_params_buffer[0],
                            real_params_buffer.size());
H
HexToString 已提交
203 204 205
    } else if (paraFileName.length() != 0 && modelFileName.length() != 0) {
      config.SetParamsFile(model_path + "/" + paraFileName);
      config.SetProgFile(model_path + "/" + modelFileName);
Z
update  
zhangjun 已提交
206
    } else {
H
HexToString 已提交
207
      config.SetModel(model_path);
Z
zhangjun 已提交
208
    }
Z
zhangjun 已提交
209

210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
    // Enable distributed model inferencing
    DistConfig distCfg;
    if (engine_conf.has_enable_dist_model() &&
        engine_conf.enable_dist_model()) {
      int ep_size = engine_conf.dist_endpoints_size();
      int cur_index = engine_conf.dist_subgraph_index();
      if (ep_size <= cur_index) {
        LOG(ERROR) << "create paddle predictor failed, Distributed model error."
                   << " dist_endpoints_size=" << ep_size
                   << " is not bigger than dist_subgraph_index=" << cur_index;
        return -1;
      }
      std::vector<std::string> vec_eps;
      for (int i = 0; i < ep_size; ++i) {
        vec_eps.emplace_back(engine_conf.dist_endpoints(i));
      }
      distCfg.EnableDistModel(true);
      distCfg.SetCarrierId(engine_conf.dist_carrier_id());
      distCfg.SetRanks(engine_conf.dist_nranks(), cur_index);
      distCfg.SetEndpoints(vec_eps, engine_conf.dist_endpoints(cur_index));
      distCfg.SetCommInitConfig(engine_conf.dist_cfg_file());

      config.SetDistConfig(distCfg);
      LOG(INFO) << "Create Distributed predictor! dist_carrier_id="
                << engine_conf.dist_carrier_id()
                << ", Ranks=" << engine_conf.dist_nranks()
                << ", current index of ranks=" << cur_index
                << ", current endpoint="
                << engine_conf.dist_endpoints(cur_index)
                << ", communicate init config file="
                << engine_conf.dist_cfg_file();
    }

Z
zhangjun 已提交
243
    config.SwitchSpecifyInputNames(true);
Z
update  
zhangjun 已提交
244 245 246
    config.SetCpuMathLibraryNumThreads(1);
    if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
      // 2000MB GPU memory
247 248 249 250 251
      config.EnableUseGpu(50, gpu_id);
      if (engine_conf.has_gpu_multi_stream() &&
          engine_conf.gpu_multi_stream()) {
        config.EnableGpuMultiStream();
      }
Z
zhangjun 已提交
252
    }
Z
fix  
zhangjun 已提交
253
    precision_type = GetPrecision(FLAGS_precision);
Z
zhangjun 已提交
254

Z
update  
zhangjun 已提交
255 256 257 258 259 260 261
    if (engine_conf.has_enable_ir_optimization() &&
        !engine_conf.enable_ir_optimization()) {
      config.SwitchIrOptim(false);
    } else {
      config.SwitchIrOptim(true);
    }

Z
update  
zhangjun 已提交
262
    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
263
      config.SwitchIrOptim(true);
Z
zhangjun 已提交
264
      if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
265 266 267 268 269
        config.EnableUseGpu(50, gpu_id);
        if (engine_conf.has_gpu_multi_stream() &&
            engine_conf.gpu_multi_stream()) {
          config.EnableGpuMultiStream();
        }
Z
zhangjun 已提交
270
      }
271
      config.EnableTensorRtEngine(1 << 25,
Z
update  
zhangjun 已提交
272 273
                                  max_batch,
                                  min_subgraph_size,
274
                                  precision_type,
Z
update  
zhangjun 已提交
275
                                  false,
Z
fix  
zhangjun 已提交
276
                                  FLAGS_use_calib);
Z
update  
zhangjun 已提交
277
      LOG(INFO) << "create TensorRT predictor";
Z
zhangjun 已提交
278 279
    }

Z
zhangjun 已提交
280
    if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
281
      config.EnableLiteEngine(precision_type, true);
282
      config.SwitchIrOptim(true);
283 284 285 286 287
    }

    if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
        (engine_conf.has_use_lite() && !engine_conf.use_lite() &&
         engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
Z
zhangjun 已提交
288
#ifdef WITH_MKLML
Z
update  
zhangjun 已提交
289 290 291 292 293 294
#ifdef WITH_MKLDNN
      config.EnableMKLDNN();
      config.SwitchIrOptim(true);
      config.DisableGpu();
      // config.SetCpuMathLibraryNumThreads(2);

Z
fix  
zhangjun 已提交
295
      if (precision_type == PrecisionType::kInt8) {
296
        config.EnableMkldnnQuantizer();
Z
update  
zhangjun 已提交
297
        auto quantizer_config = config.mkldnn_quantizer_config();
298
        // TODO(somebody): warmup data
Z
update  
zhangjun 已提交
299 300 301
        // quantizer_config -> SetWarmupData();
        // quantizer_config -> SetWarmupBatchSize();
        // quantizer_config -> SetEnabledOpTypes(4);
Z
fix  
zhangjun 已提交
302
      } else if (precision_type == PrecisionType::kHalf) {
303 304
        config.EnableMkldnnBfloat16();
      }
Z
update  
zhangjun 已提交
305
#endif
Z
zhangjun 已提交
306
#endif
Z
zhangjun 已提交
307 308
    }

Z
zhangjun 已提交
309
    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
Z
update  
zhangjun 已提交
310 311
      // 2 MB l3 cache
      config.EnableXpu(2 * 1024 * 1024);
S
ShiningZhang 已提交
312
      config.SetXpuDeviceId(gpu_id);
Z
update  
zhangjun 已提交
313
    }
Z
zhangjun 已提交
314

H
fix bug  
HexToString 已提交
315
    if (engine_conf.has_use_ascend_cl() && engine_conf.use_ascend_cl()) {
316
      if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
H
fix bug  
HexToString 已提交
317
        // for ascend 310
318 319
        FLAGS_nnadapter_device_names = "huawei_ascend_npu";
        FLAGS_nnadapter_context_properties =
H
fix bug  
HexToString 已提交
320
            "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" + std::to_string(gpu_id);
321 322
        FLAGS_nnadapter_model_cache_dir = "";
        config.NNAdapter()
H
fix bug  
HexToString 已提交
323 324 325 326
            .Enable()
            .SetDeviceNames({FLAGS_nnadapter_device_names})
            .SetContextProperties(FLAGS_nnadapter_context_properties)
            .SetModelCacheDir(FLAGS_nnadapter_model_cache_dir);
327
        LOG(INFO) << "Enable Lite NNAdapter for Ascend,"
H
fix bug  
HexToString 已提交
328
                  << "nnadapter_device_names=" << FLAGS_nnadapter_device_names
329 330 331 332
                  << ",nnadapter_context_properties="
                  << FLAGS_nnadapter_context_properties
                  << ",nnadapter_model_cache_dir="
                  << FLAGS_nnadapter_model_cache_dir;
S
ShiningZhang 已提交
333
      } else {
S
ShiningZhang 已提交
334
        // for ascend 910
S
ShiningZhang 已提交
335
        config.EnableNpu(gpu_id);
336 337 338
      }
    }

Z
zhangjun 已提交
339 340
    if (engine_conf.has_enable_memory_optimization() &&
        engine_conf.enable_memory_optimization()) {
Z
update  
zhangjun 已提交
341
      config.EnableMemoryOptim();
Z
zhangjun 已提交
342
    }
Z
zhangjun 已提交
343

Z
zhangjun 已提交
344
    predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
Z
update  
zhangjun 已提交
345 346
    _predictor = CreatePredictor(config);
    if (NULL == _predictor.get()) {
Z
zhangjun 已提交
347
      LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
Z
zhangjun 已提交
348 349
      return -1;
    }
Z
update  
zhangjun 已提交
350

Z
zhangjun 已提交
351
    VLOG(2) << "create paddle predictor sucess, path: " << model_path;
Z
zhangjun 已提交
352 353 354 355
    return 0;
  }
};

Z
update  
zhangjun 已提交
356
}  // namespace inference
Z
zhangjun 已提交
357 358
}  // namespace paddle_serving
}  // namespace baidu