// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <dirent.h>
#include <pthread.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fstream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h"  // NOLINT

namespace baidu {
namespace paddle_serving {
namespace inference {

using paddle_infer::Config;
Z
zhangjun 已提交
36
using paddle_infer::PrecisionType;
Z
zhangjun 已提交
37 38 39
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;
40
using paddle_infer::DistConfig;
Z
zhangjun 已提交
41

Z
zhangjun 已提交
42
DECLARE_int32(gpuid);
Z
fix  
zhangjun 已提交
43 44
DECLARE_string(precision);
DECLARE_bool(use_calib);
45 46 47
DECLARE_string(nnadapter_device_names);
DECLARE_string(nnadapter_context_properties);
DECLARE_string(nnadapter_model_cache_dir);
Z
zhangjun 已提交
48

Z
zhangjun 已提交
49 50
static const int max_batch = 32;
static const int min_subgraph_size = 3;
Z
fix  
zhangjun 已提交
51 52
static PrecisionType precision_type;

Z
update  
zhangjun 已提交
53 54 55
// Builds a one-element warm-up batch: a FLOAT32 tensor named "image" with
// shape {2, 3, 300, 300}. The data buffer is sized for that shape but its
// contents are not written here.
//
// Declared `inline` because this definition lives in a header; without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline std::shared_ptr<std::vector<paddle::PaddleTensor>> PrepareWarmupData() {
  paddle::PaddleTensor images;
  images.name = "image";
  images.shape = {2, 3, 300, 300};
  images.dtype = paddle::PaddleDType::FLOAT32;
  images.data.Resize(sizeof(float) * 2 * 3 * 300 * 300);

  // Move the tensor straight into the vector instead of default-constructing
  // a placeholder element and overwriting it.
  auto warmup_data = std::make_shared<std::vector<paddle::PaddleTensor>>();
  warmup_data->push_back(std::move(images));
  return warmup_data;
}

Z
fix  
zhangjun 已提交
65 66 67 68 69 70 71 72 73 74 75
// Maps a precision name to a PrecisionType, after lower-casing it with
// predictor::ToLower():
//   "int8" -> PrecisionType::kInt8
//   "fp16" -> PrecisionType::kHalf
//   anything else (including "fp32") -> PrecisionType::kFloat32
//
// Declared `inline` because this definition lives in a header; without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline PrecisionType GetPrecision(const std::string& precision_data) {
  // Named `lowered` so it no longer shadows the file-level `precision_type`.
  const std::string lowered = predictor::ToLower(precision_data);
  if (lowered == "int8") {
    return PrecisionType::kInt8;
  }
  if (lowered == "fp16") {
    return PrecisionType::kHalf;
  }
  return PrecisionType::kFloat32;
}
Z
zhangjun 已提交
76

H
HexToString 已提交
77
// Returns the name (not the full path) of the first regular file in directory
// `path` whose name ends with any entry of `suffixVector`.
//
// Entries are examined in readdir() order, so the position of a suffix inside
// `suffixVector` does not express a priority between different files.
// Returns an empty string when the directory cannot be opened or when no
// regular file matches. Symbolic links are not followed.
//
// Declared `inline` because this definition lives in a header; without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline const std::string getFileBySuffix(
    const std::string& path, const std::vector<std::string>& suffixVector) {
  std::string fileName;
  DIR* dp = opendir(path.c_str());
  if (dp == nullptr) {
    return fileName;
  }

  struct dirent* dirp = nullptr;
  while (fileName.empty() && (dirp = readdir(dp)) != nullptr) {
    const std::string candidate(dirp->d_name);

    bool is_regular = (dirp->d_type == DT_REG);
    if (dirp->d_type == DT_UNKNOWN) {
      // Not every filesystem fills in d_type; readdir(3) requires callers to
      // cope with DT_UNKNOWN. lstat() (rather than stat()) keeps symlinks
      // excluded, matching what a DT_LNK entry would get above.
      struct stat st;
      const std::string fullPath = path + "/" + candidate;
      is_regular = lstat(fullPath.c_str(), &st) == 0 && S_ISREG(st.st_mode);
    }
    if (!is_regular) {
      continue;
    }

    for (const std::string& suffix : suffixVector) {
      // compare() checks the tail in place, without a temporary substring.
      if (candidate.length() >= suffix.length() &&
          candidate.compare(candidate.length() - suffix.length(),
                            suffix.length(),
                            suffix) == 0) {
        fileName = candidate;
        break;
      }
    }
  }
  closedir(dp);
  return fileName;
}

T
TeslaZhao 已提交
104 105 106
// Engine Core is the base class of inference engines, which can be derived from
// paddle Inference Engine, or inference engines of other machine learning
// platforms
H
HexToString 已提交
107
class EngineCore {
Z
zhangjun 已提交
108
 public:
H
HexToString 已提交
109
  virtual ~EngineCore() {}
Z
zhangjun 已提交
110
  virtual std::vector<std::string> GetInputNames() {
Z
zhangjun 已提交
111
    return _predictor->GetInputNames();
Z
zhangjun 已提交
112 113 114
  }

  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
Z
zhangjun 已提交
115
    return _predictor->GetInputHandle(name);
Z
zhangjun 已提交
116 117 118
  }

  virtual std::vector<std::string> GetOutputNames() {
Z
zhangjun 已提交
119
    return _predictor->GetOutputNames();
Z
zhangjun 已提交
120 121 122
  }

  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
Z
zhangjun 已提交
123
    return _predictor->GetOutputHandle(name);
Z
zhangjun 已提交
124 125 126
  }

  virtual bool Run() {
Z
zhangjun 已提交
127
    if (!_predictor->Run()) {
Z
zhangjun 已提交
128 129 130 131 132 133
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }

134
  virtual int create(const configure::EngineDesc& conf, int gpu_id) = 0;
Z
zhangjun 已提交
135

Z
update  
zhangjun 已提交
136 137
  virtual int clone(void* predictor) {
    if (predictor == NULL) {
Z
zhangjun 已提交
138 139 140
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
Z
zhangjun 已提交
141 142
    Predictor* prep = static_cast<Predictor*>(predictor);
    _predictor = prep->Clone();
Z
update  
zhangjun 已提交
143 144
    if (_predictor.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
Z
zhangjun 已提交
145 146 147 148 149
      return -1;
    }
    return 0;
  }

Z
update  
zhangjun 已提交
150
  virtual void* get() { return _predictor.get(); }
Z
zhangjun 已提交
151 152

 protected:
T
TeslaZhao 已提交
153 154 155 156 157
  // _predictor is a prediction instance of Paddle Inference.
  // when inferring on the CPU, _predictor is bound to a model.
  // when inferring on the GPU, _predictor is bound to a model and a GPU card.
  // Therefore, when using GPU multi-card inference, you need to create multiple
  // EngineCore.
Z
update  
zhangjun 已提交
158
  std::shared_ptr<Predictor> _predictor;
Z
zhangjun 已提交
159 160
};

Z
update  
zhangjun 已提交
161
// Paddle Inference Engine
H
HexToString 已提交
162
class PaddleInferenceEngine : public EngineCore {
Z
zhangjun 已提交
163
 public:
164
  int create(const configure::EngineDesc& engine_conf, int gpu_id) {
Z
update  
zhangjun 已提交
165 166
    std::string model_path = engine_conf.model_dir();
    if (access(model_path.c_str(), F_OK) == -1) {
Z
zhangjun 已提交
167
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
Z
update  
zhangjun 已提交
168
                 << model_path;
Z
zhangjun 已提交
169 170 171 172
      return -1;
    }

    Config config;
H
fix bug  
HexToString 已提交
173 174 175 176
    std::vector<std::string> suffixParaVector = {
        ".pdiparams", "__params__", "params"};
    std::vector<std::string> suffixModelVector = {
        ".pdmodel", "__model__", "model"};
H
HexToString 已提交
177 178 179 180 181 182 183 184 185 186 187
    std::string paraFileName = getFileBySuffix(model_path, suffixParaVector);
    std::string modelFileName = getFileBySuffix(model_path, suffixModelVector);

    std::string encryParaPath = model_path + "/encrypt_model";
    std::string encryModelPath = model_path + "/encrypt_params";
    std::string encryKeyPath = model_path + "/key";

    // encrypt model
    if (access(encryParaPath.c_str(), F_OK) != -1 &&
        access(encryModelPath.c_str(), F_OK) != -1 &&
        access(encryKeyPath.c_str(), F_OK) != -1) {
Z
zhangjun 已提交
188
      // decrypt model
H
HexToString 已提交
189

Z
zhangjun 已提交
190
      std::string model_buffer, params_buffer, key_buffer;
H
HexToString 已提交
191 192 193
      predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
      predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
      predictor::ReadBinaryFile(model_path + "/key", &key_buffer);
Z
zhangjun 已提交
194 195 196 197 198 199 200 201 202

      auto cipher = paddle::MakeCipher("");
      std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
      std::string real_params_buffer =
          cipher->Decrypt(params_buffer, key_buffer);
      config.SetModelBuffer(&real_model_buffer[0],
                            real_model_buffer.size(),
                            &real_params_buffer[0],
                            real_params_buffer.size());
H
HexToString 已提交
203 204 205
    } else if (paraFileName.length() != 0 && modelFileName.length() != 0) {
      config.SetParamsFile(model_path + "/" + paraFileName);
      config.SetProgFile(model_path + "/" + modelFileName);
Z
update  
zhangjun 已提交
206
    } else {
H
HexToString 已提交
207
      config.SetModel(model_path);
Z
zhangjun 已提交
208
    }
Z
zhangjun 已提交
209

210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
    // Enable distributed model inferencing
    DistConfig distCfg;
    if (engine_conf.has_enable_dist_model() &&
        engine_conf.enable_dist_model()) {
      int ep_size = engine_conf.dist_endpoints_size();
      int cur_index = engine_conf.dist_subgraph_index();
      if (ep_size <= cur_index) {
        LOG(ERROR) << "create paddle predictor failed, Distributed model error."
                   << " dist_endpoints_size=" << ep_size
                   << " is not bigger than dist_subgraph_index=" << cur_index;
        return -1;
      }
      std::vector<std::string> vec_eps;
      for (int i = 0; i < ep_size; ++i) {
        vec_eps.emplace_back(engine_conf.dist_endpoints(i));
      }
      distCfg.EnableDistModel(true);
      distCfg.SetCarrierId(engine_conf.dist_carrier_id());
      distCfg.SetRanks(engine_conf.dist_nranks(), cur_index);
      distCfg.SetEndpoints(vec_eps, engine_conf.dist_endpoints(cur_index));
      distCfg.SetCommInitConfig(engine_conf.dist_cfg_file());

      config.SetDistConfig(distCfg);
      LOG(INFO) << "Create Distributed predictor! dist_carrier_id="
                << engine_conf.dist_carrier_id()
                << ", Ranks=" << engine_conf.dist_nranks()
                << ", current index of ranks=" << cur_index
                << ", current endpoint="
                << engine_conf.dist_endpoints(cur_index)
                << ", communicate init config file="
                << engine_conf.dist_cfg_file();
    }

Z
zhangjun 已提交
243
    config.SwitchSpecifyInputNames(true);
T
TeslaZhao 已提交
244
    config.SetCpuMathLibraryNumThreads(engine_conf.cpu_math_thread_num());
Z
update  
zhangjun 已提交
245 246
    if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
      // 2000MB GPU memory
T
TeslaZhao 已提交
247
      config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
248 249 250 251
      if (engine_conf.has_gpu_multi_stream() &&
          engine_conf.gpu_multi_stream()) {
        config.EnableGpuMultiStream();
      }
Z
zhangjun 已提交
252
    }
Z
fix  
zhangjun 已提交
253
    precision_type = GetPrecision(FLAGS_precision);
Z
zhangjun 已提交
254

Z
update  
zhangjun 已提交
255 256 257 258 259 260 261
    if (engine_conf.has_enable_ir_optimization() &&
        !engine_conf.enable_ir_optimization()) {
      config.SwitchIrOptim(false);
    } else {
      config.SwitchIrOptim(true);
    }

S
ShiningZhang 已提交
262 263 264 265 266
    int local_min_subgraph_size = min_subgraph_size;
    if (engine_conf.has_min_subgraph_size()) {
      local_min_subgraph_size = engine_conf.min_subgraph_size();
    }

Z
update  
zhangjun 已提交
267
    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
268
      config.SwitchIrOptim(true);
Z
zhangjun 已提交
269
      if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
T
TeslaZhao 已提交
270
        config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
271 272 273 274
        if (engine_conf.has_gpu_multi_stream() &&
            engine_conf.gpu_multi_stream()) {
          config.EnableGpuMultiStream();
        }
Z
zhangjun 已提交
275
      }
T
TeslaZhao 已提交
276
      config.EnableTensorRtEngine(engine_conf.trt_workspace_size(),
Z
update  
zhangjun 已提交
277
                                  max_batch,
S
ShiningZhang 已提交
278
                                  local_min_subgraph_size,
279
                                  precision_type,
T
TeslaZhao 已提交
280
                                  engine_conf.trt_use_static(),
Z
fix  
zhangjun 已提交
281
                                  FLAGS_use_calib);
S
ShiningZhang 已提交
282 283 284 285 286 287 288 289 290 291
      std::map<std::string, std::vector<int>> min_input_shape;
      std::map<std::string, std::vector<int>> max_input_shape;
      std::map<std::string, std::vector<int>> optim_input_shape;
      if (engine_conf.min_input_shape_size() > 0) {
        for (auto& iter : engine_conf.min_input_shape()) {
          std::string key = iter.first;
          std::string value = iter.second;
          std::istringstream ss(value);
          std::string word;
          std::vector<int> arr;
T
TeslaZhao 已提交
292
          while (ss >> word) {
S
ShiningZhang 已提交
293 294 295 296 297 298 299 300 301 302 303 304
            arr.push_back(std::stoi(word));
          }
          min_input_shape[key] = arr;
        }
      }
      if (engine_conf.max_input_shape_size() > 0) {
        for (auto& iter : engine_conf.max_input_shape()) {
          std::string key = iter.first;
          std::string value = iter.second;
          std::istringstream ss(value);
          std::string word;
          std::vector<int> arr;
T
TeslaZhao 已提交
305
          while (ss >> word) {
S
ShiningZhang 已提交
306 307 308 309 310 311 312 313 314 315 316 317
            arr.push_back(std::stoi(word));
          }
          max_input_shape[key] = arr;
        }
      }
      if (engine_conf.opt_input_shape_size() > 0) {
        for (auto& iter : engine_conf.opt_input_shape()) {
          std::string key = iter.first;
          std::string value = iter.second;
          std::istringstream ss(value);
          std::string word;
          std::vector<int> arr;
T
TeslaZhao 已提交
318
          while (ss >> word) {
S
ShiningZhang 已提交
319 320 321 322 323
            arr.push_back(std::stoi(word));
          }
          optim_input_shape[key] = arr;
        }
      }
T
TeslaZhao 已提交
324 325
      config.SetTRTDynamicShapeInfo(
          min_input_shape, max_input_shape, optim_input_shape);
Z
update  
zhangjun 已提交
326
      LOG(INFO) << "create TensorRT predictor";
Z
zhangjun 已提交
327 328
    }

Z
zhangjun 已提交
329
    if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
330
      config.EnableLiteEngine(precision_type, true);
331
      config.SwitchIrOptim(true);
332 333 334 335 336
    }

    if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
        (engine_conf.has_use_lite() && !engine_conf.use_lite() &&
         engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
Z
zhangjun 已提交
337
#ifdef WITH_MKLML
Z
update  
zhangjun 已提交
338 339 340 341 342 343
#ifdef WITH_MKLDNN
      config.EnableMKLDNN();
      config.SwitchIrOptim(true);
      config.DisableGpu();
      // config.SetCpuMathLibraryNumThreads(2);

Z
fix  
zhangjun 已提交
344
      if (precision_type == PrecisionType::kInt8) {
345
        config.EnableMkldnnQuantizer();
Z
update  
zhangjun 已提交
346
        auto quantizer_config = config.mkldnn_quantizer_config();
347
        // TODO(somebody): warmup data
Z
update  
zhangjun 已提交
348 349 350
        // quantizer_config -> SetWarmupData();
        // quantizer_config -> SetWarmupBatchSize();
        // quantizer_config -> SetEnabledOpTypes(4);
Z
fix  
zhangjun 已提交
351
      } else if (precision_type == PrecisionType::kHalf) {
352 353
        config.EnableMkldnnBfloat16();
      }
Z
update  
zhangjun 已提交
354
#endif
Z
zhangjun 已提交
355
#endif
Z
zhangjun 已提交
356 357
    }

Z
zhangjun 已提交
358
    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
Z
update  
zhangjun 已提交
359 360
      // 2 MB l3 cache
      config.EnableXpu(2 * 1024 * 1024);
S
ShiningZhang 已提交
361
      config.SetXpuDeviceId(gpu_id);
Z
update  
zhangjun 已提交
362
    }
Z
zhangjun 已提交
363

H
fix bug  
HexToString 已提交
364
    if (engine_conf.has_use_ascend_cl() && engine_conf.use_ascend_cl()) {
365
      if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
H
fix bug  
HexToString 已提交
366
        // for ascend 310
367 368
        FLAGS_nnadapter_device_names = "huawei_ascend_npu";
        FLAGS_nnadapter_context_properties =
H
fix bug  
HexToString 已提交
369
            "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" + std::to_string(gpu_id);
370 371
        FLAGS_nnadapter_model_cache_dir = "";
        config.NNAdapter()
H
fix bug  
HexToString 已提交
372 373 374 375
            .Enable()
            .SetDeviceNames({FLAGS_nnadapter_device_names})
            .SetContextProperties(FLAGS_nnadapter_context_properties)
            .SetModelCacheDir(FLAGS_nnadapter_model_cache_dir);
376
        LOG(INFO) << "Enable Lite NNAdapter for Ascend,"
H
fix bug  
HexToString 已提交
377
                  << "nnadapter_device_names=" << FLAGS_nnadapter_device_names
378 379 380 381
                  << ",nnadapter_context_properties="
                  << FLAGS_nnadapter_context_properties
                  << ",nnadapter_model_cache_dir="
                  << FLAGS_nnadapter_model_cache_dir;
S
ShiningZhang 已提交
382
      } else {
S
ShiningZhang 已提交
383
        // for ascend 910
S
ShiningZhang 已提交
384
        config.EnableNpu(gpu_id);
385 386 387
      }
    }

Z
zhangjun 已提交
388 389
    if (engine_conf.has_enable_memory_optimization() &&
        engine_conf.enable_memory_optimization()) {
Z
update  
zhangjun 已提交
390
      config.EnableMemoryOptim();
Z
zhangjun 已提交
391
    }
Z
zhangjun 已提交
392

Z
zhangjun 已提交
393
    predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
Z
update  
zhangjun 已提交
394 395
    _predictor = CreatePredictor(config);
    if (NULL == _predictor.get()) {
Z
zhangjun 已提交
396
      LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
Z
zhangjun 已提交
397 398
      return -1;
    }
Z
update  
zhangjun 已提交
399

T
TeslaZhao 已提交
400 401
    LOG(INFO) << "paddle_engine params : enable_dist_model:"
              << engine_conf.enable_dist_model()
402 403 404 405
              << ", use_gpu: " << engine_conf.has_use_gpu()
              << ", gpu_id: " << gpu_id
              << ", use_gpu_multi_stream: " << engine_conf.gpu_multi_stream()
              << ", precision: " << FLAGS_precision
T
TeslaZhao 已提交
406 407
              << ", enable_ir_optimization: "
              << engine_conf.enable_ir_optimization()
408 409 410 411 412 413 414
              << ", use_trt: " << engine_conf.use_trt()
              << ", trt_max_batch: " << max_batch
              << ", trt_min_subgraph_size: " << min_subgraph_size
              << ", use_calib: " << FLAGS_use_calib
              << ", use_lite: " << engine_conf.use_lite()
              << ", use_ascend_cl: " << engine_conf.has_use_ascend_cl()
              << ", use_xpu: " << engine_conf.use_xpu()
T
TeslaZhao 已提交
415
              << ", enable_memory_optimization: "
T
TeslaZhao 已提交
416 417 418 419 420
              << engine_conf.enable_memory_optimization()
              << ", gpu_memory_mb: " << engine_conf.gpu_memory_mb()
              << ", cpu_math_thread_num: " << engine_conf.cpu_math_thread_num()
              << ", trt_workspace_size: " << engine_conf.trt_workspace_size()
              << ", trt_use_static: " << engine_conf.trt_use_static();
421

Z
zhangjun 已提交
422
    VLOG(2) << "create paddle predictor sucess, path: " << model_path;
Z
zhangjun 已提交
423 424 425 426
    return 0;
  }
};

Z
update  
zhangjun 已提交
427
}  // namespace inference
Z
zhangjun 已提交
428 429
}  // namespace paddle_serving
}  // namespace baidu