paddle_engine.h 12.7 KB
Newer Older
Z
update  
zhangjun 已提交
1
// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Z
zhangjun 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

H
HexToString 已提交
17
#include <dirent.h>
#include <pthread.h>
#include <sys/stat.h>
#include <unistd.h>
#include <exception>
#include <fstream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h"  // NOLINT

namespace baidu {
namespace paddle_serving {
namespace inference {

using paddle_infer::Config;
Z
zhangjun 已提交
36
using paddle_infer::PrecisionType;
Z
zhangjun 已提交
37 38 39 40
using paddle_infer::Predictor;
using paddle_infer::Tensor;
using paddle_infer::CreatePredictor;

Z
zhangjun 已提交
41
DECLARE_int32(gpuid);
Z
fix  
zhangjun 已提交
42 43
DECLARE_string(precision);
DECLARE_bool(use_calib);
44 45 46
DECLARE_string(nnadapter_device_names);
DECLARE_string(nnadapter_context_properties);
DECLARE_string(nnadapter_model_cache_dir);
Z
zhangjun 已提交
47

Z
zhangjun 已提交
48 49
// Passed as the batch-size argument of Config::EnableTensorRtEngine() in
// PaddleInferenceEngine::create().
static const int max_batch = 32;
// Default TensorRT minimum subgraph size; PaddleInferenceEngine::create() uses
// it unless the engine configuration sets min_subgraph_size explicitly.
static const int min_subgraph_size = 3;
Z
fix  
zhangjun 已提交
50 51
// Precision derived from FLAGS_precision. It is assigned in
// PaddleInferenceEngine::create() via GetPrecision() and read there when
// configuring TensorRT, Lite and MKLDNN.
// Being `static` at namespace scope in a header, every translation unit that
// includes this file gets its own copy of the variable.
static PrecisionType precision_type;

Z
update  
zhangjun 已提交
52 53 54
// Builds a one-element warm-up batch: a FLOAT32 tensor named "image" with
// shape {2, 3, 300, 300}. The data buffer is sized to hold the tensor but is
// not filled in here.
//
// Declared `inline` because this function is defined in a header; without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline std::shared_ptr<std::vector<paddle::PaddleTensor>> PrepareWarmupData() {
  // Single source of truth for the shape, so the buffer size below can never
  // drift away from it.
  const std::vector<int> warmup_shape = {2, 3, 300, 300};
  std::size_t element_count = 1;
  for (const int dim : warmup_shape) {
    element_count *= static_cast<std::size_t>(dim);
  }

  paddle::PaddleTensor images;
  images.name = "image";
  images.shape = warmup_shape;
  images.dtype = paddle::PaddleDType::FLOAT32;
  images.data.Resize(sizeof(float) * element_count);

  auto warmup_data = std::make_shared<std::vector<paddle::PaddleTensor>>(1);
  (*warmup_data)[0] = std::move(images);
  return warmup_data;
}

Z
fix  
zhangjun 已提交
64 65 66 67 68 69 70 71 72 73 74
// Maps a precision name to the matching paddle_infer::PrecisionType.
//
// The name is normalized with predictor::ToLower() before comparison.
// Recognized values are "fp32", "int8" and "fp16"; any other value falls back
// to PrecisionType::kFloat32.
//
// Declared `inline` because this function is defined in a header; without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline PrecisionType GetPrecision(const std::string& precision_data) {
  // Not named `precision_type`, which would shadow the file-level variable of
  // the same name.
  const std::string normalized = predictor::ToLower(precision_data);
  if (normalized == "int8") {
    return PrecisionType::kInt8;
  }
  if (normalized == "fp16") {
    return PrecisionType::kHalf;
  }
  // "fp32" and every unrecognized value.
  return PrecisionType::kFloat32;
}
Z
zhangjun 已提交
75

H
HexToString 已提交
76
// Returns the name (not the full path) of a regular file directly inside
// `path` whose name ends with one of the entries of `suffixVector`.
//
// Returns an empty string when the directory cannot be opened or when no
// regular file matches.
//
// The choice is deterministic: a suffix listed earlier in `suffixVector` beats
// a later one, and among files matching the same suffix the lexicographically
// smallest name wins. readdir() yields entries in an unspecified order, so
// "first entry seen" is not a stable answer.
//
// Declared `inline` because this function is defined in a header; without it,
// including the header from more than one translation unit produces
// multiple-definition link errors.
inline const std::string getFileBySuffix(
    const std::string& path, const std::vector<std::string>& suffixVector) {
  std::string fileName = "";
  DIR* dp = opendir(path.c_str());
  if (dp == nullptr) {
    return fileName;
  }

  const std::size_t kNoMatch = suffixVector.size();
  // Index in suffixVector of the suffix matched by `fileName`.
  std::size_t bestIdx = kNoMatch;

  struct dirent* dirp = nullptr;
  while ((dirp = readdir(dp)) != nullptr) {
    const std::string entryName(dirp->d_name);

    // Highest-priority suffix this entry ends with, if any.
    std::size_t matchIdx = kNoMatch;
    for (std::size_t idx = 0; idx < suffixVector.size(); ++idx) {
      const std::string& suffix = suffixVector[idx];
      if (entryName.length() >= suffix.length() &&
          entryName.compare(entryName.length() - suffix.length(),
                            suffix.length(),
                            suffix) == 0) {
        matchIdx = idx;
        break;
      }
    }
    if (matchIdx == kNoMatch) {
      continue;
    }

    // Skip entries that would not improve on the current best candidate.
    if (matchIdx > bestIdx ||
        (matchIdx == bestIdx && !(entryName < fileName))) {
      continue;
    }

    // Not every filesystem fills in d_type; DT_UNKNOWN means "ask the
    // filesystem". lstat() keeps the same meaning as DT_REG (a symlink is not
    // a regular file).
    bool isRegular = (dirp->d_type == DT_REG);
    if (dirp->d_type == DT_UNKNOWN) {
      struct stat st;
      isRegular = lstat((path + "/" + entryName).c_str(), &st) == 0 &&
                  S_ISREG(st.st_mode);
    }
    if (!isRegular) {
      continue;
    }

    bestIdx = matchIdx;
    fileName = entryName;
  }
  closedir(dp);
  return fileName;
}

T
TeslaZhao 已提交
103 104 105
// EngineCore is the base class of inference engines. Derived classes wrap a
// concrete backend, such as Paddle Inference or the inference engine of
// another machine learning platform.
H
HexToString 已提交
106
class EngineCore {
Z
zhangjun 已提交
107
 public:
H
HexToString 已提交
108
  virtual ~EngineCore() {}
Z
zhangjun 已提交
109
  virtual std::vector<std::string> GetInputNames() {
Z
zhangjun 已提交
110
    return _predictor->GetInputNames();
Z
zhangjun 已提交
111 112 113
  }

  virtual std::unique_ptr<Tensor> GetInputHandle(const std::string& name) {
Z
zhangjun 已提交
114
    return _predictor->GetInputHandle(name);
Z
zhangjun 已提交
115 116 117
  }

  virtual std::vector<std::string> GetOutputNames() {
Z
zhangjun 已提交
118
    return _predictor->GetOutputNames();
Z
zhangjun 已提交
119 120 121
  }

  virtual std::unique_ptr<Tensor> GetOutputHandle(const std::string& name) {
Z
zhangjun 已提交
122
    return _predictor->GetOutputHandle(name);
Z
zhangjun 已提交
123 124 125
  }

  virtual bool Run() {
Z
zhangjun 已提交
126
    if (!_predictor->Run()) {
Z
zhangjun 已提交
127 128 129 130 131 132
      LOG(ERROR) << "Failed call Run with paddle predictor";
      return false;
    }
    return true;
  }

133
  virtual int create(const configure::EngineDesc& conf, int gpu_id) = 0;
Z
zhangjun 已提交
134

Z
update  
zhangjun 已提交
135 136
  virtual int clone(void* predictor) {
    if (predictor == NULL) {
Z
zhangjun 已提交
137 138 139
      LOG(ERROR) << "origin paddle Predictor is null.";
      return -1;
    }
Z
zhangjun 已提交
140 141
    Predictor* prep = static_cast<Predictor*>(predictor);
    _predictor = prep->Clone();
Z
update  
zhangjun 已提交
142 143
    if (_predictor.get() == NULL) {
      LOG(ERROR) << "fail to clone paddle predictor: " << predictor;
Z
zhangjun 已提交
144 145 146 147 148
      return -1;
    }
    return 0;
  }

Z
update  
zhangjun 已提交
149
  virtual void* get() { return _predictor.get(); }
Z
zhangjun 已提交
150 151

 protected:
T
TeslaZhao 已提交
152 153 154 155 156
  // _predictor is a prediction instance of Paddle Inference.
  // when inferring on the CPU, _predictor is bound to a model.
  // when inferring on the GPU, _predictor is bound to a model and a GPU card.
  // Therefore, when using GPU multi-card inference, you need to create multiple
  // EngineCore.
Z
update  
zhangjun 已提交
157
  std::shared_ptr<Predictor> _predictor;
Z
zhangjun 已提交
158 159
};

Z
update  
zhangjun 已提交
160
// Paddle Inference Engine
H
HexToString 已提交
161
class PaddleInferenceEngine : public EngineCore {
Z
zhangjun 已提交
162
 public:
163
  int create(const configure::EngineDesc& engine_conf, int gpu_id) {
Z
update  
zhangjun 已提交
164 165
    std::string model_path = engine_conf.model_dir();
    if (access(model_path.c_str(), F_OK) == -1) {
Z
zhangjun 已提交
166
      LOG(ERROR) << "create paddle predictor failed, path not exits: "
Z
update  
zhangjun 已提交
167
                 << model_path;
Z
zhangjun 已提交
168 169 170 171
      return -1;
    }

    Config config;
H
fix bug  
HexToString 已提交
172 173 174 175
    std::vector<std::string> suffixParaVector = {
        ".pdiparams", "__params__", "params"};
    std::vector<std::string> suffixModelVector = {
        ".pdmodel", "__model__", "model"};
H
HexToString 已提交
176 177 178 179 180 181 182 183 184 185 186
    std::string paraFileName = getFileBySuffix(model_path, suffixParaVector);
    std::string modelFileName = getFileBySuffix(model_path, suffixModelVector);

    std::string encryParaPath = model_path + "/encrypt_model";
    std::string encryModelPath = model_path + "/encrypt_params";
    std::string encryKeyPath = model_path + "/key";

    // encrypt model
    if (access(encryParaPath.c_str(), F_OK) != -1 &&
        access(encryModelPath.c_str(), F_OK) != -1 &&
        access(encryKeyPath.c_str(), F_OK) != -1) {
Z
zhangjun 已提交
187
      // decrypt model
H
HexToString 已提交
188

Z
zhangjun 已提交
189
      std::string model_buffer, params_buffer, key_buffer;
H
HexToString 已提交
190 191 192
      predictor::ReadBinaryFile(model_path + "/encrypt_model", &model_buffer);
      predictor::ReadBinaryFile(model_path + "/encrypt_params", &params_buffer);
      predictor::ReadBinaryFile(model_path + "/key", &key_buffer);
Z
zhangjun 已提交
193 194 195 196 197 198 199 200 201

      auto cipher = paddle::MakeCipher("");
      std::string real_model_buffer = cipher->Decrypt(model_buffer, key_buffer);
      std::string real_params_buffer =
          cipher->Decrypt(params_buffer, key_buffer);
      config.SetModelBuffer(&real_model_buffer[0],
                            real_model_buffer.size(),
                            &real_params_buffer[0],
                            real_params_buffer.size());
H
HexToString 已提交
202 203 204
    } else if (paraFileName.length() != 0 && modelFileName.length() != 0) {
      config.SetParamsFile(model_path + "/" + paraFileName);
      config.SetProgFile(model_path + "/" + modelFileName);
Z
update  
zhangjun 已提交
205
    } else {
H
HexToString 已提交
206
      config.SetModel(model_path);
Z
zhangjun 已提交
207
    }
Z
zhangjun 已提交
208

Z
zhangjun 已提交
209
    config.SwitchSpecifyInputNames(true);
Z
update  
zhangjun 已提交
210 211 212
    config.SetCpuMathLibraryNumThreads(1);
    if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
      // 2000MB GPU memory
213 214 215 216 217
      config.EnableUseGpu(50, gpu_id);
      if (engine_conf.has_gpu_multi_stream() &&
          engine_conf.gpu_multi_stream()) {
        config.EnableGpuMultiStream();
      }
Z
zhangjun 已提交
218
    }
Z
fix  
zhangjun 已提交
219
    precision_type = GetPrecision(FLAGS_precision);
Z
zhangjun 已提交
220

Z
update  
zhangjun 已提交
221 222 223 224 225 226 227
    if (engine_conf.has_enable_ir_optimization() &&
        !engine_conf.enable_ir_optimization()) {
      config.SwitchIrOptim(false);
    } else {
      config.SwitchIrOptim(true);
    }

S
ShiningZhang 已提交
228 229 230 231 232
    int local_min_subgraph_size = min_subgraph_size;
    if (engine_conf.has_min_subgraph_size()) {
      local_min_subgraph_size = engine_conf.min_subgraph_size();
    }

Z
update  
zhangjun 已提交
233
    if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
234
      config.SwitchIrOptim(true);
Z
zhangjun 已提交
235
      if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
236 237 238 239 240
        config.EnableUseGpu(50, gpu_id);
        if (engine_conf.has_gpu_multi_stream() &&
            engine_conf.gpu_multi_stream()) {
          config.EnableGpuMultiStream();
        }
Z
zhangjun 已提交
241
      }
Z
update  
zhangjun 已提交
242 243
      config.EnableTensorRtEngine(1 << 20,
                                  max_batch,
S
ShiningZhang 已提交
244
                                  local_min_subgraph_size,
245
                                  precision_type,
Z
update  
zhangjun 已提交
246
                                  false,
Z
fix  
zhangjun 已提交
247
                                  FLAGS_use_calib);
S
ShiningZhang 已提交
248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292
      std::map<std::string, std::vector<int>> min_input_shape;
      std::map<std::string, std::vector<int>> max_input_shape;
      std::map<std::string, std::vector<int>> optim_input_shape;
      if (engine_conf.min_input_shape_size() > 0) {
        for (auto& iter : engine_conf.min_input_shape()) {
          std::string key = iter.first;
          std::string value = iter.second;
          std::istringstream ss(value);
          std::string word;
          std::vector<int> arr;
          while(ss >> word) {
            arr.push_back(std::stoi(word));
          }
          min_input_shape[key] = arr;
        }
      }
      if (engine_conf.max_input_shape_size() > 0) {
        for (auto& iter : engine_conf.max_input_shape()) {
          std::string key = iter.first;
          std::string value = iter.second;
          std::istringstream ss(value);
          std::string word;
          std::vector<int> arr;
          while(ss >> word) {
            arr.push_back(std::stoi(word));
          }
          max_input_shape[key] = arr;
        }
      }
      if (engine_conf.opt_input_shape_size() > 0) {
        for (auto& iter : engine_conf.opt_input_shape()) {
          std::string key = iter.first;
          std::string value = iter.second;
          std::istringstream ss(value);
          std::string word;
          std::vector<int> arr;
          while(ss >> word) {
            arr.push_back(std::stoi(word));
          }
          optim_input_shape[key] = arr;
        }
      }
      config.SetTRTDynamicShapeInfo(min_input_shape,
                                    max_input_shape,
                                    optim_input_shape);
Z
update  
zhangjun 已提交
293
      LOG(INFO) << "create TensorRT predictor";
Z
zhangjun 已提交
294 295
    }

Z
zhangjun 已提交
296
    if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
297
      config.EnableLiteEngine(precision_type, true);
298
      config.SwitchIrOptim(true);
299 300 301 302 303
    }

    if ((!engine_conf.has_use_lite() && !engine_conf.has_use_gpu()) ||
        (engine_conf.has_use_lite() && !engine_conf.use_lite() &&
         engine_conf.has_use_gpu() && !engine_conf.use_gpu())) {
Z
zhangjun 已提交
304
#ifdef WITH_MKLML
Z
update  
zhangjun 已提交
305 306 307 308 309 310
#ifdef WITH_MKLDNN
      config.EnableMKLDNN();
      config.SwitchIrOptim(true);
      config.DisableGpu();
      // config.SetCpuMathLibraryNumThreads(2);

Z
fix  
zhangjun 已提交
311
      if (precision_type == PrecisionType::kInt8) {
312
        config.EnableMkldnnQuantizer();
Z
update  
zhangjun 已提交
313
        auto quantizer_config = config.mkldnn_quantizer_config();
314
        // TODO(somebody): warmup data
Z
update  
zhangjun 已提交
315 316 317
        // quantizer_config -> SetWarmupData();
        // quantizer_config -> SetWarmupBatchSize();
        // quantizer_config -> SetEnabledOpTypes(4);
Z
fix  
zhangjun 已提交
318
      } else if (precision_type == PrecisionType::kHalf) {
319 320
        config.EnableMkldnnBfloat16();
      }
Z
update  
zhangjun 已提交
321
#endif
Z
zhangjun 已提交
322
#endif
Z
zhangjun 已提交
323 324
    }

Z
zhangjun 已提交
325
    if (engine_conf.has_use_xpu() && engine_conf.use_xpu()) {
Z
update  
zhangjun 已提交
326 327
      // 2 MB l3 cache
      config.EnableXpu(2 * 1024 * 1024);
S
ShiningZhang 已提交
328
      config.SetXpuDeviceId(gpu_id);
Z
update  
zhangjun 已提交
329
    }
Z
zhangjun 已提交
330

H
fix bug  
HexToString 已提交
331
    if (engine_conf.has_use_ascend_cl() && engine_conf.use_ascend_cl()) {
332
      if (engine_conf.has_use_lite() && engine_conf.use_lite()) {
H
fix bug  
HexToString 已提交
333
        // for ascend 310
334 335
        FLAGS_nnadapter_device_names = "huawei_ascend_npu";
        FLAGS_nnadapter_context_properties =
H
fix bug  
HexToString 已提交
336
            "HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=" + std::to_string(gpu_id);
337 338
        FLAGS_nnadapter_model_cache_dir = "";
        config.NNAdapter()
H
fix bug  
HexToString 已提交
339 340 341 342
            .Enable()
            .SetDeviceNames({FLAGS_nnadapter_device_names})
            .SetContextProperties(FLAGS_nnadapter_context_properties)
            .SetModelCacheDir(FLAGS_nnadapter_model_cache_dir);
343
        LOG(INFO) << "Enable Lite NNAdapter for Ascend,"
H
fix bug  
HexToString 已提交
344
                  << "nnadapter_device_names=" << FLAGS_nnadapter_device_names
345 346 347 348
                  << ",nnadapter_context_properties="
                  << FLAGS_nnadapter_context_properties
                  << ",nnadapter_model_cache_dir="
                  << FLAGS_nnadapter_model_cache_dir;
S
ShiningZhang 已提交
349
      } else {
S
ShiningZhang 已提交
350
        // for ascend 910
S
ShiningZhang 已提交
351
        config.EnableNpu(gpu_id);
352 353 354
      }
    }

Z
zhangjun 已提交
355 356
    if (engine_conf.has_enable_memory_optimization() &&
        engine_conf.enable_memory_optimization()) {
Z
update  
zhangjun 已提交
357
      config.EnableMemoryOptim();
Z
zhangjun 已提交
358
    }
Z
zhangjun 已提交
359

Z
zhangjun 已提交
360
    predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
Z
update  
zhangjun 已提交
361 362
    _predictor = CreatePredictor(config);
    if (NULL == _predictor.get()) {
Z
zhangjun 已提交
363
      LOG(ERROR) << "create paddle predictor failed, path: " << model_path;
Z
zhangjun 已提交
364 365
      return -1;
    }
Z
update  
zhangjun 已提交
366

Z
zhangjun 已提交
367
    VLOG(2) << "create paddle predictor sucess, path: " << model_path;
Z
zhangjun 已提交
368 369 370 371
    return 0;
  }
};

Z
update  
zhangjun 已提交
372
}  // namespace inference
Z
zhangjun 已提交
373 374
}  // namespace paddle_serving
}  // namespace baidu