// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <pthread.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <numeric>
#include <string>
#include <utility>
#include <vector>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/bsf.h"
#include "core/predictor/framework/factory.h"
#include "core/predictor/framework/infer_data.h"
#include "paddle_inference_api.h"  // NOLINT
namespace baidu {
namespace paddle_serving {
namespace predictor {

using configure::ModelToolkitConf;

class AutoLock {
 public:
  explicit AutoLock(pthread_mutex_t& mutex) : _mut(mutex) {
    pthread_mutex_lock(&mutex);
  }
  ~AutoLock() { pthread_mutex_unlock(&_mut); }

 private:
  pthread_mutex_t& _mut;
};

class GlobalCreateMutex {
 public:
  pthread_mutex_t& mutex() { return _mut; }

  static pthread_mutex_t& instance() {
    static GlobalCreateMutex gmutex;
    return gmutex.mutex();
  }

 private:
  GlobalCreateMutex() { pthread_mutex_init(&_mut, NULL); }
  pthread_mutex_t _mut;
};
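
// A minimal usage sketch (illustrative): AutoLock is a scope guard, so
// pairing it with GlobalCreateMutex serializes engine creation:
//   {
//     AutoLock lock(GlobalCreateMutex::instance());
//     // ... critical section, e.g. creating an engine core ...
//   }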

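// InferEngine is the abstract interface every inference backend implements.
// The framework drives it roughly as: proc_initialize() once per process,
// thrd_initialize() once per worker thread, infer()/thrd_clear() per
// request, then thrd_finalize() and proc_finalize() on shutdown.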
class InferEngine {
 public:
  virtual ~InferEngine() {}

  virtual int proc_initialize(const configure::EngineDesc& conf, bool version) {
    return proc_initialize_impl(conf, version);
  }
  virtual int proc_finalize() { return proc_finalize_impl(); }
  virtual int thrd_initialize() { return thrd_initialize_impl(); }
  virtual int thrd_clear() { return thrd_clear_impl(); }
  virtual int thrd_finalize() { return thrd_finalize_impl(); }
  virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
    return infer_impl(in, out, batch_size);
  }

  virtual int reload() = 0;

  virtual uint64_t version() const = 0;

  // begin: framework inner call
  virtual int proc_initialize_impl(const configure::EngineDesc& conf,
                                   bool version) = 0;
  virtual int thrd_initialize_impl() = 0;
  virtual int thrd_finalize_impl() = 0;
  virtual int thrd_clear_impl() = 0;
  virtual int proc_finalize_impl() = 0;
  virtual int infer_impl(const void* in,
                         void* out,
                         uint32_t batch_size = -1) = 0;
  virtual int task_infer_impl(const BatchTensor& in,
                              BatchTensor& out) = 0;  // NOLINT

  // end: framework inner call
};

class ReloadableInferEngine : public InferEngine {
 public:
  virtual ~ReloadableInferEngine() {}

  // Reloadable record
  union ReloadableRecord {
    time_t timestamp;
    uint64_t md5sum;
    uint64_t revision;
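    // Which member is meaningful depends on _reload_type ("timestamp_ne",
    // "timestamp_gt", "md5sum" or "revision").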
  };

  virtual int load(const configure::EngineDesc& conf) = 0;
  typedef im::bsf::Task<Tensor, Tensor> TaskT;

  int proc_initialize_impl(const configure::EngineDesc& conf, bool version);

  int proc_initialize(const configure::EngineDesc& conf, bool version);

  int infer(const void* in, void* out, uint32_t batch_size = -1);

  int thrd_initialize();

  int thrd_clear();

  int proc_finalize();

  int reload();

  uint64_t version() const { return _version; }
  uint32_t thread_num() const { return _infer_thread_num; }

 private:
  int parse_version_info(const configure::EngineDesc& config, bool version);

  bool check_need_reload();

  bool check_timestamp_ne();

  bool check_timestamp_gt();

  bool check_md5sum() { return false; }

  bool check_revision() { return false; }

 protected:
  // Model directory
  std::string _model_dir;

  // The description of inference engine
  configure::EngineDesc _conf;

 private:
  // Tag file of reloadable model
  std::string _reload_tag_file;

  // Type of reload, e.g. timestamp_ne, timestamp_gt, md5sum, revision
  std::string _reload_type;

  // Record of the last loading information
  ReloadableRecord _last_record;

  // Number of inference threads
  uint32_t _infer_thread_num;

  // Size of inference batch
  uint32_t _infer_batch_size;

  // Whether batch_size needs to be aligned during inference
  bool _infer_batch_align;

  // model version
  uint64_t _version;
};
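
// Reload flow (sketch): a caller periodically invokes reload(), which uses
// check_need_reload() to compare _last_record against the model's reload
// tag file and, when a change is detected, calls load() on the new model.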

// Lock-free switching between two models
template <typename EngineCore>
struct ModelData {
  ModelData() : current_idx(1) {
    cores[0] = NULL;
    cores[1] = NULL;
  }

  ~ModelData() {
    delete cores[0];
    delete cores[1];
  }

  EngineCore* cores[2];
  uint32_t current_idx;
};
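
// Double buffering: cores[current_idx] serves requests while the other slot
// is rebuilt by load_data(); flipping current_idx publishes the new core.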

template <typename EngineCore>
class DBReloadableInferEngine : public ReloadableInferEngine {
 public:
  virtual ~DBReloadableInferEngine() {}

  int proc_initialize(const configure::EngineDesc& conf, bool version) {
    THREAD_KEY_CREATE(&_skey, NULL);
    THREAD_MUTEX_INIT(&_mutex, NULL);
    return ReloadableInferEngine::proc_initialize(conf, version);
  }

  virtual int load(const configure::EngineDesc& conf) {
    if (_reload_vec.empty()) {
      return 0;
    }

    for (uint32_t ti = 0; ti < _reload_vec.size(); ++ti) {
      if (load_data(_reload_vec[ti], conf) != 0) {
        LOG(ERROR) << "Failed reload engine model: " << ti;
        return -1;
      }
    }

    LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
    return 0;
  }

  int load_data(ModelData<EngineCore>* md, const configure::EngineDesc& conf) {
    uint32_t next_idx = (md->current_idx + 1) % 2;
    if (md->cores[next_idx]) {
      delete md->cores[next_idx];
    }

    md->cores[next_idx] = new (std::nothrow) EngineCore;

    // params.dump();
    if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
      LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
      return -1;
    }
    md->current_idx = next_idx;
    return 0;
  }

  virtual int thrd_initialize_impl() {
    // memory pool to be inited in non-serving-threads
    if (MempoolWrapper::instance().thread_initialize() != 0) {
      LOG(ERROR) << "Failed thread initialize mempool";
      return -1;
    }

    ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
    if (!md || load_data(md, _conf) != 0) {
      LOG(ERROR) << "Failed create thread data from " << _conf.model_dir();
      return -1;
    }

    LOG(INFO) << "THREAD_SETSPECIFIC _skey = md";
    THREAD_SETSPECIFIC(_skey, md);
    im::bsf::AutoMutex lock(_mutex);
    _reload_vec.push_back(md);
    return 0;
  }

  int thrd_clear_impl() {
    // for non-serving-threads
    if (MempoolWrapper::instance().thread_clear() != 0) {
      LOG(ERROR) << "Failed thread clear mempool";
      return -1;
    }
    return 0;
  }

  int thrd_finalize_impl() { return 0; }

  int proc_finalize_impl() {
    THREAD_KEY_DELETE(_skey);
    THREAD_MUTEX_DESTROY(&_mutex);
    return 0;
  }

  EngineCore* get_core() {
    ModelData<EngineCore>* md =
        (ModelData<EngineCore>*)THREAD_GETSPECIFIC(_skey);
    if (!md) {
      LOG(ERROR) << "Failed get thread specific data";
      return NULL;
    }
    return md->cores[md->current_idx];
  }

 protected:
  THREAD_KEY_T _skey;
  THREAD_MUTEX_T _mutex;
  std::vector<ModelData<EngineCore>*> _reload_vec;
};
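
// DBReloadableInferEngine gives each worker thread its own EngineCore (its
// own copy of the model), looked up through the thread-specific key _skey.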

// Multiple EngineCores share one copy of the model data.
template <typename EngineCore>
class CloneDBReloadableInferEngine
    : public DBReloadableInferEngine<EngineCore> {
 public:
  virtual ~CloneDBReloadableInferEngine() {}

  virtual int proc_initialize(const configure::EngineDesc& conf, bool version) {
    _pd = new (std::nothrow) ModelData<EngineCore>;
    if (!_pd) {
      LOG(ERROR) << "Failed to allocate for ProcData";
      return -1;
    }
    return DBReloadableInferEngine<EngineCore>::proc_initialize(conf, version);
  }

  virtual int load(const configure::EngineDesc& conf) {
    // Load the process-level model data
    if (!_pd ||
        DBReloadableInferEngine<EngineCore>::load_data(_pd, conf) != 0) {
      LOG(ERROR) << "Failed to create common model from [" << conf.model_dir()
                 << "].";
      return -1;
    }
    LOG(WARNING) << "Succ load common model[" << _pd->cores[_pd->current_idx]
                 << "], path[" << conf.model_dir() << "].";

    if (DBReloadableInferEngine<EngineCore>::_reload_vec.empty()) {
      return 0;
    }

    for (uint32_t ti = 0;
         ti < DBReloadableInferEngine<EngineCore>::_reload_vec.size();
         ++ti) {
      if (load_data(DBReloadableInferEngine<EngineCore>::_reload_vec[ti],
                    _pd->cores[_pd->current_idx]) != 0) {
        LOG(ERROR) << "Failed reload engine model: " << ti;
        return -1;
      }
    }

    LOG(WARNING) << "Succ load clone model, path[" << conf.model_dir() << "]";
    return 0;
  }

  // Load a thread-level object; multiple thread-level objects share the
  // model data held by pd_core.
  int load_data(ModelData<EngineCore>* td, EngineCore* pd_core) {
    uint32_t next_idx = (td->current_idx + 1) % 2;
    if (td->cores[next_idx]) {
      delete td->cores[next_idx];
    }

    td->cores[next_idx] = new (std::nothrow) EngineCore;
    if (!td->cores[next_idx] ||
        td->cores[next_idx]->clone(pd_core->get()) != 0) {
      LOG(ERROR) << "Failed clone model from pd_core[ " << pd_core << "], idx["
                 << next_idx << "]";
      return -1;
    }
    td->current_idx = next_idx;
    LOG(WARNING) << "td_core[" << td->cores[td->current_idx]
                 << "] clone model from pd_core[" << pd_core
                 << "] succ, cur_idx[" << td->current_idx << "].";
    return 0;
  }

  virtual int thrd_initialize_impl() {
    // memory pool to be inited in non-serving-threads
    if (MempoolWrapper::instance().thread_initialize() != 0) {
      LOG(ERROR) << "Failed thread initialize mempool";
      return -1;
    }

    ModelData<EngineCore>* md = new (std::nothrow) ModelData<EngineCore>;
    if (!md || load_data(md, _pd->cores[_pd->current_idx]) != 0) {
      LOG(ERROR) << "Failed clone thread data, origin_core["
                 << _pd->cores[_pd->current_idx] << "].";
      return -1;
    }

    THREAD_SETSPECIFIC(DBReloadableInferEngine<EngineCore>::_skey, md);
    im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
    DBReloadableInferEngine<EngineCore>::_reload_vec.push_back(md);
    return 0;
  }

 protected:
  // Process-level EngineCore; multiple thread-level EngineCores share this
  // object's model data.
  ModelData<EngineCore>* _pd;
};

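// With TensorRT (WITH_TRT) each thread builds a full EngineCore of its own;
// otherwise threads clone from a shared process-level core to save memory.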
template <typename EngineCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<EngineCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
#endif
 public:  // NOLINT
  FluidInferEngine() {}
  ~FluidInferEngine() {}
  typedef std::vector<paddle::PaddleTensor> TensorVector;
  int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
    // First of all, get the real core according to the
    // template parameter <EngineCore>.
    EngineCore* core = DBReloadableInferEngine<EngineCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in infer_impl()";
      return -1;
    }
    // Process the input tensors in a loop: take each tensor's name, call
    // 'core->GetInputHandle(name)' to get the input handle, set the
    // handle's lod and shape information, then copy the data from CPU
    // into the core.
    const TensorVector* tensorVector_in_pointer =
        reinterpret_cast<const TensorVector*>(in);
    for (int i = 0; i < tensorVector_in_pointer->size(); ++i) {
      auto lod_tensor_in =
          core->GetInputHandle((*tensorVector_in_pointer)[i].name);
      lod_tensor_in->SetLoD((*tensorVector_in_pointer)[i].lod);
      lod_tensor_in->Reshape((*tensorVector_in_pointer)[i].shape);
      void* origin_data = (*tensorVector_in_pointer)[i].data.data();
      // The core determines the size of the memory allocation from the
      // data type passed in, so the data pointer must be typed as one of
      // float*, int64_t* or int32_t* instead of void*.
      if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::FLOAT32) {
        float* data = static_cast<float*>(origin_data);
        lod_tensor_in->CopyFromCpu(data);
      } else if ((*tensorVector_in_pointer)[i].dtype ==
                 paddle::PaddleDType::INT64) {
        int64_t* data = static_cast<int64_t*>(origin_data);
        lod_tensor_in->CopyFromCpu(data);
      } else if ((*tensorVector_in_pointer)[i].dtype ==
                 paddle::PaddleDType::INT32) {
        int32_t* data = static_cast<int32_t*>(origin_data);
        lod_tensor_in->CopyFromCpu(data);
      }
    }
    // After the input data is passed in,
    // call 'core->Run()' to perform the prediction.
    if (!core->Run()) {
      LOG(ERROR) << "Failed run fluid family core";
      return -1;
    }
    // To get the results, first call 'core->GetOutputNames()' to get the
    // list of output names, then fetch each output in a loop by calling
    // 'core->GetOutputHandle(name)'.
    std::vector<std::string> outnames = core->GetOutputNames();
    std::vector<int> output_shape;
    int out_num = 0;
    int dataType = 0;
    void* databuf_data = NULL;
    char* databuf_char = NULL;
    size_t databuf_size = 0;
    TensorVector* tensorVector_out_pointer =
        reinterpret_cast<TensorVector*>(out);
    if (!tensorVector_out_pointer) {
      LOG(ERROR) << "tensorVector_out_pointer is nullptr";
      return -1;
    }
    // Get the type and shape information of each output first,
    // then copy the data from the core back to CPU.
    // The pointer type of data_out must be one of
    // float*, int64_t* or int32_t* instead of void*.
    for (int i = 0; i < outnames.size(); ++i) {
      auto lod_tensor_out = core->GetOutputHandle(outnames[i]);
      output_shape = lod_tensor_out->shape();
      out_num = std::accumulate(
          output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
      dataType = lod_tensor_out->type();
      if (dataType == paddle::PaddleDType::FLOAT32) {
        databuf_size = out_num * sizeof(float);
        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
        if (!databuf_data) {
          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
          return -1;
        }
        float* data_out = reinterpret_cast<float*>(databuf_data);
        lod_tensor_out->CopyToCpu(data_out);
        databuf_char = reinterpret_cast<char*>(data_out);
      } else if (dataType == paddle::PaddleDType::INT64) {
        databuf_size = out_num * sizeof(int64_t);
        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
        if (!databuf_data) {
          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
          return -1;
        }
        int64_t* data_out = reinterpret_cast<int64_t*>(databuf_data);
        lod_tensor_out->CopyToCpu(data_out);
        databuf_char = reinterpret_cast<char*>(data_out);
      } else if (dataType == paddle::PaddleDType::INT32) {
        databuf_size = out_num * sizeof(int32_t);
        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
        if (!databuf_data) {
          LOG(ERROR) << "Malloc failed, size: " << databuf_size;
          return -1;
        }
        int32_t* data_out = reinterpret_cast<int32_t*>(databuf_data);
        lod_tensor_out->CopyToCpu(data_out);
        databuf_char = reinterpret_cast<char*>(data_out);
      }
      // Task scheduling requires OPs to exchange data through a 'Channel'
      // data structure, so the processed data must be copied into the
      // 'Channel' for the next OP. Here that means copying 'databuf_char'
      // into 'void* out' (i.e. 'tensorVector_out_pointer').
      paddle::PaddleTensor tensor_out;
      tensor_out.name = outnames[i];
      tensor_out.dtype = paddle::PaddleDType(dataType);
      tensor_out.shape.assign(output_shape.begin(), output_shape.end());
      std::vector<std::vector<size_t>> out_lod = lod_tensor_out->lod();
      for (int li = 0; li < out_lod.size(); ++li) {
        std::vector<size_t> lod_element;
        lod_element.assign(out_lod[li].begin(), out_lod[li].end());
        tensor_out.lod.push_back(lod_element);
      }
      paddle::PaddleBuf paddleBuf(databuf_char, databuf_size);
      tensor_out.data = paddleBuf;
      tensorVector_out_pointer->push_back(tensor_out);
    }
    return 0;
  }

  int task_infer_impl(const BatchTensor& in, BatchTensor& out) {  // NOLINT
    return infer_impl(&in, &out);
  }
};
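
// A minimal calling sketch (illustrative; assumes an initialized engine):
//   FluidInferEngine<SomeEngineCore>::TensorVector in, out;
//   // ... fill `in` with named PaddleTensors (name/dtype/shape/data) ...
//   engine->infer_impl(&in, &out);  // `out` now holds the output tensors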

typedef FactoryPool<InferEngine> StaticInferFactory;

class VersionedInferEngine : public InferEngine {
 public:
  VersionedInferEngine() { _versions.clear(); }
  ~VersionedInferEngine() {}

  int proc_initialize(const configure::EngineDesc& conf);

  int proc_initialize(const configure::EngineDesc& conf, bool version);

  int proc_finalize();

  int thrd_initialize();

  int thrd_clear();

  int thrd_finalize();

  int reload();

  uint64_t version() const;

  // inference interface
  InferEngine* default_engine() const;

  int infer(const void* in, void* out, uint32_t batch_size);

  template <typename T>
  T* get_core();

  // versioned inference interface
  int infer(const void* in, void* out, uint32_t batch_size, uint64_t version);

  template <typename T>
  T* get_core(uint64_t version);

  int proc_initialize_impl(const configure::EngineDesc& conf, bool);

  int thrd_initialize_impl();

  int thrd_finalize_impl();

  int thrd_clear_impl();

  int proc_finalize_impl();

  int infer_impl(const void* in, void* out, uint32_t batch_size = -1);

  int task_infer_impl(const BatchTensor& in, BatchTensor& out);

 private:
  boost::unordered_map<uint64_t, InferEngine*> _versions;
};
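
// Usage note (sketch): VersionedInferEngine above keeps one InferEngine per
// model version in _versions; infer() without a version argument routes to
// default_engine().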

class InferManager {
 public:
  static InferManager& instance() {
    static InferManager ins;
    return ins;
  }

  int proc_initialize(const char* path, const char* file);

  int thrd_initialize();

  int thrd_clear();

  int reload();

  int thrd_finalize();

  int proc_finalize();

  // Inference interface
  int infer(const char* model_name,
            const void* in,
            void* out,
            uint32_t batch_size = -1);

  template <typename T>
  T* get_core(const char* model_name);

  // Versioned inference interface
  int infer(const char* model_name,
            const void* in,
            void* out,
            uint32_t batch_size,
            uint64_t version);

  template <typename T>
  T* get_core(const char* model_name, uint64_t version);

  int query_version(const std::string& model, uint64_t& version);

 private:
  boost::unordered_map<std::string, VersionedInferEngine*> _map;
};
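
// A minimal end-to-end sketch (paths and model name are illustrative):
//   InferManager::instance().proc_initialize("./conf", "model_toolkit.prototxt");
//   InferManager::instance().thrd_initialize();  // once per worker thread
//   std::vector<paddle::PaddleTensor> in, out;   // `in` filled by the caller
//   InferManager::instance().infer("general_model", &in, &out);
//   InferManager::instance().thrd_clear();       // after each request
//   InferManager::instance().thrd_finalize();
//   InferManager::instance().proc_finalize();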

}  // namespace predictor
}  // namespace paddle_serving
}  // namespace baidu