api_impl.cc 15.3 KB
Newer Older
X
Xin Pan 已提交
1 2
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Y
Yan Chunwei 已提交
3 4 5
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
X
Xin Pan 已提交
6

Y
Yan Chunwei 已提交
7
http://www.apache.org/licenses/LICENSE-2.0
X
Xin Pan 已提交
8

Y
Yan Chunwei 已提交
9 10 11 12 13
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
X
Xin Pan 已提交
14

15 16
#include "paddle/fluid/inference/api/api_impl.h"

F
flame 已提交
17
#include <glog/logging.h>
18

W
Wilber 已提交
19
#include <memory>
X
Xin Pan 已提交
20 21 22
#include <sstream>
#include <string>

23
#include "paddle/fluid/framework/feed_fetch_method.h"
24
#include "paddle/fluid/inference/api/helper.h"
25
#include "paddle/fluid/platform/cpu_helper.h"
W
Wilber 已提交
26
#include "paddle/fluid/platform/place.h"
27 28 29
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");
X
Xin Pan 已提交
30 31

namespace paddle {
32 33 34 35 36 37 38 39 40 41
namespace {
using paddle::inference::Timer;

// Convert an arithmetic value to its decimal string form via a stream.
// Unlike std::to_string, this keeps the stream's default precision for
// floating-point values (e.g. "0.5" instead of "0.500000").
template <class T>
std::string num2str(T value) {
  std::stringstream stream;
  stream << value;
  return stream.str();
}
}  // namespace
X
Xin Pan 已提交
42

43 44 45
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
R
Ruibiao Chen 已提交
46
      int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
T
tensor-tang 已提交
47
      if (feeds_.size() <= static_cast<size_t>(idx)) {
48 49 50 51 52
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
R
Ruibiao Chen 已提交
53
      int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
T
tensor-tang 已提交
54
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
55 56 57 58 59 60 61
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

T
tensor-tang 已提交
62 63
// Initialize the predictor: enable the profiler (when FLAGS_profile is set),
// select the execution place from the config (GPU/XPU/NPU/CPU), create or
// attach the scope, build the executor, load the inference program, and
// index the feed/fetch ops.
//
// When `parent_scope` is non-null (the Clone() path), the predictor shares it
// and keeps its own variables in a child sub-scope; otherwise devices are
// initialized and a fresh scope is created.
// Returns false when the model cannot be loaded from the config paths.
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    // Fixed typo: "actived" -> "activated".
    LOG(WARNING) << "Profiler is activated, might affect the performance";
    LOG(INFO) << "You can turn off by set gflags '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // no matter with or without MKLDNN
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (config_.use_gpu) {
    PADDLE_ENFORCE_EQ(config_.use_xpu,
                      false,
                      platform::errors::InvalidArgument(
                          "Only one choice can be made between CPU and XPU."));
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else if (config_.use_xpu) {
    place_ = paddle::platform::XPUPlace(config_.device);
  } else if (config_.use_npu) {
    place_ = paddle::platform::NPUPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    // Share the parent scope; our variables live in a child sub-scope so the
    // destructor can delete them without touching the parent's.
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                            platform::errors::PreconditionNotMet(
                                "The sub_scope should not be nullptr."));
  } else {
    paddle::framework::InitDevices();
    paddle::framework::InitDefaultKernelSignatureMap();
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files sited in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(
      *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

130
// Tear down the predictor: flush the profiler report when profiling was on,
// and remove the sub-scope created by Init() from the shared parent scope.
NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_ != nullptr) {
    scope_->DeleteScope(sub_scope_);
  }
}
139

Y
Yan Chunwei 已提交
140
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
141 142
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
F
flame 已提交
143 144 145 146 147 148 149 150 151 152
#ifndef PADDLE_ON_INFERENCE
  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
                             "cmake flag ON_INFER is not set.";
  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
                             "variables will be reused to save the allocation "
                             "overhead.";
  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
                             "setting the cmake flag ON_INFER=ON if you are "
                             "running Paddle Inference";
#endif  // PADDLE_ON_INFERENCE
L
luotao1 已提交
153 154 155
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
156
  VLOG(3) << "Predictor::predict";
X
Xin Pan 已提交
157 158 159
  Timer timer;
  timer.tic();
  // set feed variable
160 161
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
X
Xin Pan 已提交
162 163 164 165 166
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
167
  VLOG(4) << "Run prepared context";
168 169
  executor_->RunPreparedContext(ctx_.get(),
                                scope,
170
                                false, /* don't create local scope each time*/
171
                                false /* don't create variable each time */);
172
  VLOG(4) << "Finish prepared context";
173 174
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
175
    LOG(ERROR) << "fail to get fetches";
X
Xin Pan 已提交
176 177
    return false;
  }
M
minqiyang 已提交
178
  VLOG(3) << "predict cost: " << timer.toc() << "ms";
Y
Yan Chunwei 已提交
179

Y
Yan Chunwei 已提交
180 181 182
  // For some other vector like containers not cleaned after each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
X
Xin Pan 已提交
183 184 185
  return true;
}

186
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone(void *stream) {
Y
Yan Chunwei 已提交
187 188
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
Y
Yan Chunwei 已提交
189
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
Y
Yan Chunwei 已提交
190 191
  // Hot fix the bug that result diff in multi-thread.
  // TODO(Superjomn) re-implement a real clone here.
192 193 194 195
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(cls.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
Y
Yan Chunwei 已提交
196
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
Y
Yan Chunwei 已提交
197
    LOG(ERROR) << "fail to call Init";
X
Xin Pan 已提交
198 199
    return nullptr;
  }
J
Fix mac  
JiabinYang 已提交
200
  return cls;
X
Xin Pan 已提交
201 202
}

Y
Yan Chunwei 已提交
203
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
204
                                    framework::Scope *scope) {
205
  VLOG(3) << "Predictor::set_feed";
206
  if (inputs.size() != feeds_.size()) {
207 208
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
               << inputs.size();
X
Xin Pan 已提交
209 210
    return false;
  }
211 212 213 214

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

215
  for (size_t i = 0; i < inputs.size(); ++i) {
216
    auto &input = feed_tensors_[i];
217
    framework::DDim ddim = phi::make_ddim(inputs[i].shape);
X
Xin Pan 已提交
218 219
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
220
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
X
Xin Pan 已提交
221
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
222
      input_ptr = input.mutable_data<float>(ddim, place_);
223 224
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
X
Xin Pan 已提交
225 226 227 228 229
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

230 231 232 233 234 235 236
    PADDLE_ENFORCE_NOT_NULL(input_ptr,
                            platform::errors::InvalidArgument(
                                "The input_ptr should not be nullptr."));
    PADDLE_ENFORCE_NOT_NULL(
        inputs[i].data.data(),
        platform::errors::InvalidArgument(
            "The data of input tensor should not be null."));
237 238
    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
239 240
      std::memcpy(static_cast<void *>(input_ptr),
                  inputs[i].data.data(),
241
                  inputs[i].data.length());
242 243
    } else if (platform::is_gpu_place(place_)) {
      PADDLE_ENFORCE_EQ(
244 245
          platform::is_xpu_place(place_),
          false,
246 247
          platform::errors::InvalidArgument(
              "Only one choice can be made between CPU and XPU."));
248
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
Q
qingqing01 已提交
249 250
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
L
Leo Chen 已提交
251
      auto *dev_ctx = static_cast<const phi::GPUContext *>(pool.Get(place_));
252
      auto dst_gpu_place = place_;
253 254 255 256 257 258
      memory::Copy(dst_gpu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
                   inputs[i].data.length(),
                   dev_ctx->stream());
259
#else
260 261
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with CUDA, should not reach here."));
262
#endif
W
Wilber 已提交
263
    } else if (platform::is_xpu_place(place_)) {
264
#ifdef PADDLE_WITH_XPU
265
      auto dst_xpu_place = place_;
266 267 268 269
      memory::Copy(dst_xpu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
270 271 272 273
                   inputs[i].data.length());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with XPU, should not reach here."));
W
Wilber 已提交
274 275 276 277 278 279 280
#endif
    } else {
#ifdef PADDLE_WITH_ASCEND_CL
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::NPUDeviceContext *>(pool.Get(place_));
281
      auto dst_npu_place = place_;
282 283 284 285 286 287
      memory::Copy(dst_npu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
                   inputs[i].data.length(),
                   dev_ctx->stream());
W
Wilber 已提交
288 289 290
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with NPU, should not reach here."));
291 292 293
#endif
    }

Y
Yan Chunwei 已提交
294 295 296 297 298 299
    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
300 301
    int idx = -1;
    if (config_.specify_input_name) {
X
polish  
Xin Pan 已提交
302
      idx = feed_names_[inputs[i].name];
303
    } else {
R
Ruibiao Chen 已提交
304
      idx = PADDLE_GET_CONST(int, feeds_[i]->GetAttr("col"));
305 306
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
X
Xin Pan 已提交
307 308 309
  }
  return true;
}
L
luotao1 已提交
310 311 312
// Convert one fetched LoDTensor into a user-facing PaddleTensor: copy its
// shape, its data buffer, and its LoD.  T is the element type (float,
// int64_t or int32_t as dispatched by GetFetch).
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  const auto dims = phi::vectorize(fetch.dims());
  output->shape.assign(dims.begin(), dims.end());
  // set data.
  const T *src = fetch.data<T>();
  const int element_count = inference::VecReduceToInt(dims);
  output->data.Resize(element_count * sizeof(T));
  // The fetched tensor output by fetch op, should always in CPU memory, so
  // just copy.
  memcpy(output->data.data(), src, element_count * sizeof(T));
  // set lod
  output->lod.clear();
  for (const auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}
X
Xin Pan 已提交
329

330 331
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
332
  VLOG(3) << "Predictor::get_fetch";
333 334
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
R
Ruibiao Chen 已提交
335
    int idx = PADDLE_GET_CONST(int, fetchs_[i]->GetAttr("col"));
336
    PADDLE_ENFORCE_EQ(
337 338
        static_cast<size_t>(idx),
        i,
339
        platform::errors::InvalidArgument(
340 341
            "Fetch op's col attr(%d) should be equal to the index(%d)",
            idx,
342
            i));
343
    framework::FetchType &fetch_var =
344
        framework::GetFetchVariable(*scope, "fetch", idx);
R
Ruibiao Chen 已提交
345
    auto fetch = PADDLE_GET_CONST(framework::LoDTensor, fetch_var);
346
    auto type = framework::TransToProtoVarType(fetch.dtype());
L
luotao1 已提交
347
    auto output = &(outputs->at(i));
348
    output->name = fetchs_[idx]->Input("X")[0];
349
    if (type == framework::DataTypeTrait<float>::DataType()) {
L
luotao1 已提交
350 351
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
352
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
L
luotao1 已提交
353 354
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
355
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
356 357
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
X
Xin Pan 已提交
358
    } else {
359
      LOG(ERROR) << "unknown type, only support float32, int64 and int32 now.";
Y
Yan Chunwei 已提交
360
    }
X
Xin Pan 已提交
361 362 363 364
  }
  return true;
}

365
template <>
366 367 368
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
    const NativeConfig &config) {
W
Wilber 已提交
369 370
  // TODO(NHZlX): Should add the link to the doc of
  // paddle_infer::CreatePredictor<paddle_infer::Config>
371
  VLOG(3) << "create NativePaddlePredictor";
Y
Yan Chunwei 已提交
372
  if (config.use_gpu) {
S
Sylwester Fraczek 已提交
373
    // 1. GPU memory
374 375
    PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory,
                      0.f,
376 377 378
                      platform::errors::InvalidArgument(
                          "fraction_of_gpu_memory in the config should be set "
                          "to range (0., 1.]"));
379 380
    PADDLE_ENFORCE_GE(config.device,
                      0,
381 382 383 384
                      platform::errors::PreconditionNotMet(
                          "Invalid device id %d, the device id should be "
                          "greater than or equal to 0.",
                          config.device));
Y
Yan Chunwei 已提交
385 386 387 388 389
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f ||
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummpy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
390
                         num2str<float>(config.fraction_of_gpu_memory);
Y
Yan Chunwei 已提交
391
      flags.push_back(flag);
392
      VLOG(3) << "set flag: " << flag;
Y
Yan Chunwei 已提交
393 394
      framework::InitGflags(flags);
    }
X
Xin Pan 已提交
395
  }
396

Y
Yan Chunwei 已提交
397
  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
L
liuwei1031 已提交
398
  PADDLE_ENFORCE_NOT_NULL(
399 400 401
      dynamic_cast<NativePaddlePredictor *>(predictor.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
T
tensor-tang 已提交
402
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
X
Xin Pan 已提交
403 404
    return nullptr;
  }
J
Fix mac  
JiabinYang 已提交
405
  return predictor;
X
Xin Pan 已提交
406 407
}

Y
Yan Chunwei 已提交
408 409 410
// Deprecated entry point kept for backward compatibility; delegates to the
// kNative engine factory above.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  LOG(WARNING) << "Deprecated. Please use CreatePredictor instead.";
  auto predictor =
      CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
  return predictor;
}

X
Xin Pan 已提交
415
}  // namespace paddle