/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/api/api_impl.h"

F
flame 已提交
17
#include <glog/logging.h>
18

W
Wilber 已提交
19
#include <memory>
X
Xin Pan 已提交
20 21 22
#include <sstream>
#include <string>

23
#include "paddle/fluid/framework/feed_fetch_method.h"
24
#include "paddle/fluid/inference/api/helper.h"
25
#include "paddle/fluid/platform/cpu_helper.h"
W
Wilber 已提交
26
#include "paddle/fluid/platform/place.h"
27 28 29
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");
X
Xin Pan 已提交
30 31

namespace paddle {
32 33 34 35 36 37 38 39 40 41
namespace {
using paddle::inference::Timer;

// Renders an arbitrary streamable value as a string using the default
// ostream formatting (e.g. default float precision, unlike std::to_string).
template <class T>
std::string num2str(T a) {
  std::ostringstream oss;
  oss << a;
  return oss.str();
}
}  // namespace
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
R
Ruibiao Chen 已提交
46
      int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
T
tensor-tang 已提交
47
      if (feeds_.size() <= static_cast<size_t>(idx)) {
48 49 50 51 52
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
R
Ruibiao Chen 已提交
53
      int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
T
tensor-tang 已提交
54
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
55 56 57 58 59 60 61
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

// Initializes the predictor: picks the execution place from `config_`,
// sets up the scope (reusing `parent_scope` when one is supplied), creates
// an executor, loads the inference program and prepares the feed/fetch
// bookkeeping. Returns false when the model cannot be loaded.
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  // Profiling is opt-in through the -profile gflag defined at the top of
  // this file.
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is actived, might affect the performance";
    LOG(INFO) << "You can turn off by set gflags '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // no matter with or without MKLDNN
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  // Select the place (device) this predictor runs on. GPU and XPU are
  // mutually exclusive; CPU is the fallback.
  if (config_.use_gpu) {
    PADDLE_ENFORCE_EQ(config_.use_xpu,
                      false,
                      platform::errors::InvalidArgument(
                          "Only one choice can be made between CPU and XPU."));
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else if (config_.use_xpu) {
    place_ = paddle::platform::XPUPlace(config_.device);
  } else if (config_.use_npu) {
    place_ = paddle::platform::NPUPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  // Either share the caller-provided scope (executing in a child sub-scope)
  // or create a fresh scope; device initialization is only performed for a
  // brand-new scope.
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                            platform::errors::PreconditionNotMet(
                                "The sub_scope should not be nullptr."));
  } else {
    paddle::framework::InitDevices();
    paddle::framework::InitDefaultKernelSignatureMap();
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files sited in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
    return false;
  }

  // Pre-compile the program and create its variables once; Run() reuses
  // this prepared context on every call.
  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(
      *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

// Tears down the predictor: flushes profiler output (when the -profile
// gflag enabled profiling in Init()) and releases the sub-scope that Init()
// created inside the shared parent scope.
NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_ != nullptr) {
    scope_->DeleteScope(sub_scope_);
  }
}
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
141 142
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
F
flame 已提交
143 144 145 146 147 148 149 150 151 152
#ifndef PADDLE_ON_INFERENCE
  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
                             "cmake flag ON_INFER is not set.";
  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
                             "variables will be reused to save the allocation "
                             "overhead.";
  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
                             "setting the cmake flag ON_INFER=ON if you are "
                             "running Paddle Inference";
#endif  // PADDLE_ON_INFERENCE
L
luotao1 已提交
153 154 155
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
156
  VLOG(3) << "Predictor::predict";
X
Xin Pan 已提交
157 158 159
  Timer timer;
  timer.tic();
  // set feed variable
160 161
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
X
Xin Pan 已提交
162 163 164 165 166
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
167
  VLOG(4) << "Run prepared context";
168 169
  executor_->RunPreparedContext(ctx_.get(),
                                scope,
170
                                false, /* don't create local scope each time*/
171
                                false /* don't create variable each time */);
172
  VLOG(4) << "Finish prepared context";
173 174
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
175
    LOG(ERROR) << "fail to get fetches";
X
Xin Pan 已提交
176 177
    return false;
  }
M
minqiyang 已提交
178
  VLOG(3) << "predict cost: " << timer.toc() << "ms";
Y
Yan Chunwei 已提交
179

Y
Yan Chunwei 已提交
180 181 182
  // For some other vector like containers not cleaned after each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
X
Xin Pan 已提交
183 184 185
  return true;
}

std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone(void *stream) {
Y
Yan Chunwei 已提交
187 188
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
Y
Yan Chunwei 已提交
189
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
Y
Yan Chunwei 已提交
190 191
  // Hot fix the bug that result diff in multi-thread.
  // TODO(Superjomn) re-implement a real clone here.
192 193 194 195
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(cls.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
Y
Yan Chunwei 已提交
196
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
Y
Yan Chunwei 已提交
197
    LOG(ERROR) << "fail to call Init";
X
Xin Pan 已提交
198 199
    return nullptr;
  }
J
Fix mac  
JiabinYang 已提交
200
  return cls;
X
Xin Pan 已提交
201 202
}

// Copies the user-provided `inputs` into the feed variables of `scope`.
// Supports INT64 / FLOAT32 / INT32 tensors. Data is memcpy'd directly for
// CPU places and staged host-to-device via memory::Copy for GPU/XPU/NPU
// places. Returns false on input-count or dtype mismatch.
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
               << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = phi::make_ddim(inputs[i].shape);
    // Allocate the destination buffer on `place_` with the requested dtype.
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    PADDLE_ENFORCE_NOT_NULL(input_ptr,
                            platform::errors::InvalidArgument(
                                "The input_ptr should not be nullptr."));
    PADDLE_ENFORCE_NOT_NULL(
        inputs[i].data.data(),
        platform::errors::InvalidArgument(
            "The data of input tensor should not be null."));
    // Transfer the raw bytes into the allocated buffer; the copy mechanism
    // depends on the target place and on which device backends this binary
    // was compiled with.
    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
      std::memcpy(static_cast<void *>(input_ptr),
                  inputs[i].data.data(),
                  inputs[i].data.length());
    } else if (platform::is_gpu_place(place_)) {
      PADDLE_ENFORCE_EQ(
          platform::is_xpu_place(place_),
          false,
          platform::errors::InvalidArgument(
              "Only one choice can be made between CPU and XPU."));
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      // Async host-to-device copy on the device context's stream.
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
      auto dst_gpu_place = place_;
      memory::Copy(dst_gpu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
                   inputs[i].data.length(),
                   dev_ctx->stream());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with CUDA, should not reach here."));
#endif
    } else if (platform::is_xpu_place(place_)) {
#ifdef PADDLE_WITH_XPU
      auto dst_xpu_place = place_;
      memory::Copy(dst_xpu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
                   inputs[i].data.length());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with XPU, should not reach here."));
#endif
    } else {
#ifdef PADDLE_WITH_ASCEND_CL
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::NPUDeviceContext *>(pool.Get(place_));
      auto dst_npu_place = place_;
      memory::Copy(dst_npu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
                   inputs[i].data.length(),
                   dev_ctx->stream());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with NPU, should not reach here."));
#endif
    }

    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    // Resolve the feed column either by user-specified tensor name or by
    // the op's positional "col" attribute.
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = PADDLE_GET_CONST(int, feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
// Copies one fetched LoDTensor into the user-facing PaddleTensor: the
// shape, the raw element data and the LoD structure are all transferred.
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // Shape.
  const auto dims = phi::vectorize(fetch.dims());
  output->shape.assign(dims.begin(), dims.end());
  // Data. The fetched tensor output by fetch op, should always in CPU
  // memory, so just copy.
  const T *src = fetch.data<T>();
  const int elem_count = inference::VecReduceToInt(dims);
  output->data.Resize(elem_count * sizeof(T));
  memcpy(output->data.data(), src, elem_count * sizeof(T));
  // LoD.
  output->lod.clear();
  for (const auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
333
  VLOG(3) << "Predictor::get_fetch";
334 335
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
R
Ruibiao Chen 已提交
336
    int idx = PADDLE_GET_CONST(int, fetchs_[i]->GetAttr("col"));
337
    PADDLE_ENFORCE_EQ(
338 339
        static_cast<size_t>(idx),
        i,
340
        platform::errors::InvalidArgument(
341 342
            "Fetch op's col attr(%d) should be equal to the index(%d)",
            idx,
343
            i));
344
    framework::FetchType &fetch_var =
345
        framework::GetFetchVariable(*scope, "fetch", idx);
R
Ruibiao Chen 已提交
346
    auto fetch = PADDLE_GET_CONST(framework::LoDTensor, fetch_var);
347
    auto type = framework::TransToProtoVarType(fetch.dtype());
L
luotao1 已提交
348
    auto output = &(outputs->at(i));
349
    output->name = fetchs_[idx]->Input("X")[0];
350
    if (type == framework::DataTypeTrait<float>::DataType()) {
L
luotao1 已提交
351 352
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
353
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
L
luotao1 已提交
354 355
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
356
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
357 358
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
X
Xin Pan 已提交
359
    } else {
360
      LOG(ERROR) << "unknown type, only support float32, int64 and int32 now.";
Y
Yan Chunwei 已提交
361
    }
X
Xin Pan 已提交
362 363 364 365
  }
  return true;
}

template <>
367 368 369
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
    const NativeConfig &config) {
W
Wilber 已提交
370 371
  // TODO(NHZlX): Should add the link to the doc of
  // paddle_infer::CreatePredictor<paddle_infer::Config>
372
  VLOG(3) << "create NativePaddlePredictor";
Y
Yan Chunwei 已提交
373
  if (config.use_gpu) {
S
Sylwester Fraczek 已提交
374
    // 1. GPU memory
375 376
    PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory,
                      0.f,
377 378 379
                      platform::errors::InvalidArgument(
                          "fraction_of_gpu_memory in the config should be set "
                          "to range (0., 1.]"));
380 381
    PADDLE_ENFORCE_GE(config.device,
                      0,
382 383 384 385
                      platform::errors::PreconditionNotMet(
                          "Invalid device id %d, the device id should be "
                          "greater than or equal to 0.",
                          config.device));
Y
Yan Chunwei 已提交
386 387 388 389 390
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f ||
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummpy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
391
                         num2str<float>(config.fraction_of_gpu_memory);
Y
Yan Chunwei 已提交
392
      flags.push_back(flag);
393
      VLOG(3) << "set flag: " << flag;
Y
Yan Chunwei 已提交
394 395
      framework::InitGflags(flags);
    }
X
Xin Pan 已提交
396
  }
397

Y
Yan Chunwei 已提交
398
  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
L
liuwei1031 已提交
399
  PADDLE_ENFORCE_NOT_NULL(
400 401 402
      dynamic_cast<NativePaddlePredictor *>(predictor.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
T
tensor-tang 已提交
403
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
X
Xin Pan 已提交
404 405
    return nullptr;
  }
J
Fix mac  
JiabinYang 已提交
406
  return predictor;
X
Xin Pan 已提交
407 408
}

// Deprecated single-template-argument factory kept for backward
// compatibility; logs a warning and forwards to the kNative-engine factory.
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  LOG(WARNING) << "Deprecated. Please use CreatePredictor instead.";
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}

}  // namespace paddle