api_impl.cc
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/inference/api/api_impl.h"

#include <glog/logging.h>

#include <memory>
#include <sstream>
#include <string>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");

namespace paddle {
namespace {
using paddle::inference::Timer;

template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

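// Scan block 0 of the inference program and record every "feed" and "fetch"
// op, indexed by its "col" attribute, so that inputs and outputs can later be
// bound by position.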
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = PADDLE_GET_CONST(int, op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

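// Initialize the predictor: pick the place (CPU, GPU or XPU), set up the
// scope, the executor and the inference program, then build the feed/fetch
// mapping.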
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is activated, which might affect the performance";
    LOG(INFO) << "You can turn it off by setting the gflag '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // Set the number of math-library threads, with or without MKLDNN.
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (config_.use_gpu) {
    PADDLE_ENFORCE_EQ(config_.use_xpu,
                      false,
                      platform::errors::InvalidArgument(
                          "Only one choice can be made between CPU and XPU."));
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else if (config_.use_xpu) {
    place_ = paddle::platform::XPUPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                            platform::errors::PreconditionNotMet(
                                "The sub_scope should not be nullptr."));
  } else {
    paddle::framework::InitMemoryMethod();
    paddle::framework::InitDevices();
    paddle::framework::InitDefaultKernelSignatureMap();
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files located in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with those used
    // in the Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(
      *inference_program_, sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

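// Run one inference pass: bind `inputs` as feed variables, execute the
// prepared program, and copy the fetch results into `output_data`.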
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
#ifndef PADDLE_ON_INFERENCE
  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
                             "cmake flag ON_INFER is not set.";
  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
                             "variables will be reused to save the allocation "
                             "overhead.";
  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
                             "setting the cmake flag ON_INFER=ON if you are "
                             "running Paddle Inference";
#endif  // PADDLE_ON_INFERENCE
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(),
                                scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";

  // Clean up the remaining vector-like containers that are not reset after each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
  return true;
}

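// Create an independent predictor with the same config; the mutex keeps
// cloning safe when called from multiple threads.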
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone(void *stream) {
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
  // Hot fix for the bug that results differ across threads.
  // TODO(Superjomn) re-implement a real clone here.
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(cls.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
    LOG(ERROR) << "fail to call Init";
    return nullptr;
  }
  return cls;
}

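// Copy every input PaddleTensor into a DenseTensor on the target place (CPU,
// GPU or XPU), carry over the LoD, and register it as the feed variable with
// the matching column index.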
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
               << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = phi::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    PADDLE_ENFORCE_NOT_NULL(input_ptr,
                            platform::errors::InvalidArgument(
                                "The input_ptr should not be nullptr."));
    PADDLE_ENFORCE_NOT_NULL(
        inputs[i].data.data(),
        platform::errors::InvalidArgument(
            "The data of input tensor should not be null."));
    PADDLE_ENFORCE_EQ(
        inputs[i].data.length(),
        input.numel() * phi::SizeOf(input.dtype()),
        paddle::platform::errors::InvalidArgument(
            "The data contained in the input PaddleTensor had wrong length."));

    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
      std::memcpy(static_cast<void *>(input_ptr),
                  inputs[i].data.data(),
                  inputs[i].data.length());
    } else if (platform::is_gpu_place(place_)) {
      PADDLE_ENFORCE_EQ(
          platform::is_xpu_place(place_),
          false,
          platform::errors::InvalidArgument(
              "Only one choice can be made between CPU and XPU."));
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx = static_cast<const phi::GPUContext *>(pool.Get(place_));
      auto dst_gpu_place = place_;
      memory::Copy(dst_gpu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
                   inputs[i].data.length(),
                   dev_ctx->stream());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with CUDA, should not reach here."));
#endif
    } else if (platform::is_xpu_place(place_)) {
#ifdef PADDLE_WITH_XPU
      auto dst_xpu_place = place_;
      memory::Copy(dst_xpu_place,
                   static_cast<void *>(input_ptr),
                   platform::CPUPlace(),
                   inputs[i].data.data(),
                   inputs[i].data.length());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with XPU, should not reach here."));
#endif
    }

    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = PADDLE_GET_CONST(int, feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
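
// Copy a single fetched DenseTensor into a PaddleTensor: shape, raw data and
// LoD.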
template <typename T>
void NativePaddlePredictor::GetFetchOne(const phi::DenseTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  auto shape = phi::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The tensor produced by the fetch op should always be in CPU memory, so a
  // plain copy is enough.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

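// Read every fetch variable back from the scope and convert it into a
// PaddleTensor; only float32, int64 and int32 results are handled.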
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = PADDLE_GET_CONST(int, fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE_EQ(
        static_cast<size_t>(idx),
        i,
        platform::errors::InvalidArgument(
            "Fetch op's col attr(%d) should be equal to the index(%d)",
            idx,
            i));
    framework::FetchType &fetch_var =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto fetch = PADDLE_GET_CONST(phi::DenseTensor, fetch_var);
    auto type = framework::TransToProtoVarType(fetch.dtype());
    auto output = &(outputs->at(i));
    output->name = fetchs_[idx]->Input("X")[0];
    if (type == framework::DataTypeTrait<float>::DataType()) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
    } else {
      LOG(ERROR) << "unknown type, only support float32, int64 and int32 now.";
    }
  }
  return true;
}

template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
    const NativeConfig &config) {
  // TODO(NHZlX): Should add the link to the doc of
  // paddle_infer::CreatePredictor<paddle_infer::Config>
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory,
                      0.f,
                      platform::errors::InvalidArgument(
                          "fraction_of_gpu_memory in the config should be set "
                          "in the range (0., 1.]"));
    PADDLE_ENFORCE_GE(config.device,
                      0,
                      platform::errors::PreconditionNotMet(
                          "Invalid device id %d, the device id should be "
                          "greater than or equal to 0.",
                          config.device));
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f ||
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.emplace_back("dummy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(predictor.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
  return predictor;
}

template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  LOG(WARNING) << "Deprecated. Please use CreatePredictor instead.";
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}
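
// A minimal usage sketch of the factories above (illustrative only; the model
// path and input shape below are assumptions, not part of this file):
//
//   NativeConfig config;
//   config.model_dir = "./my_model";  // hypothetical model directory
//   config.use_gpu = false;
//   auto predictor = CreatePaddlePredictor<NativeConfig>(config);
//
//   PaddleTensor input;
//   input.shape = {1, 3, 224, 224};  // assumed input shape
//   input.dtype = PaddleDType::FLOAT32;
//   input.data.Resize(1 * 3 * 224 * 224 * sizeof(float));
//   std::vector<PaddleTensor> outputs;
//   predictor->Run({input}, &outputs);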

}  // namespace paddle