api_impl.cc
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <memory>
#include <sstream>
#include <string>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");

namespace paddle {
namespace {
using paddle::inference::Timer;

template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

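// Scan block 0 of the inference program and cache the feed/fetch ops by their
// "col" attribute, recording the feed variable names as well.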
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

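// Pick the execution place (CPU/GPU/XPU), set up the scope and executor, load
// the inference program from `model_dir` or `prog_file`/`param_file`, and
// prepare the feed/fetch mapping.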
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is activated, which might affect the performance";
    LOG(INFO) << "You can turn it off by setting the gflag '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // Set the number of CPU math threads, no matter whether MKLDNN is used.
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (config_.use_gpu) {
    PADDLE_ENFORCE_EQ(config_.use_xpu, false,
                      platform::errors::InvalidArgument(
                          "Only one choice can be made between CPU and XPU."));
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else if (config_.use_xpu) {
    place_ = paddle::platform::XPUPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                            platform::errors::PreconditionNotMet(
                                "The sub_scope should not be nullptr."));
  } else {
    paddle::framework::InitDevices();
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files sited in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with those used in the
    // Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "failed to load the inference model: neither model_dir nor "
                  "prog_file/param_file is set in the config.";
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(*inference_program_,
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

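// Feed the inputs into the scope, run the prepared execution context, and
// collect the fetch results into `output_data`.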
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
#ifndef PADDLE_ON_INFERENCE
  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
                             "cmake flag ON_INFER is not set.";
  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
                             "variables will be reused to save the allocation "
                             "overhead.";
  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
                             "setting the cmake flag ON_INFER=ON if you are "
                             "running Paddle Inference";
#endif  // PADDLE_ON_INFERENCE
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";

  // Reset vector-like (non-tensor) variables that are not cleaned up after
  // each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
  return true;
}

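// Create a new predictor from the same config; the clone is re-initialized
// from scratch rather than sharing state (see the TODO below).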
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
  // Hot fix the bug that result diff in multi-thread.
  // TODO(Superjomn) re-implement a real clone here.
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(cls.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
    LOG(ERROR) << "fail to call Init";
    return nullptr;
  }

#ifdef __clang__
  // fix clang compile error
  return cls;
#else
  // fix manylinux compile error.
  return std::move(cls);
#endif
}

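// Copy the user-provided PaddleTensors into LoDTensors in the scope so that
// the feed ops can consume them. Handles INT64/FLOAT32/INT32 data and
// CPU/GPU/XPU placement of the destination tensor.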
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
               << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    PADDLE_ENFORCE_NOT_NULL(input_ptr,
                            platform::errors::InvalidArgument(
                                "The input_ptr should not be nullptr."));
    PADDLE_ENFORCE_NOT_NULL(
        inputs[i].data.data(),
        platform::errors::InvalidArgument(
            "The data of input tensor should not be null."));
    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                  inputs[i].data.length());
    } else if (platform::is_gpu_place(place_)) {
      PADDLE_ENFORCE_EQ(
          platform::is_xpu_place(place_), false,
          platform::errors::InvalidArgument(
              "Only one choice can be made between CPU and XPU."));
#ifdef PADDLE_WITH_CUDA
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
      auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                   platform::CPUPlace(), inputs[i].data.data(),
                   inputs[i].data.length(), dev_ctx->stream());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with CUDA, should not reach here."));
#endif
    } else {
#ifdef PADDLE_WITH_XPU
      auto dst_xpu_place = BOOST_GET_CONST(platform::XPUPlace, place_);
      memory::Copy(dst_xpu_place, static_cast<void *>(input_ptr),
                   platform::CPUPlace(), inputs[i].data.data(),
                   inputs[i].data.length());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compile with XPU, should not reach here."));
#endif
    }

    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = BOOST_GET_CONST(int, feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
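// Copy one fetched LoDTensor (shape, data and LoD) into a PaddleTensor.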
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  auto shape = framework::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The tensor fetched by the fetch op is always in CPU memory, so a plain
  // copy is enough.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

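// Read every fetch variable from the scope and convert it into a PaddleTensor
// in `outputs`, ordered by the fetch ops' "col" attribute.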
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE_EQ(
        static_cast<size_t>(idx), i,
        platform::errors::InvalidArgument(
            "Fetch op's col attr(%d) should be equal to the index(%d)", idx,
            i));
    framework::FetchType &fetch_var =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    output->name = fetchs_[idx]->Input("X")[0];
    if (type == framework::DataTypeTrait<float>::DataType()) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
    } else {
      LOG(ERROR) << "unknown type, only support float32, int64 and int32 now.";
    }
  }
  return true;
}

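// Factory for the native, fluid-executor-based predictor. When GPU is enabled,
// the GPU memory fraction is validated and forwarded to gflags before the
// predictor is initialized.
//
// A minimal usage sketch (the model path and input shape below are
// illustrative, not part of this API):
//
//   NativeConfig config;
//   config.model_dir = "./my_model";   // hypothetical model directory
//   config.use_gpu = false;
//   auto predictor =
//       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
//
//   PaddleTensor input;
//   input.shape = {1, 3, 224, 224};    // hypothetical input shape
//   input.dtype = PaddleDType::FLOAT32;
//   input.data.Resize(1 * 3 * 224 * 224 * sizeof(float));
//   /* fill input.data with the actual input values ... */
//
//   std::vector<PaddleTensor> outputs;
//   predictor->Run({input}, &outputs);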
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
  // TODO(NHZlX): Should add the link to the doc of
  // paddle_infer::CreatePredictor<paddle_infer::Config>
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f,
                      platform::errors::InvalidArgument(
                          "fraction_of_gpu_memory in the config should be set "
                          "to range (0., 1.]"));
    PADDLE_ENFORCE_GE(config.device, 0,
                      platform::errors::PreconditionNotMet(
                          "Invalid device id %d, the device id should be "
                          "greater than or equal to 0.",
                          config.device));
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(predictor.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
#ifdef __clang__
  // fix clang compile error
  return predictor;
#else
  return std::move(predictor);
#endif
}

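// Deprecated convenience overload; it simply forwards to the
// PaddleEngineKind::kNative factory above.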
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  LOG(WARNING) << "Deprecated. Please use CreatePredictor instead.";
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}

}  // namespace paddle