/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");

namespace paddle {
namespace {
using paddle::inference::Timer;

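// Render a numeric value as a decimal string; used below when assembling
// gflags strings.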
template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

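// Scan block 0 of the inference program and index its feed/fetch ops by the
// "col" attribute, so that inputs and outputs can later be bound by position
// (and feeds also by name).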
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = BOOST_GET_CONST(int, op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

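// Initialize the predictor: pick the place (CPU or GPU), set up the scope and
// executor, load the inference program, prepare the execution context, and
// index the feed/fetch ops. Returns false if the model cannot be loaded.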
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is actived, might affect the performance";
    LOG(INFO) << "You can turn off by set gflags '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // Set the number of CPU math threads (applies with or without MKLDNN).
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_,
                            platform::errors::PreconditionNotMet(
                                "The sub_scope should not be nullptr."));
  } else {
    paddle::framework::InitDevices(false);
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files located in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "fail to load inference model from " << config_.model_dir;
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(*inference_program_,
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

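// Flush the profiler output (if profiling was enabled) and release the
// sub-scope owned by this predictor.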
NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

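// Run one inference request: copy the inputs into the feed variables, execute
// the prepared program, and copy the fetch variables into output_data.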
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
#ifndef PADDLE_ON_INFERENCE
  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
                             "cmake flag ON_INFER is not set.";
  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
                             "variables will be reused to save the allocation "
                             "overhead.";
  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
                             "setting the cmake flag ON_INFER=ON if you are "
                             "running Paddle Inference";
#endif  // PADDLE_ON_INFERENCE
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // If variables are shared, we need not create them each time.
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";

  // Clean up vector-like containers that are not reset after each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
  return true;
}

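// Clone this predictor for use from another thread. As a hotfix it builds a
// fresh predictor from the same config instead of sharing internal state.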
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
  // Hotfix for the bug that results differ across threads.
  // TODO(Superjomn) re-implement a real clone here.
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(cls.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
    LOG(ERROR) << "fail to call Init";
    return nullptr;
  }

#ifdef __clang__
  // fix clang compile error
  return cls;
#else
  // fix manylinux compile error.
  return std::move(cls);
#endif
}

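// Copy the user-provided PaddleTensor inputs into the feed variables of the
// given scope, converting dtype, shape, and LoD. Data is memcpy'd on CPU or
// staged to the GPU with memory::Copy.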
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
               << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    PADDLE_ENFORCE_NOT_NULL(input_ptr,
                            platform::errors::InvalidArgument(
                                "The input_ptr should not be nullptr."));
    PADDLE_ENFORCE_NOT_NULL(
        inputs[i].data.data(),
        platform::errors::InvalidArgument(
            "The data of input tensor should not be null."));
    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                  inputs[i].data.length());
    } else {
#ifdef PADDLE_WITH_CUDA
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
      auto dst_gpu_place = BOOST_GET_CONST(platform::CUDAPlace, place_);
      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                   platform::CPUPlace(), inputs[i].data.data(),
                   inputs[i].data.length(), dev_ctx->stream());
#else
      PADDLE_THROW(platform::errors::Unavailable(
          "Not compiled with CUDA, should not reach here."));
#endif
    }

    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = BOOST_GET_CONST(int, feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
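
// Copy one fetched LoDTensor (always resident in CPU memory) into a
// PaddleTensor: shape, raw data, and LoD.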
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  auto shape = framework::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The tensor output by the fetch op should always be in CPU memory, so just
  // copy it.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

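// Collect every fetch variable from the scope into `outputs`, dispatching on
// the element type (float32, int64, or int32).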
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = BOOST_GET_CONST(int, fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE_EQ(
        static_cast<size_t>(idx), i,
        platform::errors::InvalidArgument(
            "Fetch op's col attr(%d) should be equal to the index(%d)", idx,
            i));
    framework::FetchType &fetch_var =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto fetch = BOOST_GET_CONST(framework::LoDTensor, fetch_var);
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    output->name = fetchs_[idx]->Input("X")[0];
    if (type == framework::DataTypeTrait<float>::DataType()) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
    } else {
      LOG(ERROR) << "unknown type, only support float32, int64 and int32 now.";
    }
  }
  return true;
}

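// Factory for the native engine. A minimal usage sketch follows; the model
// path and feed variable name are hypothetical, and the NativeConfig and
// PaddleTensor fields are those declared in paddle_inference_api.h:
//
//   NativeConfig config;
//   config.model_dir = "./mobilenet";  // hypothetical model directory
//   config.use_gpu = false;
//
//   auto predictor =
//       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
//
//   PaddleTensor input;
//   input.name = "data";  // hypothetical feed variable name
//   input.shape = {1, 3, 224, 224};
//   input.dtype = PaddleDType::FLOAT32;
//   input.data.Resize(1 * 3 * 224 * 224 * sizeof(float));
//   // ... fill input.data.data() with pixel values ...
//
//   std::vector<PaddleTensor> outputs;
//   if (predictor->Run({input}, &outputs)) {
//     // outputs[0] now holds the first fetch target.
//   }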
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
  // TODO(NHZlX): Should add the link to the doc of
  // paddle_infer::CreatePredictor<paddle_infer::Config>
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GE(config.fraction_of_gpu_memory, 0.f,
                      platform::errors::InvalidArgument(
                          "fraction_of_gpu_memory in the config should be in "
                          "the range (0., 1.]"));
    PADDLE_ENFORCE_GE(config.device, 0,
                      platform::errors::PreconditionNotMet(
                          "Invalid device id %d, the device id should be "
                          "greater than or equal to 0.",
                          config.device));
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummy");  // placeholder argv[0] for InitGflags
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(predictor.get()),
      platform::errors::PreconditionNotMet(
          "Dynamic_cast from PaddlePredictor to NativePaddlePredictor failed"));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
#ifdef __clang__
  // fix clang compile error
  return predictor;
#else
  return std::move(predictor);
#endif
}

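// Deprecated entry point kept for backward compatibility; it forwards to the
// kNative specialization above.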
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  LOG(WARNING) << "Deprecated. Please use CreatePredictor instead.";
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}

}  // namespace paddle