/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <glog/logging.h>
#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");

namespace paddle {
namespace {
using paddle::inference::Timer;

template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

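// Scans block 0 of the inference program for "feed" and "fetch" ops and
// records them, indexed by their "col" attribute, so that inputs and outputs
// can later be bound by position or by name.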
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

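// Initializes the predictor: picks the place (CPU or CUDA), sets up the scope
// (optionally sharing a parent scope), creates the executor, loads the
// inference program, and prepares the feed/fetch metadata.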
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is activated, which might affect the performance";
    LOG(INFO) << "You can turn it off by setting the gflag '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // Takes effect whether or not MKLDNN is enabled.
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_, "failed to create sub scope");
  } else {
    paddle::framework::InitDevices(false);
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files located in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with those used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "failed to load inference model from " << config_.model_dir;
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(*inference_program_,
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

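// Disables the profiler (writing ./profile.log) when -profile is on, and
// releases the sub-scope created from the parent scope in Init().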
NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

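// Runs one inference pass: feeds the inputs into the "feed" variables,
// executes the prepared program, and copies the "fetch" variables into
// output_data.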
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
#ifndef PADDLE_ON_INFERENCE
  LOG_FIRST_N(WARNING, 5) << "The NaiveExecutor can not work properly if the "
                             "cmake flag ON_INFER is not set.";
  LOG_FIRST_N(WARNING, 5) << "Unlike the training phase, all the scopes and "
                             "variables will be reused to save the allocation "
                             "overhead.";
  LOG_FIRST_N(WARNING, 5) << "Please re-compile the inference library by "
                             "setting the cmake flag ON_INFER=ON if you are "
                             "running Paddle Inference";
#endif  // PADDLE_ON_INFERENCE
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program.
  // Since variables are shared, there is no need to create them each time.
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";

  // Clean up other vector-like containers that are not reset after each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
  return true;
}

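// Creates another predictor with the same config. The current implementation
// re-runs Init() instead of sharing the already loaded program and weights
// (see the TODO below).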
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
  // Hot fix for the bug that results differ across threads.
  // TODO(Superjomn) re-implement a real clone here.
  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
    LOG(ERROR) << "failed to call Init";
    return nullptr;
  }

#ifdef __clang__
  // fix clang compile error
  return cls;
#else
  // fix manylinux compile error.
  return std::move(cls);
#endif
}

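// Copies the user-provided PaddleTensor inputs into feed LoDTensors in the
// given scope, converting dtype, shape and LoD and handling CPU or GPU
// placement.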
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size() << " but get "
               << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    PADDLE_ENFORCE_NOT_NULL(input_ptr);
    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                  inputs[i].data.length());
    } else {
#ifdef PADDLE_WITH_CUDA
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
      auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                   platform::CPUPlace(), inputs[i].data.data(),
                   inputs[i].data.length(), dev_ctx->stream());
#else
      PADDLE_THROW("Not compiled with CUDA, should not reach here.");
#endif
    }

    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
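
// Copies a single fetched LoDTensor into a PaddleTensor: shape, raw data and
// LoD.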
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  auto shape = framework::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The tensor produced by the fetch op should always be in CPU memory, so a
  // plain copy suffices.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

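// Reads every fetch variable from the scope and converts it into a
// PaddleTensor, dispatching on the tensor's data type.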
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE((size_t)idx == i);
    framework::FetchType &fetch_var =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto fetch = boost::get<framework::LoDTensor>(fetch_var);
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    output->name = fetchs_[idx]->Input("X")[0];
    if (type == framework::DataTypeTrait<float>::DataType()) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
    } else {
      LOG(ERROR) << "unknown type; only float32, int64 and int32 are supported now.";
    }
  }
  return true;
}

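// Factory specialization for the native engine: validates the GPU settings,
// forwards fraction_of_gpu_memory to gflags, then builds and initializes a
// NativePaddlePredictor.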
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GE(
        config.fraction_of_gpu_memory, 0.f,
        "fraction_of_gpu_memory in the config should be in the range (0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(predictor.get()));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
#ifdef __clang__
  // fix clang compile error
  return predictor;
#else
  return std::move(predictor);
#endif
}

template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}
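
// Example usage (a minimal sketch; the config fields shown follow the
// NativeConfig members referenced in this file and may differ across Paddle
// releases):
//
//   paddle::NativeConfig config;
//   config.model_dir = "./my_model";  // or set prog_file and param_file
//   config.use_gpu = false;
//   auto predictor =
//       paddle::CreatePaddlePredictor<paddle::NativeConfig>(config);
//
//   std::vector<paddle::PaddleTensor> inputs(1), outputs;
//   inputs[0].shape = {1, 3, 224, 224};
//   inputs[0].dtype = paddle::PaddleDType::FLOAT32;
//   inputs[0].data.Resize(3 * 224 * 224 * sizeof(float));
//   // ... fill inputs[0].data with the input values ...
//   predictor->Run(inputs, &outputs);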

}  // namespace paddle