/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");

namespace paddle {
namespace {
using paddle::inference::Timer;

template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

void NativePaddlePredictor::PrepareFeedFetch() {
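  // Scan block 0 of the inference program and index the feed/fetch ops by
  // their "col" attribute, so Run() can bind inputs and outputs by position.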
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
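  // Set up profiling, math-library threads, the place (CPU/GPU), the scope
  // hierarchy and the executor, then load the inference program either from
  // a model directory or from separate program/parameter files.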
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is activated, which might affect performance";
    LOG(INFO) << "You can turn it off by setting the gflag '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // Set the number of math library threads, with or without MKLDNN.
  paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());

  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_, "failed to create sub scope");
  } else {
    paddle::framework::InitDevices(false);
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files located in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with those used
    // in the Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "failed to load inference model from " << config_.model_dir;
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(*inference_program_,
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
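  // Feed the inputs into the scope, run the prepared context, and fetch the
  // outputs; the local scope and its variables are reused across calls.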
  if (UNLIKELY(config_.cpu_math_library_num_threads() > 1)) {
    paddle::platform::SetNumThreads(config_.cpu_math_library_num_threads());
  }
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "failed to set feed";
    return false;
  }
  // Run the inference program.
  // If variables are shared, we need not create them each time.
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "failed to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";

  // Reset vector-like containers that are not cleaned up after each batch.
  tensor_array_batch_cleaner_.CollectNoTensorVars(scope_.get());
  tensor_array_batch_cleaner_.ResetNoTensorVars();
  return true;
}

std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
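  // Cloning builds a fresh predictor from the same config rather than
  // sharing this predictor's scope; see the TODO below.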
  std::lock_guard<std::mutex> lk(clone_mutex_);
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
  // Hot fix for the bug that results differ across threads.
  // TODO(Superjomn) re-implement a real clone here.
  PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
    LOG(ERROR) << "failed to call Init";
    return nullptr;
  }

#ifdef __clang__
  // fix clang compile error
  return cls;
#else
  // fix manylinux compile error.
  return std::move(cls);
#endif
}

bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
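  // Copy each user-provided PaddleTensor into a framework::LoDTensor and
  // register it as a feed variable in the given scope.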
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size()
               << " but got " << inputs.size();
    return false;
  }

  // Cache the inputs memory for better concurrency performance.
  feed_tensors_.resize(inputs.size());

  for (size_t i = 0; i < inputs.size(); ++i) {
    auto &input = feed_tensors_[i];
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, place_);
    } else if (inputs[i].dtype == PaddleDType::INT32) {
      input_ptr = input.mutable_data<int32_t>(ddim, place_);
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    PADDLE_ENFORCE_NOT_NULL(input_ptr);
    PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
    if (platform::is_cpu_place(place_)) {
      // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
      std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                  inputs[i].data.length());
    } else {
#ifdef PADDLE_WITH_CUDA
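      // Copy the host buffer to GPU memory on the CUDA stream bound to
      // place_, so the copy is ordered with the subsequent kernels.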
      platform::DeviceContextPool &pool =
          platform::DeviceContextPool::Instance();
      auto *dev_ctx =
          static_cast<const platform::CUDADeviceContext *>(pool.Get(place_));
      auto dst_gpu_place = boost::get<platform::CUDAPlace>(place_);
      memory::Copy(dst_gpu_place, static_cast<void *>(input_ptr),
                   platform::CPUPlace(), inputs[i].data.data(),
                   inputs[i].data.length(), dev_ctx->stream());
#else
      PADDLE_THROW("Not compile with CUDA, should not reach here.");
#endif
    }

    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  auto shape = framework::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The tensor produced by the fetch op is always in CPU memory, so a plain
  // copy suffices.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
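  // Copy every fetch variable out of the scope into the user-visible output
  // vector, dispatching on the tensor's element type.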
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE((size_t)idx == i);
    framework::LoDTensor &fetch =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    output->name = fetchs_[idx]->Input("X")[0];
    if (type == framework::DataTypeTrait<float>::DataType()) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == framework::DataTypeTrait<int64_t>::DataType()) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else if (type == framework::DataTypeTrait<int32_t>::DataType()) {
      GetFetchOne<int32_t>(fetch, output);
      output->dtype = PaddleDType::INT32;
    } else {
      LOG(ERROR) << "unknown type; only float32, int64 and int32 are "
                    "supported now.";
    }
  }
  return true;
}

template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
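  // Validate the GPU settings, forward them to gflags, then build and
  // initialize a NativePaddlePredictor.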
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GE(
        config.fraction_of_gpu_memory, 0.f,
        "fraction_of_gpu_memory in the config should be in the range "
        "[0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  PADDLE_ENFORCE_NOT_NULL(
      dynamic_cast<NativePaddlePredictor *>(predictor.get()));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
#ifdef __clang__
  // fix clang compile error
  return predictor;
#else
  return std::move(predictor);
#endif
}

template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}
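
// Illustrative usage sketch (assumes a model saved with the Python API
// `fluid.io.save_inference_model` under "./model"):
//
//   NativeConfig config;
//   config.model_dir = "./model";
//   config.use_gpu = false;
//   auto predictor = CreatePaddlePredictor<NativeConfig>(config);
//   std::vector<PaddleTensor> inputs;   // filled by the caller
//   std::vector<PaddleTensor> outputs;
//   predictor->Run(inputs, &outputs);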

}  // namespace paddle