api_impl.cc 10.6 KB
Newer Older
X
Xin Pan 已提交
1 2
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
X
Xin Pan 已提交
14 15 16 17 18 19 20 21 22

#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

23
#include "paddle/fluid/framework/feed_fetch_method.h"
L
Luo Tao 已提交
24
#include "paddle/fluid/inference/api/api_impl.h"
D
dzhwinter 已提交
25
#include "paddle/fluid/inference/api/timer.h"
26 27 28
#include "paddle/fluid/platform/profiler.h"

// Command-line switch (default: false). On non-Windows builds,
// NativePaddlePredictor::Init() enables the profiler when this flag is set
// and the predictor's destructor disables it, passing "./profile.log" as the
// output path.
DEFINE_bool(profile, false, "Turn on profiler for fluid");
X
Xin Pan 已提交
29 30 31

namespace paddle {

32 33 34 35
// Walks every op in block 0 of the inference program and indexes the "feed"
// and "fetch" ops by their "col" attribute: feeds_[col] / fetchs_[col] point
// at the op, and feed_names_ maps the first "Out" variable name of each feed
// op to its column.
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op_desc : inference_program_->Block(0).AllOps()) {
    const bool is_feed = op_desc->Type() == "feed";
    if (!is_feed && op_desc->Type() != "fetch") {
      continue;  // only feed/fetch ops are recorded
    }

    const int col = boost::get<int>(op_desc->GetAttr("col"));
    if (is_feed) {
      // Grow on demand so the op can be stored at its own column.
      if (static_cast<size_t>(col) >= feeds_.size()) {
        feeds_.resize(col + 1);
      }
      feeds_[col] = op_desc;
      feed_names_[op_desc->Output("Out")[0]] = col;
    } else {
      if (static_cast<size_t>(col) >= fetchs_.size()) {
        fetchs_.resize(col + 1);
      }
      fetchs_[col] = op_desc;
    }
  }
}

T
tensor-tang 已提交
51 52
// Prepares the predictor for Run().
//
// parent_scope - when non-null, it becomes scope_ and a child scope created
//                from it is stored in sub_scope_; when null, devices are
//                initialised and a fresh root scope is created.
//
// The model is loaded either from config_.model_dir or, if that is empty,
// from the config_.prog_file / config_.param_file pair. Returns false (after
// logging) when neither source is configured, true otherwise.
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
#if !defined(_WIN32)
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is actived, might affect the performance";
    LOG(INFO) << "You can turn off by set gflags '-profile false'";

    // Track every device when running on GPU, CPU events only otherwise.
    platform::EnableProfiler(config_.use_gpu ? platform::ProfilerState::kAll
                                             : platform::ProfilerState::kCPU);
  }
#endif

  if (!config_.use_gpu) {
    place_ = paddle::platform::CPUPlace();
  } else {
    place_ = paddle::platform::CUDAPlace(config_.device);
  }

  if (!parent_scope) {
    paddle::framework::InitDevices(false);
    scope_.reset(new paddle::framework::Scope());
  } else {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_, "create sub scope fail");
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program.
  const bool has_model_dir = !config_.model_dir.empty();
  const bool has_combined_files =
      !config_.prog_file.empty() && !config_.param_file.empty();
  if (!has_model_dir && !has_combined_files) {
    LOG(ERROR) << "fail to load inference model.";
    return false;
  }
  if (has_model_dir) {
    // Parameters are saved in separate files located in the specified
    // directory.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else {
    // All parameters are saved in a single file. The file names should be
    // consistent with those used by the Python API
    // `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  // Variables are created in the child scope when there is one.
  framework::Scope *var_scope = sub_scope_ ? sub_scope_ : scope_.get();
  executor_->CreateVariables(*inference_program_, var_scope, 0);

  // Collect the feed and fetch ops of the loaded program.
  PrepareFeedFetch();
  return true;
}

107
// Disables the profiler when it was requested through the `profile` flag
// (non-Windows builds only) and removes the child scope from scope_ if one
// was created.
NativePaddlePredictor::~NativePaddlePredictor() {
#if !defined(_WIN32)
  if (FLAGS_profile) {
    const auto sort_key = platform::EventSortingKey::kTotal;
    platform::DisableProfiler(sort_key, "./profile.log");
  }
#endif
  if (sub_scope_ == nullptr) {
    return;
  }
  scope_->DeleteScope(sub_scope_);
}
118

Y
Yan Chunwei 已提交
119
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
120 121
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
X
Xin Pan 已提交
122
  VLOG(3) << "Predictor::predict";
D
dzhwinter 已提交
123
  using Timer = paddle::inference::Timer;
X
Xin Pan 已提交
124 125 126
  Timer timer;
  timer.tic();
  // set feed variable
127
  std::vector<framework::LoDTensor> feeds;
128 129
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
X
Xin Pan 已提交
130 131 132 133 134
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
135
  VLOG(4) << "Run prepared context";
136 137 138
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable eatch time */);
139
  VLOG(4) << "Finish prepared context";
140 141
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
142
    LOG(ERROR) << "fail to get fetches";
X
Xin Pan 已提交
143 144 145 146 147 148
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";
  return true;
}

Y
Yan Chunwei 已提交
149
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
X
Xin Pan 已提交
150
  VLOG(3) << "Predictor::clone";
Y
Yan Chunwei 已提交
151 152
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));

153
  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(scope_)) {
Y
Yan Chunwei 已提交
154
    LOG(ERROR) << "fail to call Init";
X
Xin Pan 已提交
155 156
    return nullptr;
  }
J
Fix mac  
JiabinYang 已提交
157 158 159 160
#ifdef __clang__
  // fix clang compile error
  return cls;
#else
161 162
  // fix manylinux compile error.
  return std::move(cls);
J
Fix mac  
JiabinYang 已提交
163
#endif
X
Xin Pan 已提交
164 165
}

Y
Yan Chunwei 已提交
166
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
167
                                    framework::Scope *scope) {
X
Xin Pan 已提交
168
  VLOG(3) << "Predictor::set_feed";
169
  if (inputs.size() != feeds_.size()) {
X
Xin Pan 已提交
170 171 172
    LOG(ERROR) << "wrong feed input size.";
    return false;
  }
173
  for (size_t i = 0; i < inputs.size(); ++i) {
174 175
    framework::LoDTensor input;
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
X
Xin Pan 已提交
176 177
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
178
      input_ptr = input.mutable_data<int64_t>(ddim, platform::CPUPlace());
X
Xin Pan 已提交
179
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
180
      input_ptr = input.mutable_data<float>(ddim, platform::CPUPlace());
X
Xin Pan 已提交
181 182 183 184 185 186
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
187
    std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
188
                inputs[i].data.length());
Y
Yan Chunwei 已提交
189 190 191 192 193 194
    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
195 196
    int idx = -1;
    if (config_.specify_input_name) {
X
polish  
Xin Pan 已提交
197
      idx = feed_names_[inputs[i].name];
198 199 200 201
    } else {
      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
X
Xin Pan 已提交
202 203 204
  }
  return true;
}
L
luotao1 已提交
205 206 207 208 209 210 211 212 213 214 215 216 217
// Converts one fetched LoDTensor holding elements of type T into a
// PaddleTensor.
//
// fetch  - tensor produced by the executor.
// output - receives shape, data and LoD; dtype is left to the caller.
//
// Without LoD information (or with an empty first level) the data and the
// dims are copied as they are. Otherwise the first LoD level is used to
// split the rows into sequences, each sequence is zero-padded to the length
// of the longest one, and the shape becomes
// {number of sequences, longest length, dims[1], dims[2], ...}.
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  std::vector<int> shape;
  auto dims_i = fetch.dims();
  const auto &lod = fetch.lod();  // bind by reference, no LoD copy needed
  const T *output_ptr = fetch.data<T>();
  auto num = fetch.numel();
  std::vector<T> data;
  // An empty first level carries no offsets to split on; treating it as
  // "no LoD" avoids reading the last element of an empty level.
  const bool has_sequences = lod.size() != 0 && lod[0].size() != 0;
  if (!has_sequences) {
    data.assign(output_ptr, output_ptr + num);
    for (int j = 0; j < dims_i.size(); ++j) {
      shape.push_back(static_cast<int>(dims_i[j]));
    }
  } else {
    // for batch detection
    // image[0] -> output[0] shape {145, 6}
    // image[1] -> output[1] shape {176, 6}
    // then,
    // the batch output shape {321, 6}
    // the lod {{0, 145, 321}}
    // so output[0] is padded up to {176, 6}
    const auto &offsets = lod[0];
    const size_t num_seqs = offsets.size() - 1;
    size_t max_dim = 0;
    for (size_t j = 1; j < offsets.size(); j++) {
      max_dim = std::max(max_dim, offsets[j] - offsets[j - 1]);
    }
    const size_t total_rows = offsets[offsets.size() - 1];
    // Number of elements per row of the fetched tensor.
    size_t common_dim = total_rows == 0 ? 0 : num / total_rows;
    if (max_dim > 0) {
      data.resize(num_seqs * max_dim * common_dim, 0);
    }
    for (size_t j = 1; j < offsets.size(); j++) {
      size_t start = offsets[j - 1] * common_dim;
      size_t end = offsets[j] * common_dim;
      if (end > start) {
        std::copy(output_ptr + start, output_ptr + end,
                  data.begin() + (j - 1) * max_dim * common_dim);
      }
    }
    shape.push_back(static_cast<int>(num_seqs));
    shape.push_back(static_cast<int>(max_dim));
    for (int j = 1; j < dims_i.size(); ++j) {
      shape.push_back(static_cast<int>(dims_i[j]));
    }
  }

  output->shape = shape;
  auto &buffer = output->data;
  const size_t num_bytes = sizeof(T) * data.size();
  if (buffer.empty() || buffer.length() < num_bytes) {
    buffer.Resize(num_bytes);
  }
  // Copy exactly the bytes held by `data`: a reused buffer may be larger
  // than the result, and copying buffer.length() bytes would read past the
  // end of `data`.
  if (num_bytes > 0) {
    std::memcpy(buffer.data(), data.data(), num_bytes);
  }
  // copy LoD; clear first so a reused output does not accumulate levels
  output->lod.clear();
  for (const auto &level : fetch.lod()) {
    output->lod.emplace_back(level);
  }
}
X
Xin Pan 已提交
261

262 263
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
X
Xin Pan 已提交
264
  VLOG(3) << "Predictor::get_fetch";
265 266 267
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
L
luotao1 已提交
268 269
    PADDLE_ENFORCE((size_t)idx == i);
    framework::LoDTensor &fetch =
270
        framework::GetFetchVariable(*scope, "fetch", idx);
L
luotao1 已提交
271 272 273 274 275 276 277 278
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    if (type == typeid(float)) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == typeid(int64_t)) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
X
Xin Pan 已提交
279
    } else {
L
luotao1 已提交
280
      LOG(ERROR) << "unknown type, only support float32 and int64 now.";
Y
Yan Chunwei 已提交
281
    }
X
Xin Pan 已提交
282 283 284 285
  }
  return true;
}

286
template <>
287 288
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
Y
Yan Chunwei 已提交
289 290 291
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memeroy
292
    PADDLE_ENFORCE_GT(
293
        config.fraction_of_gpu_memory, 0.f,
Y
Yan Chunwei 已提交
294
        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
295
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
Y
Yan Chunwei 已提交
296 297 298 299 300
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f ||
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummpy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
D
dzhwinter 已提交
301
                         std::to_string(config.fraction_of_gpu_memory);
Y
Yan Chunwei 已提交
302 303 304 305
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
X
Xin Pan 已提交
306 307
  }

Y
Yan Chunwei 已提交
308
  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
T
tensor-tang 已提交
309
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
X
Xin Pan 已提交
310 311
    return nullptr;
  }
J
Fix mac  
JiabinYang 已提交
312
#ifdef __clang__
J
Jiabin Yang 已提交
313
  // fix clang compile error
J
Fix mac  
JiabinYang 已提交
314 315
  return predictor;
#else
316
  return std::move(predictor);
J
Fix mac  
JiabinYang 已提交
317
#endif
X
Xin Pan 已提交
318 319 320
}

}  // namespace paddle