/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <algorithm>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"

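// --profile is defined by this file; paddle_num_threads is defined in another
// translation unit and controls the CPU math-library thread pool set in Init().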
DEFINE_bool(profile, false, "Turn on profiler for fluid");
DECLARE_int32(paddle_num_threads);

namespace paddle {
namespace {
using paddle::inference::Timer;

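// Render a numeric value as a string; used below when composing gflags-style
// flag strings.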
template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

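// Scan block 0 of the inference program and cache its feed/fetch ops by their
// "col" attribute, so Run() can bind inputs and outputs by slot index.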
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
    } else if (op->Type() == "fetch") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
    }
  }
}

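// One-time setup: optionally enable the profiler, pick the CPU/GPU place,
// create (or share) the scope, load the inference program from model_dir or
// from prog_file/param_file, and pre-prepare the executor context.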
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";
  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is activated, which might affect the performance";
    LOG(INFO) << "You can turn it off by setting the gflag '--profile=false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  // Set the number of CPU math-library threads, with or without MKLDNN.
  paddle::platform::SetNumThreads(FLAGS_paddle_num_threads);

  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_, "failed to create sub scope");
  } else {
    paddle::framework::InitDevices(false);
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files located in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with those used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "fail to load inference model: neither model_dir nor "
                  "prog_file/param_file is set";
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(*inference_program_,
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

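// Flush profiler results (if profiling was on) and release the sub-scope
// created in Init().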
NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

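// Feed `inputs` into the feed slots, run the pre-prepared program without
// recreating local scopes or variables, then copy the fetch slots into
// `output_data`.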
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  std::vector<framework::LoDTensor> feeds;
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // if share variables, we need not create variables
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time*/
                                false /* don't create variable each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";

  // Workaround: TensorArray variables are reused across runs; collect and
  // reset them here so stale contents do not leak into the next run.
  tensor_array_batch_cleaner_.CollectTensorArrays(scope_.get());
  tensor_array_batch_cleaner_.ResetTensorArray();
  return true;
}

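// Create a new predictor that shares this predictor's scope (and thus its
// parameters); the clone gets its own sub-scope via Init(scope_).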
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));

  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(scope_)) {
    LOG(ERROR) << "fail to call Init";
    return nullptr;
  }
#ifdef __clang__
  // fix clang compile error
  return cls;
#else
  // fix manylinux compile error.
  return std::move(cls);
#endif
}

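// Copy user-provided PaddleTensors into the "feed" variable of `scope`,
// converting shape, dtype (float32/int64 only) and LoD on the way.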
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size, need " << feeds_.size()
               << " but got " << inputs.size();
    return false;
  }
  for (size_t i = 0; i < inputs.size(); ++i) {
    framework::LoDTensor input;
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, platform::CPUPlace());
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, platform::CPUPlace());
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
    std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                inputs[i].data.length());
    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx = feed_names_[inputs[i].name];
    } else {
      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}
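// Copy a single fetched LoDTensor into a PaddleTensor: shape, raw data, LoD.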
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                        PaddleTensor *output) {
  // set shape.
  auto shape = framework::vectorize(fetch.dims());
  output->shape.assign(shape.begin(), shape.end());
  // set data.
  const T *data = fetch.data<T>();
  int num_elems = inference::VecReduceToInt(shape);
  output->data.Resize(num_elems * sizeof(T));
  // The tensor fetched by the fetch op should always reside in CPU memory, so
  // a plain memcpy suffices.
  memcpy(output->data.data(), data, num_elems * sizeof(T));
  // set lod
  output->lod.clear();
  for (auto &level : fetch.lod()) {
    output->lod.emplace_back(level.begin(), level.end());
  }
}

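// Read every fetch slot from `scope` into `outputs`; only float32 and int64
// tensors are supported.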
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE((size_t)idx == i);
    framework::LoDTensor &fetch =
        framework::GetFetchVariable(*scope, "fetch", idx);
    auto type = fetch.type();
    auto output = &(outputs->at(i));
    if (type == typeid(float)) {
      GetFetchOne<float>(fetch, output);
      output->dtype = PaddleDType::FLOAT32;
    } else if (type == typeid(int64_t)) {
      GetFetchOne<int64_t>(fetch, output);
      output->dtype = PaddleDType::INT64;
    } else {
      LOG(ERROR) << "unknown type, only float32 and int64 are supported now";
    }
  }
  return true;
}

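// Factory specialization for the native engine: validates the GPU settings,
// forwards fraction_of_gpu_memory through gflags, then builds the predictor
// and runs Init().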
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GT(
        config.fraction_of_gpu_memory, 0.f,
        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      // The first entry stands in for argv[0], which the flag parser skips.
      flags.push_back("dummy");
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
#ifdef __clang__
  // fix clang compile error
  return predictor;
#else
  return std::move(predictor);
#endif
}

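// Convenience overload that forwards to the kNative specialization.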
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<NativeConfig>(
    const NativeConfig &config) {
  return CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);
}

}  // namespace paddle