/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <sys/time.h>
#include <algorithm>
#include <cstring>  // for std::memcpy
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/platform/profiler.h"

DEFINE_bool(profile, false, "Turn on profiler for fluid");

namespace paddle {
namespace {

// Simple wall-clock timer, used to measure prediction latency.
class Timer {
 public:
  double start;
  double startu;
  void tic() {
    struct timeval tp;
    gettimeofday(&tp, NULL);
    start = tp.tv_sec;
    startu = tp.tv_usec;
  }
  double toc() {
    struct timeval tp;
    gettimeofday(&tp, NULL);
    double used_time_ms =
        (tp.tv_sec - start) * 1000.0 + (tp.tv_usec - startu) / 1000.0;
    return used_time_ms;
  }
};

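// Stringify a value via std::stringstream; used below to build gflags
// arguments.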
template <class T>
std::string num2str(T a) {
  std::stringstream istr;
  istr << a;
  return istr.str();
}
}  // namespace

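// Scan block 0 of the inference program and cache its feed/fetch ops,
// indexed by their "col" attribute, so Run() can bind inputs and outputs
// by slot without re-scanning the program.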
void NativePaddlePredictor::PrepareFeedFetch() {
  for (auto *op : inference_program_->Block(0).AllOps()) {
    if (op->Type() == "feed") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (feeds_.size() <= static_cast<size_t>(idx)) {
        feeds_.resize(idx + 1);
      }
      feeds_[idx] = op;
      feed_names_[op->Output("Out")[0]] = idx;
      LOG(ERROR) << "feed " << idx << " " << op->Output("Out")[0];
    } else if (op->Type() == "fetch") {
      int idx = boost::get<int>(op->GetAttr("col"));
      if (fetchs_.size() <= static_cast<size_t>(idx)) {
        fetchs_.resize(idx + 1);
      }
      fetchs_[idx] = op;
      LOG(ERROR) << "fetch " << idx << " " << op->Input("X")[0];
    }
  }
}

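// Initialize the predictor: choose the execution place, set up the scope
// (sharing `parent_scope` when called from Clone()), load the inference
// program from either a model directory or a prog/param file pair, and
// prepare the executor context.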
bool NativePaddlePredictor::Init(
    std::shared_ptr<framework::Scope> parent_scope) {
  VLOG(3) << "Predictor::init()";

  if (FLAGS_profile) {
    LOG(WARNING) << "Profiler is activated, which may affect performance";
    LOG(INFO) << "You can turn it off by setting the gflag '-profile false'";

    auto tracking_device = config_.use_gpu ? platform::ProfilerState::kAll
                                           : platform::ProfilerState::kCPU;
    platform::EnableProfiler(tracking_device);
  }

  if (config_.use_gpu) {
    place_ = paddle::platform::CUDAPlace(config_.device);
  } else {
    place_ = paddle::platform::CPUPlace();
  }
  if (parent_scope) {
    scope_ = parent_scope;
    sub_scope_ = &(parent_scope->NewScope());
    PADDLE_ENFORCE_NOT_NULL(sub_scope_, "failed to create sub scope");
  } else {
    paddle::framework::InitDevices(false);
    scope_.reset(new paddle::framework::Scope());
  }

  executor_.reset(new paddle::framework::Executor(place_));

  // Initialize the inference program
  if (!config_.model_dir.empty()) {
    // Parameters are saved in separate files located in
    // the specified `dirname`.
    inference_program_ = paddle::inference::Load(executor_.get(), scope_.get(),
                                                 config_.model_dir);
  } else if (!config_.prog_file.empty() && !config_.param_file.empty()) {
    // All parameters are saved in a single file.
    // The file names should be consistent with that used
    // in Python API `fluid.io.save_inference_model`.
    inference_program_ = paddle::inference::Load(
        executor_.get(), scope_.get(), config_.prog_file, config_.param_file);
  } else {
    LOG(ERROR) << "fail to load inference model.";
    return false;
  }

  ctx_ = executor_->Prepare(*inference_program_, 0);
  executor_->CreateVariables(*inference_program_,
                             sub_scope_ ? sub_scope_ : scope_.get(), 0);

  // Get the feed_target_names and fetch_target_names
  PrepareFeedFetch();
  return true;
}

NativePaddlePredictor::~NativePaddlePredictor() {
  if (FLAGS_profile) {
    platform::DisableProfiler(platform::EventSortingKey::kTotal,
                              "./profile.log");
  }
  if (sub_scope_) {
    scope_->DeleteScope(sub_scope_);
  }
}

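// Execute one prediction: copy `inputs` into the feed slots, run the
// prepared program inside the (possibly shared) scope, then copy the fetch
// results into `output_data`.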
bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
                                std::vector<PaddleTensor> *output_data,
                                int batch_size) {
  VLOG(3) << "Predictor::predict";
  Timer timer;
  timer.tic();
  // set feed variable
  framework::Scope *scope = sub_scope_ != nullptr ? sub_scope_ : scope_.get();
  if (!SetFeed(inputs, scope)) {
    LOG(ERROR) << "fail to set feed";
    return false;
  }
  // Run the inference program
  // Since variables are shared, we need not create them on every run
  VLOG(4) << "Run prepared context";
  executor_->RunPreparedContext(ctx_.get(), scope,
                                false, /* don't create local scope each time */
                                false /* don't create variables each time */);
  VLOG(4) << "Finish prepared context";
  // get fetch variable
  if (!GetFetch(output_data, scope)) {
    LOG(ERROR) << "fail to get fetches";
    return false;
  }
  VLOG(3) << "predict cost: " << timer.toc() << "ms";
  return true;
}

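// Create a new predictor that shares this predictor's scope (and thus its
// parameters) while receiving its own sub-scope, so the clone can run
// independently of the original.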
std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
  VLOG(3) << "Predictor::clone";
  std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));

  if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(scope_)) {
    LOG(ERROR) << "fail to call Init";
    return nullptr;
  }
  // The explicit std::move works around a manylinux compile error.
  return std::move(cls);
}

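// Copy user-provided PaddleTensor inputs into LoDTensors and bind each one
// to its feed slot, matching by tensor name when `specify_input_name` is
// set and by position otherwise.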
bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
                                    framework::Scope *scope) {
  VLOG(3) << "Predictor::set_feed";
  if (inputs.size() != feeds_.size()) {
    LOG(ERROR) << "wrong feed input size.";
    return false;
  }
  for (size_t i = 0; i < inputs.size(); ++i) {
    framework::LoDTensor input;
    framework::DDim ddim = framework::make_ddim(inputs[i].shape);
    void *input_ptr;
    if (inputs[i].dtype == PaddleDType::INT64) {
      input_ptr = input.mutable_data<int64_t>(ddim, platform::CPUPlace());
    } else if (inputs[i].dtype == PaddleDType::FLOAT32) {
      input_ptr = input.mutable_data<float>(ddim, platform::CPUPlace());
    } else {
      LOG(ERROR) << "unsupported feed type " << inputs[i].dtype;
      return false;
    }

    // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
    std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
                inputs[i].data.length());
    // TODO(Superjomn) Low performance, need optimization for heavy LoD copy.
    framework::LoD lod;
    for (auto &level : inputs[i].lod) {
      lod.emplace_back(level);
    }
    input.set_lod(lod);
    int idx = -1;
    if (config_.specify_input_name) {
      idx =
          boost::get<int>(feeds_[feed_names_[inputs[i].name]]->GetAttr("col"));
    } else {
      idx = boost::get<int>(feeds_[i]->GetAttr("col"));
    }
    framework::SetFeedVariable(scope, input, "feed", idx);
  }
  return true;
}

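// Copy the fetch variables back into `outputs`. For LoD (batched sequence)
// results, every sequence is padded to the longest one so the output forms
// a dense rectangular tensor; the original LoD is copied alongside the data.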
bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
                                     framework::Scope *scope) {
  VLOG(3) << "Predictor::get_fetch";
  outputs->resize(fetchs_.size());
  for (size_t i = 0; i < fetchs_.size(); ++i) {
    int idx = boost::get<int>(fetchs_[i]->GetAttr("col"));
    PADDLE_ENFORCE(static_cast<size_t>(idx) == i);
    framework::LoDTensor &output =
        framework::GetFetchVariable(*scope, "fetch", idx);
    // TODO(panyx0718): Support fetch of other types.
    if (output.type() != typeid(float)) {
      LOG(ERROR) << "only support fetching float now.";
      return false;
    }

    std::vector<int> shape;
    auto dims_i = output.dims();
    auto lod = output.lod();
    const float *output_ptr = output.data<float>();
    auto num = output.numel();
    std::vector<float> data;
    if (0 == lod.size()) {
      std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
      for (int j = 0; j < dims_i.size(); ++j) {
        shape.push_back(dims_i[j]);
      }
    } else {
      // for batch detection
      // image[0] -> output[0] shape {145, 6}
      // image[1] -> output[1] shape {176, 6}
      // then,
      // the batch output shape {321, 6}
      // the lod {{0, 145, 321}}
      // so we should append output[0] to {176, 6}
      size_t max_dim = 0;
      for (size_t j = 1; j < lod[0].size(); j++) {
        max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]);
      }
      size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back();
      if (max_dim > 0) {
        data.resize((lod[0].size() - 1) * max_dim * common_dim, 0);
      }
      for (size_t j = 1; j < lod[0].size(); j++) {
        size_t start = lod[0][j - 1] * common_dim;
        size_t end = lod[0][j] * common_dim;
        if (end > start) {
          std::copy(output_ptr + start, output_ptr + end,
                    data.begin() + (j - 1) * max_dim * common_dim);
        }
      }
      shape.push_back(lod[0].size() - 1);
      shape.push_back(max_dim);
      for (int j = 1; j < dims_i.size(); ++j) {
        shape.push_back(dims_i[j]);
      }
    }

    outputs->at(i).shape = shape;
    auto &buffer = outputs->at(i).data;
    if (buffer.empty() || buffer.length() < sizeof(float) * data.size()) {
      buffer.Resize(sizeof(float) * data.size());
    }
    std::memcpy(buffer.data(), data.data(), sizeof(float) * data.size());
    // copy LoD
    for (const auto &level : output.lod()) {
      outputs->at(i).lod.emplace_back(level);
    }
    outputs->at(i).dtype = PaddleDType::FLOAT32;
    // TODO(panyx0718): support other types? fill tensor name? avoid a copy.
  }
  return true;
}

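// Factory specialization for the native engine: validates the GPU memory
// settings, forwards fraction_of_gpu_memory to gflags, and returns a fully
// initialized NativePaddlePredictor.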
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
  VLOG(3) << "create NativePaddlePredictor";
  if (config.use_gpu) {
    // 1. GPU memory
    PADDLE_ENFORCE_GT(
        config.fraction_of_gpu_memory, 0.f,
        "fraction_of_gpu_memory in the config should be set to range (0., 1.]");
    PADDLE_ENFORCE_GE(config.device, 0, "Invalid device id %d", config.device);
    std::vector<std::string> flags;
    if (config.fraction_of_gpu_memory >= 0.0f &&
        config.fraction_of_gpu_memory <= 0.95f) {
      flags.push_back("dummy");  // gflags expects argv[0] (program name) first
      std::string flag = "--fraction_of_gpu_memory_to_use=" +
                         num2str<float>(config.fraction_of_gpu_memory);
      flags.push_back(flag);
      VLOG(3) << "set flag: " << flag;
      framework::InitGflags(flags);
    }
  }

  std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
  if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
    return nullptr;
  }
  return std::move(predictor);
}

}  // namespace paddle
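
// ---------------------------------------------------------------------------
// Minimal usage sketch (illustrative only: the model path, feed name, input
// shape, and values below are assumptions, and Run() is assumed to use the
// public header's default batch_size):
//
//   paddle::NativeConfig config;
//   config.model_dir = "/path/to/model";  // hypothetical model directory
//   config.use_gpu = false;
//
//   auto predictor =
//       paddle::CreatePaddlePredictor<paddle::NativeConfig,
//                                     paddle::PaddleEngineKind::kNative>(
//           config);
//
//   paddle::PaddleTensor input;
//   input.name = "x";  // hypothetical feed name
//   input.shape = {1, 3};
//   input.dtype = paddle::PaddleDType::FLOAT32;
//   std::vector<float> values = {1.f, 2.f, 3.f};
//   input.data.Resize(values.size() * sizeof(float));
//   std::memcpy(input.data.data(), values.data(), input.data.length());
//
//   std::vector<paddle::PaddleTensor> outputs;
//   if (predictor->Run({input}, &outputs)) {
//     // outputs[0].data now holds the FLOAT32 results.
//   }
// ---------------------------------------------------------------------------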