// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

/*! \file paddle_api.h
 */

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

/*! \namespace paddle
 */
namespace paddle {

/** Paddle data type.
 */
// Enumerator values are written out explicitly; they are the same 0, 1
// numbering the compiler would assign implicitly.
enum PaddleDType {
  FLOAT32 = 0,
  INT64 = 1,
  // TODO(Superjomn) support more data types if needed.
};

/**
 *\brief Memory manager for PaddleTensor.
 *
 *The PaddleBuf holds a buffer for data input or output. The memory can be
 *allocated by user or by PaddleBuf itself, but in any case, the PaddleBuf
 *should be reused for better performance.
 *
 *For user allocated memory, the following API can be used:
 *- PaddleBuf(void* data, size_t length) to set an external memory by
 *  specifying the memory address and length.
 *- Reset(void* data, size_t length) to reset the PaddleBuf with an external
 *  memory.
 *ATTENTION, for user allocated memory, deallocation should be done by users
 *externally after the program finished. The PaddleBuf won't do any allocation
 *or deallocation.
 *
 *To have the PaddleBuf allocate and manage the memory:
 *- PaddleBuf(size_t length) will allocate a memory of size `length`.
 *- Resize(size_t length) resize the memory to no less than `length`, ATTENTION
 *  if the allocated memory is larger than `length`, nothing will be done.
 */
class PaddleBuf {
 public:
  /** Allocate `length` bytes internally; the PaddleBuf owns and manages them.
   */
  explicit PaddleBuf(size_t length)
      : data_(new char[length]), length_(length), memory_owned_(true) {}

  /** Wrap external memory of `length` bytes at `data`; the PaddleBuf won't
   * manage it.
   */
  PaddleBuf(void* data, size_t length)
      : data_(data), length_(length), memory_owned_(false) {}

  /** Copy only available when memory is managed externally.
   * Declared here, defined out of line.
   */
  explicit PaddleBuf(const PaddleBuf&);

  /** Resize the memory. Declared here, defined out of line.
   */
  void Resize(size_t length);

  /** Reset to external memory, with address and length set.
   * Declared here, defined out of line.
   */
  void Reset(void* data, size_t length);

  /** Tell whether the buffer is empty, i.e. its recorded length is zero.
   */
  bool empty() const { return length_ == 0; }

  /** Get the memory address.
   */
  void* data() const { return data_; }

  /** Get the memory length, in bytes.
   */
  size_t length() const { return length_; }

  /** The destructor delegates to Free(), which is defined out of line.
   */
  ~PaddleBuf() { Free(); }
  PaddleBuf& operator=(const PaddleBuf&);
  PaddleBuf& operator=(PaddleBuf&&);
  /** A default-constructed buffer has a null address and zero length.
   */
  PaddleBuf() = default;
  PaddleBuf(PaddleBuf&& other);

 private:
  void Free();
  void* data_{nullptr};  // pointer to the data memory.
  size_t length_{0};     // number of memory bytes.
  // true when constructed via PaddleBuf(size_t), false when wrapping external
  // memory via PaddleBuf(void*, size_t).
  bool memory_owned_{true};
};

/** Basic input and output data structure for PaddlePredictor.
 */
struct PaddleTensor {
  PaddleTensor() = default;
  std::string name;  // variable name.
  std::vector<int> shape;  // presumably the tensor's dimensions; confirm.
  PaddleBuf data;  // blob of data.
  // NOTE(review): dtype has no default member initializer, so a
  // default-initialized tensor (`PaddleTensor t;`) leaves it indeterminate.
  // Assign it before reading.
  PaddleDType dtype;
  std::vector<std::vector<size_t>> lod;  // Tensor+LoD equals LoDTensor
};

// Place selector passed to and returned by ZeroCopyTensor's data accessors.
enum class PaddlePlace {
  kUNK = -1,
  kCPU,
  kGPU
};
/** Tensor without copy, currently only supports AnalysisPredictor.
 */
class ZeroCopyTensor {
 public:
  void Reshape(const std::vector<int>& shape);

119 120 121 122 123
  /** Get the memory in CPU or GPU with specific data type, should Reshape first
   * to tell the data size.
   * Once can directly call this data to feed the data.
   * This is for write the input tensor.
   */
124 125
  template <typename T>
  T* mutable_data(PaddlePlace place);
T
tensor-tang 已提交
126 127
  /** Get the memory directly, will return the place and element size by
   * pointer.
128 129
   * This is for reading the output tensor.
   */
130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
  template <typename T>
  T* data(PaddlePlace* place, int* size) const;

  std::vector<int64_t> shape() const;

  void SetLoD(const std::vector<std::vector<size_t>>& x);
  std::vector<std::vector<size_t>> lod() const;
  const std::string& name() const { return name_; }

 protected:
  explicit ZeroCopyTensor(void* scope) : scope_{scope} {}
  void SetName(const std::string& name) { name_ = name; }
  void* FindTensor() const;

 private:
  std::string name_;
  bool input_or_output_;
  friend class AnalysisPredictor;
  void* scope_{nullptr};
149 150 151
  // The corresponding tensor pointer inside Paddle workspace is cached for
  // performance.
  mutable void* tensor_{nullptr};
152 153
};

/** A simple Inference API for Paddle.
 */
class PaddlePredictor {
 public:
  struct Config;
  PaddlePredictor() = default;
  PaddlePredictor(const PaddlePredictor&) = delete;
  PaddlePredictor& operator=(const PaddlePredictor&) = delete;

163 164 165 166 167 168
  /** Predict an record.
   * The caller should be responsible for allocating and releasing the memory of
   * `inputs`. `inputs` should be available until Run returns. Caller should be
   * responsible for the output tensor's buffer, either allocated or passed from
   * outside.
   */
169 170 171 172
  virtual bool Run(const std::vector<PaddleTensor>& inputs,
                   std::vector<PaddleTensor>* output_data,
                   int batch_size = -1) = 0;

173 174 175 176 177 178 179
  /** \brief Get a mutable tensor directly.
   *
   * NOTE Only works in AnalysisPredictor.
   *
   * One can also use this to modify any temporary variable related tensors in
   * the predictor.
   *
180
   */
181 182 183 184
  virtual std::unique_ptr<ZeroCopyTensor> GetInputTensor(
      const std::string& name) {
    return nullptr;
  }
185 186 187 188 189 190 191
  /**
   * \brief Get an immutable tensor without copy.
   *
   * NOTE Only works in AnalysisPredictor.
   * One can use this API to get any temporary tensors in the predictor and
   * read it.
   */
192 193 194 195
  virtual std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
      const std::string& name) {
    return nullptr;
  }
196 197 198 199 200 201 202 203 204 205 206
  /**
   * \brief Run the predictor with zero-copied inputs and outputs.
   *
   * NOTE Only works in AnalysisPredictor.
   *
   * This will save the IO copy for transfering inputs and outputs to predictor
   * workspace and get some performance improvement.
   * To use it, one should call the `AnalysisConfig.SwitchUseFeedFetchOp(true)`
   * and then use the `GetInputTensor` and `GetOutputTensor` to directly write
   * or read the input/output tensors.
   */
207 208
  virtual bool ZeroCopyRun() { return false; }

209 210 211
  /** Clone a predictor that share the model weights, the Cloned predictor
   * should be thread-safe.
   */
212 213
  virtual std::unique_ptr<PaddlePredictor> Clone() = 0;

214 215
  /** Destroy the Predictor.
   */
216 217
  virtual ~PaddlePredictor() = default;

Y
Yan Chunwei 已提交
218 219 220 221 222 223 224 225
  /** \brief Get the serialized model program that executes in inference phase.
   * Its data type is ProgramDesc, which is a protobuf message.
   */
  virtual std::string GetSeriazlizedProgram() const {
    assert(false);  // Force raise error.
    return "NotImplemented";
  };

226 227
  /** The common configs for all the predictors.
   */
228
  struct Config {
229
    std::string model_dir; /*!< path to the model directory. */
230 231 232 233 234 235 236
  };
};

struct NativeConfig : public PaddlePredictor::Config {
  // GPU related fields.
  bool use_gpu{false};
  int device{0};
  float fraction_of_gpu_memory{
      -1.f}; /*!< Change to a float in (0,1] if needed. */

  // Specify the exact path of program and parameter files.
  std::string prog_file;
  std::string param_file;

  /** Specify the variable's name of each input if input tensors don't follow
   * the `feeds` and `fetches` of the phase `save_inference_model`.
   */
  bool specify_input_name{false};

  /** Set the number of cpu math library threads.
   */
  void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads) {
    cpu_math_library_num_threads_ = cpu_math_library_num_threads;
  }

  /** Get the number of cpu math library threads (1 unless changed).
   */
  int cpu_math_library_num_threads() const {
    return cpu_math_library_num_threads_;
  }

 protected:
  // number of cpu math library (such as MKL, OpenBlas) threads for each
  // instance.
  int cpu_math_library_num_threads_{1};
};

/*! \fn std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT&
 * config);
 *
 * \brief A factory to help create different predictors.
 *
 * Usage:
 *
 * NativeConfig config;
 * ... // change the configs.
 * auto native_predictor = CreatePaddlePredictor(config);
 *
 * FOR EXTENSION DEVELOPER:
 * Different predictors are designated by config type. Similar configs can be
 * merged, but there shouldn't be a huge config containing different fields for
 * more than one kind of predictor.
 */
// Factory declaration: the predictor to build is chosen by the config type.
template <class ConfigT>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);

/** NOTE The following APIs are too trivial, we will discard them in the
 * following versions.
 */
enum class PaddleEngineKind {
  kNative = 0,        /*!< Use the native Fluid facility. */
  kAutoMixedTensorRT, /*!< Automatically mix Fluid with TensorRT. */
  kAnalysis,          /*!< More optimization. */
  kAnakin             /*!< Use Anakin for inference, not mature yet. */
};

// Factory overload that takes the engine kind as a compile-time template
// argument in addition to the config type.
template <class ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);

// Declared here, defined out of line. Presumably returns the size in bytes of
// one element of `dtype`; confirm against the implementation.
int PaddleDtypeSize(PaddleDType dtype);

}  // namespace paddle