// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

/*! \file paddle_api.h
 */

/*! \mainpage Paddle Inference APIs
 * \section intro_sec Introduction
 * The Paddle inference library aims to offer a high-performance inference SDK
 * for Paddle users.
 */

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <string>
#include <vector>

/*! \namespace paddle
 */
namespace paddle {

/** Paddle data type.
 */
enum PaddleDType {
  // Unscoped enum without explicit values: the enumerators are numbered
  // consecutively from 0 in declaration order, so new types must be appended
  // at the end to keep the existing values stable.
  FLOAT32, /*!< value 0 */
  INT64,   /*!< value 1 */
  INT32,   /*!< value 2 */
  UINT8,   /*!< value 3 */
  // TODO(Superjomn) support more data types if needed.
};

/**
 * \brief Memory manager for `PaddleTensor`.
 *
 * The PaddleBuf holds a buffer for data input or output. The memory can be
 * allocated by the user or by the PaddleBuf itself, but in any case the
 * PaddleBuf should be reused for better performance.
 *
 * For user allocated memory, the following API can be used:
 * - PaddleBuf(void* data, size_t length) to set an external memory by
 *   specifying the memory address and length.
 * - Reset(void* data, size_t length) to reset the PaddleBuf with an external
 *   memory.
 *
 * ATTENTION: user allocated memory must be deallocated by the user, outside
 * the PaddleBuf, after the program has finished using it. The PaddleBuf won't
 * do any allocation or deallocation for such memory.
 *
 * To have the PaddleBuf allocate and manage the memory:
 * - PaddleBuf(size_t length) will allocate a memory of size `length`.
 * - Resize(size_t length) resizes the memory to no less than `length`.
 *   ATTENTION: if the allocated memory is already larger than `length`,
 *   nothing will be done.
 *
 * Usage:
 *
 * Let PaddleBuf manage the memory internally.
 * \code{.cpp}
 * const int num_elements = 128;
 * PaddleBuf buf(num_elements * sizeof(float));
 * \endcode
 *
 * Or
 * \code{.cpp}
 * PaddleBuf buf;
 * buf.Resize(num_elements * sizeof(float));
 * \endcode
 * Both work exactly the same.
 *
 * One can also make the `PaddleBuf` use external memory.
 * \code{.cpp}
 * PaddleBuf buf;
 * float* external_memory = new float[num_elements];
 * buf.Reset(external_memory, num_elements * sizeof(float));
 * ...
 * delete[] external_memory; // manage the memory lifetime outside.
 * \endcode
 */
class PaddleBuf {
 public:
  /** Construct an empty buffer: null data pointer and zero length.
   */
  PaddleBuf() = default;

  /** PaddleBuf allocates the memory internally (with `new char[]`) and marks
   * it as owned.
   * \param length Number of bytes to allocate.
   */
  explicit PaddleBuf(size_t length)
      : data_(new char[length]), length_(length), memory_owned_(true) {}

  /** Set external memory; the PaddleBuf is marked as not owning it.
   * \param data Address of the external memory.
   * \param length Length of the external memory in bytes.
   */
  PaddleBuf(void* data, size_t length)
      : data_(data), length_(length), memory_owned_(false) {}

  /** Copy only available when memory is managed externally.
   * NOTE(review): defined out of line; confirm the exact copy semantics
   * against the implementation.
   */
  explicit PaddleBuf(const PaddleBuf&);

  /** Move constructor; defined out of line.
   */
  PaddleBuf(PaddleBuf&& other);

  /** Copy and move assignment; both are defined out of line.
   */
  PaddleBuf& operator=(const PaddleBuf&);
  PaddleBuf& operator=(PaddleBuf&&);

  /** Destroying the buffer calls Free().
   */
  ~PaddleBuf() { Free(); }

  /** Resize the memory.
   * \param length Requested length in bytes.
   */
  void Resize(size_t length);

  /** Reset to external memory, with address and length set.
   * \param data Address of the external memory.
   * \param length Length of the external memory in bytes.
   */
  void Reset(void* data, size_t length);

  /** Tell whether the buffer is empty, i.e. whether its length is zero.
   */
  bool empty() const { return length_ == 0; }

  /** Get the data's memory address.
   */
  void* data() const { return data_; }

  /** Get the memory length.
   */
  size_t length() const { return length_; }

 private:
  // Invoked by the destructor; defined out of line.
  // NOTE(review): presumably releases the memory only when it is owned by
  // this PaddleBuf -- confirm against the implementation.
  void Free();

  void* data_{nullptr};  // pointer to the data memory.
  size_t length_{0};     // number of memory bytes.
  // Set to true by the allocating constructor and to false by the
  // external-memory constructor.
  bool memory_owned_{true};
};

/** Basic input and output data structure for PaddlePredictor.
 */
struct PaddleTensor {
  // Defaulted default constructor: members are default-initialized.
  PaddleTensor() = default;
  std::string name;  // variable name.
  std::vector<int> shape;  // shape of the tensor.
  PaddleBuf data;  // blob of data.
  // NOTE(review): `dtype` has no default member initializer, so a
  // default-initialized tensor (`PaddleTensor t;`) leaves it indeterminate;
  // assign it explicitly before reading it.
  PaddleDType dtype;
  std::vector<std::vector<size_t>> lod;  // Tensor+LoD equals LoDTensor
};

/// Device type of a tensor, as used by the `place` parameters of
/// ZeroCopyTensor.
enum class PaddlePlace {
  kUNK = -1,  ///< NOTE(review): presumably "unknown" -- confirm.
  kCPU = 0,
  kGPU = 1,
};

/// \brief Represents an n-dimensional array of values.
/// The ZeroCopyTensor is used to store the input or output of the network.
/// Zero copy means that the tensor supports direct copy of host or device data
/// to device, eliminating additional CPU copy. ZeroCopyTensor is only used in
/// the AnalysisPredictor.
/// It is obtained through the PaddlePredictor::GetInputTensor()
/// and PaddlePredictor::GetOutputTensor() interfaces.
class ZeroCopyTensor {
 public:
156 157 158 159
  /// \brief Reset the shape of the tensor.
  /// Generally it's only used for the input tensor.
  /// Reshape must be called before calling mutable_data() or copy_from_cpu()
  /// \param shape The shape to set.
160 161
  void Reshape(const std::vector<int>& shape);

162 163 164 165
  /// \brief Get the memory pointer in CPU or GPU with specific data type.
  /// Please Reshape the tensor first before call this.
  /// It's usually used to get input data pointer.
  /// \param place The place of the tensor.
166 167
  template <typename T>
  T* mutable_data(PaddlePlace place);
168 169 170 171 172 173

  /// \brief Get the memory pointer directly.
  /// It's usually used to get the output data pointer.
  /// \param[out] place To get the device type of the tensor.
  /// \param[out] size To get the data size of the tensor.
  /// \return The tensor data buffer pointer.
174 175 176
  template <typename T>
  T* data(PaddlePlace* place, int* size) const;

177 178 179
  /// \brief Copy the host memory to tensor data.
  /// It's usually used to set the input tensor data.
  /// \param data The pointer of the data, from which the tensor will copy.
N
nhzlx 已提交
180 181 182
  template <typename T>
  void copy_from_cpu(const T* data);

183 184 185
  /// \brief Copy the tensor data to the host memory.
  /// It's usually used to get the output tensor data.
  /// \param[out] data The tensor will copy the data to the address.
N
nhzlx 已提交
186 187 188
  template <typename T>
  void copy_to_cpu(T* data);

189
  /// \brief Return the shape of the Tensor.
N
nhzlx 已提交
190
  std::vector<int> shape() const;
191

192 193 194 195
  /// \brief Set lod info of the tensor.
  /// More about LOD can be seen here:
  ///  https://www.paddlepaddle.org.cn/documentation/docs/zh/beginners_guide/basic_concept/lod_tensor.html#lodtensor
  /// \param x the lod info.
196
  void SetLoD(const std::vector<std::vector<size_t>>& x);
197
  /// \brief Return the lod info of the tensor.
198
  std::vector<std::vector<size_t>> lod() const;
199
  /// \brief Return the name of the tensor.
200
  const std::string& name() const { return name_; }
N
nhzlx 已提交
201 202 203 204
  void SetPlace(PaddlePlace place, int device = -1) {
    place_ = place;
    device_ = device;
  }
205

206 207 208
  /// \brief Return the data type of the tensor.
  /// It's usually used to get the output tensor data type.
  /// \return The data type of the tensor.
N
nhzlx 已提交
209
  PaddleDType type() const;
210

211 212 213 214 215 216 217 218 219 220
 protected:
  explicit ZeroCopyTensor(void* scope) : scope_{scope} {}
  void SetName(const std::string& name) { name_ = name; }
  void* FindTensor() const;

 private:
  std::string name_;
  bool input_or_output_;
  friend class AnalysisPredictor;
  void* scope_{nullptr};
221 222 223
  // The corresponding tensor pointer inside Paddle workspace is cached for
  // performance.
  mutable void* tensor_{nullptr};
N
nhzlx 已提交
224
  PaddlePlace place_;
225
  PaddleDType dtype_;
N
nhzlx 已提交
226
  int device_;
227 228
};

/// \brief A Predictor for executing inference on a model.
/// Base class for AnalysisPredictor and NativePaddlePredictor.
class PaddlePredictor {
 public:
  struct Config;
  PaddlePredictor() = default;
  PaddlePredictor(const PaddlePredictor&) = delete;
  PaddlePredictor& operator=(const PaddlePredictor&) = delete;

238 239 240 241 242 243 244 245
  /// \brief This interface takes input and runs the network.
  /// There are redundant copies of data between hosts in this operation,
  /// so it is more recommended to use the zecopyrun interface
  /// \param[in] inputs An list of PaddleTensor as the input to the network.
  /// \param[out] output_data Pointer to the tensor list, which holds the output
  /// paddletensor
  /// \param[in] batch_size This setting has been discarded and can be ignored.
  /// \return Whether the run is successful
246 247 248 249
  virtual bool Run(const std::vector<PaddleTensor>& inputs,
                   std::vector<PaddleTensor>* output_data,
                   int batch_size = -1) = 0;

250 251 252
  /// \brief  Used to get the name of the network input.
  /// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios.
  /// \return Input tensor names.
N
nhzlx 已提交
253 254
  virtual std::vector<std::string> GetInputNames() { return {}; }

255 256
  /// \brief Get the input shape of the model.
  /// \return A map contains all the input names and shape defined in the model.
257 258 259 260
  virtual std::map<std::string, std::vector<int64_t>> GetInputTensorShape() {
    return {};
  }

261 262 263
  /// \brief Used to get the name of the network output.
  /// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios.
  /// \return Output tensor names.
N
nhzlx 已提交
264 265
  virtual std::vector<std::string> GetOutputNames() { return {}; }

266 267 268 269 270
  /// \brief Get the input ZeroCopyTensor by name.
  /// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios.
  /// The name is obtained from the GetInputNames() interface.
  /// \param name The input tensor name.
  /// \return Return the corresponding input ZeroCopyTensor.
271 272 273 274
  virtual std::unique_ptr<ZeroCopyTensor> GetInputTensor(
      const std::string& name) {
    return nullptr;
  }
275 276 277 278 279 280

  /// \brief Get the output ZeroCopyTensor by name.
  /// Be inherited by AnalysisPredictor, Only used in ZeroCopy scenarios.
  /// The name is obtained from the GetOutputNames() interface.
  /// \param name The output tensor name.
  /// \return Return the corresponding output ZeroCopyTensor.
281 282 283 284
  virtual std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
      const std::string& name) {
    return nullptr;
  }
285 286 287 288 289 290 291 292 293
  /// \brief Run the network with zero-copied inputs and outputs.
  /// Be inherited by AnalysisPredictor and only used in ZeroCopy scenarios.
  /// This will save the IO copy for transfering inputs and outputs to predictor
  /// workspace
  /// and get some performance improvement.
  /// To use it, one should call the AnalysisConfig.SwitchUseFeedFetchOp(true)
  /// and then use the `GetInputTensor` and `GetOutputTensor`
  /// to directly write or read the input/output tensors.
  /// \return Whether the run is successful
294 295
  virtual bool ZeroCopyRun() { return false; }

296 297 298 299
  /// \brief Clone an existing predictor
  /// When using clone, the same network will be created,
  /// and the parameters between them are shared.
  /// \return unique_ptr which contains the pointer of predictor
300 301
  virtual std::unique_ptr<PaddlePredictor> Clone() = 0;

302
  /// \brief Destroy the Predictor.
303 304
  virtual ~PaddlePredictor() = default;

305
  virtual std::string GetSerializedProgram() const {
Y
Yan Chunwei 已提交
306 307
    assert(false);  // Force raise error.
    return "NotImplemented";
308
  }
Y
Yan Chunwei 已提交
309

310
  /// \brief Base class for NativeConfig and AnalysisConfig.
311
  struct Config {
312
    std::string model_dir; /*!< path to the model directory. */
313 314 315 316 317 318 319
  };
};

/// Configuration derived from PaddlePredictor::Config that adds GPU settings,
/// explicit program/parameter file paths and the CPU math library thread
/// count.
struct NativeConfig : public PaddlePredictor::Config {
  // GPU related fields.
  bool use_gpu{false}; /*!< Defaults to false. */
  int device{0};       /*!< Defaults to 0. */
  float fraction_of_gpu_memory{
      -1.f}; /*!< Change to a float in (0,1] if needed. */

  // Specify the exact path of program and parameter files.
  std::string prog_file;
  std::string param_file;

  /** Specify the variable's name of each input if input tensors don't follow
   * the `feeds` and `fetches` of the phase `save_inference_model`.
   */
  bool specify_input_name{false};

  /** Set the number of cpu math library threads.
   * \param cpu_math_library_num_threads The thread count to store.
   */
  void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads) {
    cpu_math_library_num_threads_ = cpu_math_library_num_threads;
  }

  /** Get the number of cpu math library threads.
   * \return The stored thread count; 1 unless changed by the setter.
   */
  int cpu_math_library_num_threads() const {
    return cpu_math_library_num_threads_;
  }

 protected:
  // number of cpu math library (such as MKL, OpenBlas) threads for each
  // instance.
  int cpu_math_library_num_threads_{1};
};

/*! \fn std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT&
 * config);
 *
 * \brief A factory to help create different predictors.
 *
 * Usage:
 *
 * \code{.cpp}
 * NativeConfig config;
 * ... // change the configs.
 * auto native_predictor = CreatePaddlePredictor(config);
 * \endcode
 *
 * FOR EXTENSION DEVELOPER:
 * Different predictors are designated by config type. Similar configs can be
 * merged, but there shouldn't be a huge config containing different fields for
 * more than one kind of predictor.
 */
// Declaration only: no definition is visible in this header.
// NOTE(review): presumably each supported ConfigT gets its own
// specialization/instantiation elsewhere -- confirm.
template <typename ConfigT>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);

/** NOTE: The following APIs are too trivial; they will be discarded in future
 * versions.
 */
enum class PaddleEngineKind {
  // Only the first enumerator has an explicit value; the rest follow
  // consecutively (1, 2).
  kNative = 0,        /*!< Use the native Fluid facility. */
  kAutoMixedTensorRT, /*!< Automatically mix Fluid with TensorRT. */
  kAnalysis,          /*!< More optimization. */
};

// Overload of CreatePaddlePredictor that additionally takes the engine kind as
// a non-type template parameter. Declaration only: no definition is visible in
// this header.
template <typename ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);

// Declared here, defined out of line.
// NOTE(review): presumably returns the size in bytes of one element of
// `dtype` -- confirm against the implementation.
int PaddleDtypeSize(PaddleDType dtype);

// Declared here, defined out of line.
// NOTE(review): presumably returns a string describing the library version --
// confirm against the implementation.
std::string get_version();

}  // namespace paddle