From 6d371e452e880eb8dba515408517f8ac418358ed Mon Sep 17 00:00:00 2001
From: Yan Chunwei
Date: Thu, 10 May 2018 20:04:44 +0800
Subject: [PATCH] init Inference top APIs (#10549)

---
 contrib/inference/README.md              | 27 ++++
 contrib/inference/paddle_inference_api.h | 69 ++++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 contrib/inference/README.md
 create mode 100644 contrib/inference/paddle_inference_api.h

diff --git a/contrib/inference/README.md b/contrib/inference/README.md
new file mode 100644
index 00000000000..20969fac6c8
--- /dev/null
+++ b/contrib/inference/README.md
@@ -0,0 +1,27 @@
+# Embed Paddle Inference in Your Application
+
+Paddle inference offers APIs in the `C` and `C++` languages.
+
+One can easily deploy a model trained by Paddle by following the steps below:
+
+1. Optimize the native model;
+2. Write some code for deployment.
+
+
+Let's explain the steps in detail.
+
+## Optimize the native Fluid Model
+
+The native model obtained from the training phase needs to be optimized for inference:
+
+- Clean up noise such as the cost operators, which are not needed for inference;
+- Prune unnecessary computation branches that have nothing to do with the output;
+- Remove extraneous variables;
+- Enable memory reuse in the native Fluid executor;
+- Translate the model storage format to a third-party engine's, so that the inference API can utilize the engine for acceleration;
+
+We provide an official tool for the optimization; run `paddle_inference_optimize --help` for more information.
+
+## Write some code
+
+Read `paddle_inference_api.h` for more information.
diff --git a/contrib/inference/paddle_inference_api.h b/contrib/inference/paddle_inference_api.h
new file mode 100644
index 00000000000..dbaa7c95b97
--- /dev/null
+++ b/contrib/inference/paddle_inference_api.h
@@ -0,0 +1,69 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+namespace paddle {
+
+class Predictor {
+ public:
+  struct Attr;
+  Predictor() = default;
+
+  // Build the network before inference.
+  bool Init(const Attr& attr);
+
+  // Predict a record.
+  // Arguments:
+  //   inputs: the names of the input variables.
+  //   outputs: the names of the output variables.
+  //   input_shapes: the shapes of the input variables.
+  //   output_shapes: the shapes of the output variables.
+  //   input_data: the data of the input variables.
+  //   output_data: the data of the output variables.
+  bool Run(const std::vector<std::string>& inputs,
+           const std::vector<std::string>& outputs,
+           const std::vector<std::vector<int>>& input_shapes,
+           const std::vector<std::vector<int>>& output_shapes,
+           const std::vector<std::vector<float>>& input_data,
+           std::vector<std::vector<float>>* output_data);
+
+  // Clone a predictor that shares the model weights.
+  Predictor* Clone();
+
+  // Destroy the Predictor.
+  ~Predictor();
+
+  struct Attr {
+    enum class EngineKind;
+
+    std::string model_dir;      // path to the model directory.
+    bool enable_engine{false};  // Enable to execute (part of) the model on
+                                // third-party engines.
+    EngineKind engine_kind{Attr::EngineKind::kNone};
+
+    enum class EngineKind {
+      kNone = -1,          // Use the native Fluid facility.
+      kAnakin,             // Use Anakin for inference.
+      kTensorRT,           // Use TensorRT for inference.
+      kAutoMixedAnakin,    // Automatically mix Fluid with Anakin.
+      kAutoMixedTensorRT,  // Automatically mix Fluid with TensorRT.
+    };
+  };
+};
+
+}  // namespace paddle
--
GitLab
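
Below is a minimal usage sketch of the `Predictor` API declared in the patch. It is an editorial illustration, not part of the commit: the model directory, the variable names `image` and `softmax`, and the 1x3x224x224 input layout are all assumptions, and since the patch only declares the interface, the sketch compiles against the header but cannot link until an implementation lands.

#include <iostream>
#include <string>
#include <vector>

#include "paddle_inference_api.h"

int main() {
  // Configure the predictor: native Fluid execution, no third-party engine.
  paddle::Predictor::Attr attr;
  attr.model_dir = "./my_model_dir";  // hypothetical optimized model path
  attr.enable_engine = false;
  attr.engine_kind = paddle::Predictor::Attr::EngineKind::kNone;

  paddle::Predictor predictor;
  if (!predictor.Init(attr)) {
    std::cerr << "failed to initialize predictor" << std::endl;
    return 1;
  }

  // One input and one output variable; the names must match the model.
  std::vector<std::string> inputs = {"image"};     // assumed variable name
  std::vector<std::string> outputs = {"softmax"};  // assumed variable name

  // A single 1x3x224x224 image, flattened; shapes mirror the data layout.
  std::vector<std::vector<int>> input_shapes = {{1, 3, 224, 224}};
  std::vector<std::vector<int>> output_shapes = {{1, 1000}};
  std::vector<std::vector<float>> input_data = {
      std::vector<float>(1 * 3 * 224 * 224, 0.f)};
  std::vector<std::vector<float>> output_data;

  if (!predictor.Run(inputs, outputs, input_shapes, output_shapes,
                     input_data, &output_data)) {
    std::cerr << "prediction failed" << std::endl;
    return 1;
  }
  std::cout << "got " << output_data.size() << " output(s)" << std::endl;

  // Clone() shares the model weights, so a serving process can cheaply
  // create one predictor per worker thread instead of reloading the model.
  paddle::Predictor* worker = predictor.Clone();
  delete worker;
  return 0;
}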