Commit 0129f4b5 (unverified), authored by Wilber, committed by GitHub

Add some inference API comments for AnalysisPredictor (#23242)

* add inference api doc. test=develop
Parent c8f9e66b
@@ -30,6 +30,18 @@
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif
///
/// \file analysis_predictor.h
///
/// \brief Compared to NativePredictor, AnalysisPredictor is a high-performance
/// predictor that includes many optimizations
///
/// \author paddle-infer@baidu.com
/// \date 2020-01-01
/// \since 1.7.0
///
namespace paddle {
using inference::analysis::Argument;
@@ -37,95 +49,298 @@ using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
using framework::NaiveExecutor;
/** \brief This predictor is based on the original native predictor with IR and
* Analysis support.
*
* It will optimize IR and Parameters in the runtime.
*
* TODO(Superjomn) Replace the Native predictor?
*/
///
/// \class AnalysisPredictor
///
/// \brief The analysis predictor is based on the original native predictor with
/// IR and Analysis support. It will optimize IR and Parameters in the runtime.
///
/// The predictor has the following typical uses:
///
/// Get predictor
/// \code{cpp}
/// auto predictor = CreatePaddlePredictor(config);
/// \endcode
///
/// Get input or output names
/// \code{cpp}
/// auto input_names = predictor->GetInputNames();
/// auto output_names = predictor->GetOutputNames();
/// \endcode
///
/// Get input or output tensors
/// \code{cpp}
/// auto input_t = predictor->GetInputTensor(input_names[0]);
/// auto output_t = predictor->GetOutputTensor(output_names[0]);
/// \endcode
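///
/// Fill input data (a minimal sketch; the tensor shape and float data type
/// are assumptions for illustration)
/// \code{cpp}
/// std::vector<float> input(1 * 3 * 224 * 224, 0.f);
/// input_t->Reshape({1, 3, 224, 224});
/// input_t->copy_from_cpu(input.data());
/// \endcode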
///
/// Run predictor
/// \code{cpp}
/// predictor->ZeroCopyRun();
/// \endcode
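///
/// Fetch output data (a minimal sketch; the float data type and use of
/// std::accumulate are assumptions for illustration)
/// \code{cpp}
/// std::vector<int> output_shape = output_t->shape();
/// int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
///                               std::multiplies<int>());
/// std::vector<float> output(out_num);
/// output_t->copy_to_cpu(output.data());
/// \endcode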
///
class AnalysisPredictor : public PaddlePredictor {
public:
///
/// \brief Construct a new Analysis Predictor object
///
/// \param[in] config the AnalysisConfig used to create the predictor
///
explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
predictor_id_ = inference::GetUniqueId();
}
///
/// \brief Destroy the Analysis Predictor object
///
~AnalysisPredictor();
///
/// \brief Initialize predictor
///
/// Initializing predictor mainly includes the following tasks:
/// preparing scope, creating executor, preparing program, initializing the
/// variables required by the executor, getting the feed_target_names and
/// fetch_target_names, etc.
///
/// \param[in] parent_scope parent scope
/// \param[in] program program
/// \return Whether the init function executed successfully
///
bool Init(const std::shared_ptr<framework::Scope> &parent_scope,
const std::shared_ptr<framework::ProgramDesc> &program = nullptr);
///
/// \brief Run the prediction engine. Deprecated. Please refer to ZeroCopyRun
///
/// \param[in] inputs input tensors
/// \param[out] output_data output tensors
/// \param[in] batch_size data's batch size
/// \return Whether the function executed successfully
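///
/// A minimal usage sketch of this deprecated interface (the tensor name,
/// shape, and float data type are assumptions for illustration):
/// \code{cpp}
/// std::vector<float> data(1 * 3 * 224 * 224, 0.f);
/// PaddleTensor tensor;
/// tensor.name = "x";
/// tensor.shape = {1, 3, 224, 224};
/// tensor.data = PaddleBuf(data.data(), data.size() * sizeof(float));
/// tensor.dtype = PaddleDType::FLOAT32;
/// std::vector<PaddleTensor> outputs;
/// predictor->Run({tensor}, &outputs, 1);
/// \endcode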
///
bool Run(const std::vector<PaddleTensor> &inputs,
std::vector<PaddleTensor> *output_data,
int batch_size = -1) override;
///
/// \brief Get the input names
///
/// \return input names
///
std::vector<std::string> GetInputNames();
///
/// \brief Get the output names
///
/// \return output names
///
std::vector<std::string> GetOutputNames();
///
/// \brief Get the Input Tensor object
///
/// \param[in] name input name
/// \return input tensor
///
std::unique_ptr<ZeroCopyTensor> GetInputTensor(
const std::string &name) override;
///
/// \brief Get the Output Tensor object
///
/// \param[in] name output name
/// \return output tensor
///
std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
const std::string &name) override;
///
/// \brief Get all input names and their corresponding shapes
///
/// \return the map of input names and shapes
///
std::map<std::string, std::vector<int64_t>> GetInputTensorShape() override;
///
/// \brief Run the prediction engine
///
/// \return Whether the function executed successfully
///
bool ZeroCopyRun() override;
///
/// \brief Create feed fetch variables
///
/// \param[in] scope Scope needed to create variables
///
void CreateFeedFetchVar(framework::Scope *scope);
///
/// \brief Determine the model's inputs and outputs based on the program's
/// feed and fetch ops
///
void PrepareFeedFetch();
///
/// \brief Set predictor's argument according to config, which mainly includes
/// execution information and graph optimization related pass information
///
void PrepareArgument();
///
/// \brief According to argument information, execute the relevant pass
/// to get the optimized model program
///
void OptimizeInferenceProgram();
///
/// \brief Get the argument used by predictor
///
/// \return the argument obtained by config
///
Argument &analysis_argument() { return argument_; }
///
/// \brief Clone to get a new predictor. Thread safe.
///
/// \return a new predictor
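///
/// A minimal multi-thread sketch (the thread count is an assumption for
/// illustration):
/// \code{cpp}
/// std::vector<std::thread> threads;
/// for (int i = 0; i < 4; ++i) {
///   threads.emplace_back([&predictor] {
///     auto thread_predictor = predictor->Clone();
///     thread_predictor->ZeroCopyRun();
///   });
/// }
/// for (auto &t : threads) t.join();
/// \endcode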
///
std::unique_ptr<PaddlePredictor> Clone() override;
///
/// \brief Get the scope used by predictor
///
/// \return scope
///
framework::Scope *scope() { return scope_.get(); }
///
/// \brief Get the inference program
///
/// \return the inference program
///
framework::ProgramDesc &program() { return *inference_program_; }
///
/// \brief Get the serialized program
///
/// \return the serialized program
///
std::string GetSerializedProgram() const override;
///
/// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
///
/// \return Whether the function executed successfully
///
bool MkldnnQuantize();
// save program to model
// save parameters to params
///
/// \brief Save the program to the model file and the parameters to the params
/// file
///
/// \param[in] dir path to save the model
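///
/// A minimal sketch (the output directory is an assumption for illustration):
/// \code{cpp}
/// predictor->SaveOptimModel("./optimized_model");
/// \endcode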
///
void SaveOptimModel(const std::string &dir);
protected:
///
/// \brief Prepare the program required by the predictor, including loading
/// the model, optimizing the graph, and creating the variables required by
/// the executor, etc.
///
/// \param[in] program paddle program
/// \return Whether the function executed successfully
///
bool PrepareProgram(const std::shared_ptr<framework::ProgramDesc> &program);
///
/// \brief Prepare scope environment, each predictor has its own scope
///
/// \param[in] parent_scope The scope of the predictor to be cloned, or null
/// \return Whether the function executed successfully
///
bool PrepareScope(const std::shared_ptr<framework::Scope> &parent_scope);
///
/// \brief Create an Executor object
///
/// \return Whether the function executed successfully
///
bool CreateExecutor();
///
/// \brief According to the model's program, the executor creates ops
///
/// \return Whether the function executed successfully
///
bool PrepareExecutor();
///
/// \brief Load model program.
///
/// \return Whether the function executed successfully
///
bool LoadProgramDesc();
///
/// \brief Load model parameters.
///
/// \return Whether the function executed successfully
///
bool LoadParameters();
///
/// \brief Prepare input data, only used in Run()
///
/// \param[in] input_datas input tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
framework::Scope *scope);
///
/// \brief Get the output data, only used in Run()
///
/// \param[out] output_data output tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool GetFetch(std::vector<PaddleTensor> *output_data,
framework::Scope *scope);
///
/// \brief Get the output data, only used in GetFetch()
///
/// \param[in] fetchs the tensor from the fetch op
/// \param[out] output_data output tensor
///
template <typename T>
void GetFetchOne(const framework::LoDTensor &fetchs,
PaddleTensor *output_data);
// PreSet and PostReset for Mkldnn multi-thread and dynamic shape input.
// Used in AnalysisPredictor::Run(), do not support
// AnalysisPredictor::ZeroRun() now.
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
void MkldnnPostReset();
///
/// \brief Compute compatibility based on model version information and
/// operator version information
///
/// \return Whether the model and operator versions are compatible
///
bool CheckOperatorCompatible();
#if PADDLE_WITH_TENSORRT
// When we use Paddle-TRT INT8 engine, we need to generate calibration table
// data first,
// the calibration table contains the range for each op's input and output,
// this whole process can be divided into several steps:
//
// 1. Builds a 32-bit engine, runs it on the calibration set, and records a
// histogram for each
// tensor of the distribution of activation values.
// 2. Builds a calibration table from the histograms.
//
// After step 2, we need to store the calibration table on disk
///
/// \brief save calibration table
///
/// When we use the Paddle-TRT INT8 engine, we need to generate the
/// calibration table data first. The calibration table contains the range of
/// each op's input and output. The whole process can be divided into several
/// steps:
/// 1. Builds a 32-bit engine, runs it on the calibration set, and records a
/// histogram for each tensor of the distribution of activation values.
/// 2. Builds a calibration table from the histograms.
/// After step 2, we need to store the calibration table on disk.
///
/// \return Whether the function executed successfully
///
bool SaveTrtCalibToDisk();
#endif
......
@@ -11,6 +11,17 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file paddle_mkldnn_quantizer_config.h
///
/// \brief Mkldnn quantizer config.
///
/// \author paddle-infer@baidu.com
/// \date 2020-01-01
/// \since 1.7.0
///
#pragma once
#include <cassert>
@@ -24,75 +35,155 @@
namespace paddle {
// Algorithms for finding scale of quantized Tensors.
///
/// \brief Algorithms for finding scale of quantized Tensors.
///
enum class ScaleAlgo {
NONE, // Do not compute scale
MAX, // Find scale based on the max absolute value
MAX_CH, // Find scale based on the max absolute value per output channel
MAX_CH_T, // Find scale based on the max absolute value per output channel
// of a transposed tensor
KL, // Find scale based on KL Divergence
NONE, ///< Do not compute scale
MAX, ///< Find scale based on the max absolute value
MAX_CH, ///< Find scale based on the max absolute value per output channel
MAX_CH_T, ///< Find scale based on the max absolute value per output channel
///< of a transposed tensor
KL, ///< Find scale based on KL Divergence
};
///
/// \class MkldnnQuantizerConfig
///
/// \brief Config for mkldnn quantize.
///
/// The MkldnnQuantizerConfig is used to configure Mkldnn's quantization
/// parameters, including scale algorithm, warmup data, warmup batch size,
/// quantized op list, etc.
///
/// It is not recommended to use this config directly, please refer to
/// AnalysisConfig::mkldnn_quantizer_config()
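///
/// A minimal configuration sketch through AnalysisConfig (the warmup batch
/// size and scale algorithm are assumptions for illustration):
/// \code{cpp}
/// AnalysisConfig cfg;
/// cfg.EnableMKLDNN();
/// cfg.EnableMkldnnQuantizer();
/// cfg.mkldnn_quantizer_config()->SetWarmupBatchSize(10);
/// cfg.mkldnn_quantizer_config()->SetDefaultScaleAlgo(ScaleAlgo::KL);
/// \endcode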
///
struct MkldnnQuantizerConfig {
///
/// \brief Construct a new Mkldnn Quantizer Config object
///
MkldnnQuantizerConfig();
/** Specify a quantization algorithm for a connection (input/output) of the
* operator type.
* @param op_type_name the operator's name.
* @param conn_name name of the connection (input/output) of the operator.
* @param algo the algorithm for computing scale.
*/
///
/// \brief Set the scale algo
///
/// Specify a quantization algorithm for a connection (input/output) of the
/// operator type.
/// \param[in] op_type_name the operator's name.
/// \param[in] conn_name name of the connection (input/output) of the
/// operator.
/// \param[in] algo the algorithm for computing scale.
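///
/// A minimal sketch (the operator type "conv2d" and its "Input" connection
/// are assumptions for illustration):
/// \code{cpp}
/// cfg.mkldnn_quantizer_config()->SetScaleAlgo("conv2d", "Input",
///                                             ScaleAlgo::MAX_CH);
/// \endcode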
///
void SetScaleAlgo(std::string op_type_name, std::string conn_name,
ScaleAlgo algo) {
rules_[op_type_name][conn_name] = algo;
}
/** Get the quantization algorithm for a connection (input/output) of the
* operator type.
* @param op_type_name the operator's name.
* @param conn_name name of the connection (input/output) of the operator.
* @return the algorithm for computing scale.
*/
///
/// \brief Get the scale algo
///
/// Get the quantization algorithm for a connection (input/output) of the
/// operator type.
///
/// \param[in] op_type_name the operator's name.
/// \param[in] conn_name name of the connection (input/output) of the
/// operator.
/// \return the scale algo.
///
ScaleAlgo scale_algo(const std::string& op_type_name,
const std::string& conn_name) const;
/** Set the batch of data to be used for warm-up iteration.
* @param data batch of data.
*/
///
/// \brief Set the warmup data
///
/// Set the batch of data to be used for warm-up iteration.
///
/// \param[in] data batch of data.
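///
/// A minimal sketch (the tensor name, shape, and float data type are
/// assumptions for illustration):
/// \code{cpp}
/// auto warmup = std::make_shared<std::vector<PaddleTensor>>(1);
/// PaddleTensor &t = (*warmup)[0];
/// t.name = "image";
/// t.shape = {1, 3, 224, 224};
/// t.data.Resize(1 * 3 * 224 * 224 * sizeof(float));
/// t.dtype = PaddleDType::FLOAT32;
/// cfg.mkldnn_quantizer_config()->SetWarmupData(warmup);
/// \endcode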
///
void SetWarmupData(std::shared_ptr<std::vector<PaddleTensor>> data) {
warmup_data_ = data;
}
/** Get the batch of data used for warm-up iteration.
* @return batch of data.
*/
///
/// \brief Get the warmup data
///
/// Get the batch of data used for warm-up iteration.
///
/// \return the warm up data
///
std::shared_ptr<std::vector<PaddleTensor>> warmup_data() const {
return warmup_data_;
}
///
/// \brief Set the warmup batch size
///
/// Set the batch size for warm-up iteration.
///
/// \param[in] batch_size warm-up batch size
///
void SetWarmupBatchSize(int batch_size) { warmup_bs_ = batch_size; }
///
/// \brief Get the warmup batch size
///
/// Get the batch size for warm-up iteration.
///
/// \return the warm up batch size
///
int warmup_batch_size() const { return warmup_bs_; }
///
/// \brief Set quantized op list
///
/// In the quantization process, set the op list that supports quantization
///
/// \param[in] op_list List of quantized ops
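///
/// A minimal sketch (the operator names are assumptions for illustration):
/// \code{cpp}
/// cfg.mkldnn_quantizer_config()->SetEnabledOpTypes({"conv2d", "pool2d"});
/// \endcode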
///
void SetEnabledOpTypes(std::unordered_set<std::string> op_list) {
enabled_op_types_ = op_list;
}
///
/// \brief Get quantized op list
///
/// \return list of quantized ops
///
const std::unordered_set<std::string>& enabled_op_types() const {
return enabled_op_types_;
}
///
/// \brief Set the excluded op ids
///
/// \param[in] op_ids_list excluded op ids
///
void SetExcludedOpIds(std::unordered_set<int> op_ids_list) {
excluded_op_ids_ = op_ids_list;
}
///
/// \brief Get the excluded op ids
///
/// \return the excluded op ids
///
const std::unordered_set<int>& excluded_op_ids() const {
return excluded_op_ids_;
}
///
/// \brief Set default scale algorithm
///
/// \param[in] algo Method for calculating scale in quantization process
///
void SetDefaultScaleAlgo(ScaleAlgo algo) { default_scale_algo_ = algo; }
///
/// \brief Get default scale algorithm
///
/// \return Method for calculating scale in quantization
/// process
///
ScaleAlgo default_scale_algo() const { return default_scale_algo_; }
protected:
......