Commit 0129f4b5 (unverified)
Authored by Wilber on Mar 27, 2020; committed by GitHub on Mar 27, 2020
Add some inference API comments for AnalysisPredictor (#23242)
* add inference api doc. test=develop
Parent: c8f9e66b
Showing 2 changed files with 357 additions and 51 deletions (+357 −51):

paddle/fluid/inference/api/analysis_predictor.h (+241 −26)
paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h (+116 −25)
paddle/fluid/inference/api/analysis_predictor.h
...
...
@@ -30,6 +30,18 @@
#include <gtest/gtest.h>
#include <gtest/gtest_prod.h>
#endif
///
/// \file analysis_predictor.h
///
/// \brief Compared to NativePredictor, AnalysisPredictor is a high-performance
/// predictor that includes many optimizations
///
/// \author paddle-infer@baidu.com
/// \date 2020-01-01
/// \since 1.7.0
///
namespace paddle {

using inference::analysis::Argument;
...
...
@@ -37,95 +49,298 @@ using inference::analysis::Analyzer;
using framework::proto::ProgramDesc;
using framework::NaiveExecutor;
/** \brief This predictor is based on the original native predictor with IR and
* Analysis support.
*
* It will optimize IR and Parameters in the runtime.
*
* TODO(Superjomn) Replace the Navive predictor?
*/
///
/// \class AnalysisPredictor
///
/// \brief The analysis predictor is based on the original native predictor with
/// IR and Analysis support. It will optimize IR and Parameters in the runtime.
///
/// The predictor has the following typical uses:
///
/// Get predictor
/// \code{cpp}
/// auto predictor = CreatePaddlePredictor(config);
/// \endcode
///
/// Get input or output names
/// \code{cpp}
/// auto input_names = predictor->GetInputNames();
/// auto output_names = predictor->GetOutputNames();
/// \endcode
///
/// Get input or output tensors
/// \code{cpp}
/// auto input_t = predictor->GetInputTensor(input_names[0]);
/// auto output_t = predictor->GetOutputTensor(output_names[0]);
/// \endcode
///
/// Run predictor
/// \code{cpp}
/// predictor->ZeroCopyRun();
/// \endcode
///
class AnalysisPredictor : public PaddlePredictor {
public:
///
/// \brief Construct a new Analysis Predictor object
///
/// \param[in] AnalysisConfig config
///
explicit AnalysisPredictor(const AnalysisConfig &config) : config_(config) {
  predictor_id_ = inference::GetUniqueId();
}
///
/// \brief Destroy the Analysis Predictor object
///
~AnalysisPredictor();
///
/// \brief Initialize predictor
///
/// Initializing predictor mainly includes the following tasks:
/// preparing scope, creating executor, preparing program, initializing the
/// variables required by the executor, getting the feed_target_names and
/// fetch_target_names, etc.
///
/// \param[in] parent_scope parent scope
/// \param[in] program program
/// \return Whether the init function executed successfully
///
bool Init(const std::shared_ptr<framework::Scope> &parent_scope,
          const std::shared_ptr<framework::ProgramDesc> &program = nullptr);
///
/// \brief Run the prediction engine. Deprecated. Please refer to ZeroCopyRun
///
/// \param[in] inputs input tensors
/// \param[out] output_data output tensors
/// \param[in] batch_size data's batch size
/// \return Whether the function executed successfully
///
bool Run(const std::vector<PaddleTensor> &inputs,
         std::vector<PaddleTensor> *output_data,
         int batch_size = -1) override;
///
/// \brief Get the input names
///
/// \return input names
///
std::vector<std::string> GetInputNames();
///
/// \brief Get the output names
///
/// \return output names
///
std::vector<std::string> GetOutputNames();
///
/// \brief Get the Input Tensor object
///
/// \param[in] name input name
/// \return input tensor
///
std::unique_ptr<ZeroCopyTensor> GetInputTensor(
    const std::string &name) override;
///
/// \brief Get the Output Tensor object
///
/// \param[in] name output name
/// \return output tensor
///
std::unique_ptr<ZeroCopyTensor> GetOutputTensor(
    const std::string &name) override;
///
/// \brief Get all input names and their corresponding shapes
///
/// \return the map of input names and shapes
///
std::map<std::string, std::vector<int64_t>> GetInputTensorShape() override;
///
/// \brief Run the prediction engine
///
/// \return Whether the function executed successfully
///
bool ZeroCopyRun() override;
///
/// \brief Create feed fetch variables
///
/// \param[in] scope Scope needed to create variables
///
void CreateFeedFetchVar(framework::Scope *scope);
///
/// \brief Determine the model's inputs and outputs based on the program's
/// feed fetch op
///
void PrepareFeedFetch();
///
/// \brief Set predictor's argument according to config, which mainly includes
/// execution information and graph optimization related pass information
///
void PrepareArgument();
///
/// \brief According to argument information, execute the relevant pass
/// to get the optimized model program
///
void OptimizeInferenceProgram();
///
/// \brief Get the argument used by predictor
///
/// \return the argument obtained by config
///
Argument &analysis_argument() { return argument_; }
///
/// \brief Clone to get the new predictor. thread safe.
///
/// \return get a new predictor
///
std::unique_ptr<PaddlePredictor> Clone() override;
///
/// \brief Get the scope used by predictor
///
/// \return scope
///
framework::Scope *scope() { return scope_.get(); }
///
/// \brief Get the inference program
///
/// \return the inference program
///
framework::ProgramDesc &program() { return *inference_program_; }
///
/// \brief Get the serialized program
///
/// \return the serialized program
///
std::string GetSerializedProgram() const override;
///
/// \brief Initialize mkldnn quantizer and execute mkldnn quantization pass
///
/// \return Whether the function executed successfully
///
bool MkldnnQuantize();
// save program to model
// save parameters to params
///
/// \brief save program to model and save parameters to params
///
/// \param[in] dir path to save the model
///
void SaveOptimModel(const std::string &dir);
protected:
///
/// \brief Prepare predictor's required programs, including loading model
/// information, graph optimization, and executor creation variables, etc.
///
/// \param[in] program paddle program
/// \return Whether the function executed successfully
///
bool PrepareProgram(const std::shared_ptr<framework::ProgramDesc> &program);
///
/// \brief Prepare scope environment, each predictor has its own scope
///
/// \param[in] parent_scope The scope of the predictor to be cloned, or null
/// \return Whether the function executed successfully
///
bool PrepareScope(const std::shared_ptr<framework::Scope> &parent_scope);
///
/// \brief Create an Executor object
///
/// \return Whether the function executed successfully
///
bool CreateExecutor();
///
/// \brief According to the model's program, the executor creates ops
///
/// \return Whether the function executed successfully
///
bool PrepareExecutor();
///
/// \brief Load model program.
///
/// \return Whether the function executed successfully
///
bool LoadProgramDesc();
///
/// \brief Load model parameters.
///
/// \return Whether the function executed successfully
///
bool LoadParameters();
///
/// \brief Prepare input data, only used in Run()
///
/// \param[in] input_datas input tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool SetFeed(const std::vector<PaddleTensor> &input_datas,
             framework::Scope *scope);
///
/// \brief Get the output data, only used in Run()
///
/// \param[out] output_data output tensors
/// \param[in] scope the scope used by predictor
/// \return Whether the function executed successfully
///
bool GetFetch(std::vector<PaddleTensor> *output_data,
              framework::Scope *scope);
///
/// \brief Get the output data, only used in GetFetch()
///
/// \param[in] tensor for fetch op
/// \param[out] output_data output tensor
///
template <typename T>
void GetFetchOne(const framework::LoDTensor &fetchs, PaddleTensor *output_data);
// PreSet and PostReset for Mkldnn multi-thread and dynamic shape input.
// Used in AnalysisPredictor::Run(), do not support
// AnalysisPredictor::ZeroRun() now.
///
/// \brief PreSet for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
/// \param[in] inputs tensors
///
void MkldnnPreSet(const std::vector<PaddleTensor> &inputs);
///
/// \brief PostReset for Mkldnn multi-thread and dynamic shape input.
///
/// Used in AnalysisPredictor::Run(), do not support
/// AnalysisPredictor::ZeroCopyRun() now.
///
void MkldnnPostReset();
///
/// \brief Compute compatibility based on model version information and
/// operator version information
///
/// \return Compatible information
///
bool CheckOperatorCompatible();
#if PADDLE_WITH_TENSORRT
// When we use Paddle-TRT INT8 engine, we need to generate calibration table
// data first,
// the calibration table contains the range for each op's input and output,
// this whole process can be divided into several steps:
//
// 1. Builds a 32-bit engine, runs it on the calibration set, and records a
// histogram for each
// tensor of the distribution of activation values.
// 2. Builds a calibration table from the histograms.
//
// After step 2, we need to store the calibration table on disk
///
/// \brief save calibration table
///
/// When we use Paddle-TRT INT8 engine, we need to generate calibration table
/// data first,
/// the calibration table contains the range for each op's input and output,
/// this whole process can be divided into several steps:
/// 1. Builds a 32-bit engine, runs it on the calibration set, and records a
/// histogram for each tensor of the distribution of activation values.
/// 2. Builds a calibration table from the histograms.
/// After step 2, we need to store the calibration table on disk.
///
/// \return Whether the function executed successfully
///
bool SaveTrtCalibToDisk();
#endif
...
...
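The class comment above sketches the typical calling sequence. Putting those snippets together, a minimal end-to-end sketch of the zero-copy flow might look like the following; the model path, input shape, and include path are illustrative assumptions rather than part of this commit:

    // Illustrative sketch only: the model directory, input shape and the exact
    // include path are assumptions, not taken from this commit.
    #include <functional>
    #include <numeric>
    #include <vector>
    #include "paddle_inference_api.h"  // assumed install-time header name

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("./mobilenet");       // placeholder model directory
      config.SwitchUseFeedFetchOps(false);  // needed for the ZeroCopyTensor API

      auto predictor = paddle::CreatePaddlePredictor(config);

      // Feed one input tensor; the shape is an assumption for illustration.
      auto input_names = predictor->GetInputNames();
      auto input_t = predictor->GetInputTensor(input_names[0]);
      std::vector<int> shape{1, 3, 224, 224};
      std::vector<float> input(1 * 3 * 224 * 224, 0.f);
      input_t->Reshape(shape);
      input_t->copy_from_cpu(input.data());

      // Run the prediction engine and fetch the first output.
      predictor->ZeroCopyRun();
      auto output_names = predictor->GetOutputNames();
      auto output_t = predictor->GetOutputTensor(output_names[0]);
      std::vector<int> out_shape = output_t->shape();
      int out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                                    std::multiplies<int>());
      std::vector<float> out_data(out_num);
      output_t->copy_to_cpu(out_data.data());
      return 0;
    }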
paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h
...
...
@@ -11,6 +11,17 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///
/// \file paddle_mkldnn_quantizer_config.h
///
/// \brief Mkldnn quantizer config.
///
/// \author paddle-infer@baidu.com
/// \date 2020-01-01
/// \since 1.7.0
///
#pragma once
#include <cassert>
...
...
@@ -24,75 +35,155 @@
namespace paddle {
// Algorithms for finding scale of quantized Tensors.
///
/// \brief Algorithms for finding scale of quantized Tensors.
///
enum class ScaleAlgo {
  NONE,      ///< Do not compute scale
  MAX,       ///< Find scale based on the max absolute value
  MAX_CH,    ///< Find scale based on the max absolute value per output channel
  MAX_CH_T,  ///< Find scale based on the max absolute value per output channel
             ///< of a transposed tensor
  KL,        ///< Find scale based on KL Divergence
};
///
/// \class MkldnnQuantizerConfig
///
/// \brief Config for mkldnn quantize.
///
/// The MkldnnQuantizerConfig is used to configure Mkldnn's quantization
/// parameters, including scale algorithm, warmup data, warmup batch size,
/// quantized op list, etc.
///
/// It is not recommended to use this config directly, please refer to
/// AnalysisConfig::mkldnn_quantizer_config()
///
struct MkldnnQuantizerConfig {
///
/// \brief Construct a new Mkldnn Quantizer Config object
///
MkldnnQuantizerConfig();
/** Specify a quantization algorithm for a connection (input/output) of the
* operator type.
* @param op_type_name the operator's name.
* @param conn_name name of the connection (input/output) of the operator.
* @param algo the algorithm for computing scale.
*/
///
/// \brief Set the scale algo
///
/// Specify a quantization algorithm for a connection (input/output) of the
/// operator type.
/// \param[in] op_type_name the operator's name.
/// \param[in] conn_name name of the connection (input/output) of the
/// operator.
/// \param[in] algo the algorithm for computing scale.
///
void SetScaleAlgo(std::string op_type_name, std::string conn_name,
                  ScaleAlgo algo) {
  rules_[op_type_name][conn_name] = algo;
}
/** Get the quantization algorithm for a connection (input/output) of the
* operator type.
* @param op_type_name the operator's name.
* @param conn_name name of the connection (input/output) of the operator.
* @return the algorithm for computing scale.
*/
///
/// \brief Get the scale algo
///
/// Get the quantization algorithm for a connection (input/output) of the
/// operator type.
///
/// \param[in] op_type_name the operator's name.
/// \param[in] conn_name name of the connection (input/output) of the
/// operator.
/// \return the scale algo.
///
ScaleAlgo scale_algo(const std::string &op_type_name,
                     const std::string &conn_name) const;
/** Set the batch of data to be used for warm-up iteration.
* @param data batch of data.
*/
///
/// \brief Set the warmup data
///
/// Set the batch of data to be used for warm-up iteration.
///
/// \param[in] data batch of data.
///
void SetWarmupData(std::shared_ptr<std::vector<PaddleTensor>> data) {
  warmup_data_ = data;
}
/** Get the batch of data used for warm-up iteration.
* @return batch of data.
*/
///
/// \brief Get the warmup data
///
/// Get the batch of data used for warm-up iteration.
///
/// \return the warm up data
///
std::shared_ptr<std::vector<PaddleTensor>> warmup_data() const {
  return warmup_data_;
}
///
/// \brief Set the warmup batch size
///
/// Set the batch size for warm-up iteration.
///
/// \param[in] batch_size warm-up batch size
///
void SetWarmupBatchSize(int batch_size) { warmup_bs_ = batch_size; }
///
/// \brief Get the warmup batch size
///
/// Get the batch size for warm-up iteration.
///
/// \return the warm up batch size
int warmup_batch_size() const { return warmup_bs_; }
///
/// \brief Set quantized op list
///
/// In the quantization process, set the op list that supports quantization
///
/// \param[in] op_list List of quantized ops
///
void SetEnabledOpTypes(std::unordered_set<std::string> op_list) {
  enabled_op_types_ = op_list;
}
///
/// \brief Get quantized op list
///
/// \return list of quantized ops
///
const std::unordered_set<std::string> &enabled_op_types() const {
  return enabled_op_types_;
}
///
/// \brief Set the excluded op ids
///
/// \param[in] op_ids_list excluded op ids
///
void SetExcludedOpIds(std::unordered_set<int> op_ids_list) {
  excluded_op_ids_ = op_ids_list;
}
///
/// \brief Get the excluded op ids
///
/// \return exclude op ids
///
const std::unordered_set<int> &excluded_op_ids() const {
  return excluded_op_ids_;
}
///
/// \brief Set default scale algorithm
///
/// \param[in] algo Method for calculating scale in quantization process
///
void SetDefaultScaleAlgo(ScaleAlgo algo) { default_scale_algo_ = algo; }
///
/// \brief Get default scale algorithm
///
/// \return Method for calculating scale in quantization
/// process
///
ScaleAlgo default_scale_algo() const { return default_scale_algo_; }
protected:
...
...
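As the comments above note, MkldnnQuantizerConfig is normally reached through AnalysisConfig rather than built by hand. A minimal sketch of that path follows; EnableMKLDNN(), EnableMkldnnQuantizer(), the pointer returned by mkldnn_quantizer_config(), and the chosen op/connection names are assumptions for illustration:

    #include <memory>
    #include <vector>
    #include "paddle_inference_api.h"  // assumed install-time header name

    // Illustrative sketch: configure MKL-DNN INT8 quantization through
    // AnalysisConfig, assuming the Enable* methods behave as in contemporary
    // releases of the inference API.
    void ConfigureInt8Quantization(paddle::AnalysisConfig* config) {
      config->EnableMKLDNN();
      config->EnableMkldnnQuantizer();

      // mkldnn_quantizer_config() is assumed to return the
      // MkldnnQuantizerConfig documented above.
      paddle::MkldnnQuantizerConfig* q_cfg = config->mkldnn_quantizer_config();

      // One warmup batch; real code would fill it with representative tensors.
      auto warmup = std::make_shared<std::vector<paddle::PaddleTensor>>();
      q_cfg->SetWarmupData(warmup);
      q_cfg->SetWarmupBatchSize(1);

      // Scale selection: KL divergence by default, plain max for conv2d inputs
      // (op type and connection names are illustrative).
      q_cfg->SetDefaultScaleAlgo(paddle::ScaleAlgo::KL);
      q_cfg->SetScaleAlgo("conv2d", "Input", paddle::ScaleAlgo::MAX);

      // Restrict the quantization pass to a few op types.
      q_cfg->SetEnabledOpTypes({"conv2d", "pool2d"});
    }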