Commit 7e439780 (unverified)
Authored Apr 02, 2020 by Pei Yang; committed Apr 02, 2020 via GitHub

add full paddle_analysis_config.h APIs. (#23215)

Parent: 17babe4d
Showing 1 changed file with 287 additions and 146 deletions:
paddle/fluid/inference/api/paddle_analysis_config.h (+287, -146)
paddle/fluid/inference/api/paddle_analysis_config.h @ 7e439780
...
...
@@ -48,35 +48,35 @@ class AnalysisPredictor;
 struct MkldnnQuantizerConfig;
 ///
-/// \brief configuration manager for `AnalysisPredictor`.
+/// \brief configuration manager for AnalysisPredictor.
 /// \since 1.7.0
 ///
-/// `AnalysisConfig` manages configurations of `AnalysisPredictor`.
+/// AnalysisConfig manages configurations of AnalysisPredictor.
 /// During inference procedure, there are many parameters(model/params path,
 /// place of inference, etc.)
 /// to be specified, and various optimizations(subgraph fusion, memory
 /// optimization, TensorRT engine, etc.)
 /// to be done. Users can manage these settings by creating and modifying an
-/// `AnalysisConfig`,
-/// and loading it into `AnalysisPredictor`.
+/// AnalysisConfig,
+/// and loading it into AnalysisPredictor.
 ///
 struct AnalysisConfig {
   AnalysisConfig() = default;
   ///
-  /// \brief Construct a new `AnalysisConfig` from another
-  /// `AnalysisConfig`.
+  /// \brief Construct a new AnalysisConfig from another
+  /// AnalysisConfig.
   ///
-  /// \param[in] other another `AnalysisConfig`
+  /// \param[in] other another AnalysisConfig
   ///
   explicit AnalysisConfig(const AnalysisConfig& other);
   ///
-  /// \brief Construct a new `AnalysisConfig` from a no-combined model.
+  /// \brief Construct a new AnalysisConfig from a no-combined model.
   ///
   /// \param[in] model_dir model directory of the no-combined model.
   ///
   explicit AnalysisConfig(const std::string& model_dir);
   ///
-  /// \brief Construct a new `AnalysisConfig` from a combined model.
+  /// \brief Construct a new AnalysisConfig from a combined model.
   ///
   /// \param[in] prog_file model file path of the combined model.
   /// \param[in] params_file params file path of the combined model.
...
...
@@ -129,233 +129,374 @@ struct AnalysisConfig {
   void SetOptimCacheDir(const std::string& opt_cache_dir) {
     opt_cache_dir_ = opt_cache_dir;
   }
-  /** Get the model directory path.
-   */
+  ///
+  /// \brief Get the model directory path.
+  ///
+  /// \return const std::string& The model directory path.
+  ///
   const std::string& model_dir() const { return model_dir_; }
-  /** Get the program file path.
-   */
+  ///
+  /// \brief Get the program file path.
+  ///
+  /// \return const std::string& The program file path.
+  ///
   const std::string& prog_file() const { return prog_file_; }
-  /** Get the composed parameters file.
-   */
+  ///
+  /// \brief Get the combined parameters file.
+  ///
+  /// \return const std::string& The combined parameters file.
+  ///
   const std::string& params_file() const { return params_file_; }
   // Padding related.
-  /** Turn off Padding.
-   */
+  ///
+  /// \brief Turn off FC Padding.
+  ///
+  ///
   void DisableFCPadding();
-  /** A bool state telling whether padding is turned on.
-   */
+  ///
+  /// \brief A boolean state telling whether fc padding is used.
+  ///
+  /// \return bool Whether fc padding is used.
+  ///
   bool use_fc_padding() const { return use_fc_padding_; }
   // GPU related.
-  /**
-   * \brief Turn on GPU.
-   * @param memory_pool_init_size_mb initial size of the GPU memory pool in MB.
-   * @param device_id the GPU card to use (default is 0).
-   */
+  ///
+  /// \brief Turn on GPU.
+  ///
+  /// \param memory_pool_init_size_mb initial size of the GPU memory pool in MB.
+  /// \param device_id the GPU card to use (default is 0).
+  ///
   void EnableUseGpu(uint64_t memory_pool_init_size_mb, int device_id = 0);
-  /** Turn off the GPU.
-   */
+  ///
+  /// \brief Turn off GPU.
+  ///
+  ///
   void DisableGpu();
-  /** A bool state telling whether the GPU is turned on.
-   */
+  ///
+  /// \brief A boolean state telling whether the GPU is turned on.
+  ///
+  /// \return bool Whether the GPU is turned on.
+  ///
   bool use_gpu() const { return use_gpu_; }
-  /** Get the GPU device id.
-   */
+  ///
+  /// \brief Get the GPU device id.
+  ///
+  /// \return int The GPU device id.
+  ///
   int gpu_device_id() const { return device_id_; }
-  /** Get the initial size in MB of the GPU memory pool.
-   */
+  ///
+  /// \brief Get the initial size in MB of the GPU memory pool.
+  ///
+  /// \return int The initial size in MB of the GPU memory pool.
+  ///
   int memory_pool_init_size_mb() const { return memory_pool_init_size_mb_; }
-  /** Get the proportion of the initial memory pool size compared to the device.
-   */
+  ///
+  /// \brief Get the proportion of the initial memory pool size compared to the
+  /// device.
+  ///
+  /// \return float The proportion of the initial memory pool size.
+  ///
   float fraction_of_gpu_memory_for_pool() const;
-  /** Turn on CUDNN
-   */
+  // CUDNN related.
+  ///
+  /// \brief Turn on CUDNN.
+  ///
+  ///
   void EnableCUDNN();
-  /** A boolean state telling whether to use cuDNN.
-   */
+  ///
+  /// \brief A boolean state telling whether to use CUDNN.
+  ///
+  /// \return bool Whether to use CUDNN.
+  ///
   bool cudnn_enabled() const { return use_cudnn_; }
-  /** \brief Control whether to perform IR graph optimization.
-   *
-   * If turned off, the AnalysisConfig will act just like a NativeConfig.
-   */
+  ///
+  /// \brief Control whether to perform IR graph optimization.
+  /// If turned off, the AnalysisConfig will act just like a NativeConfig.
+  ///
+  /// \param x Whether the ir graph optimization is activated.
+  ///
   void SwitchIrOptim(int x = true) { enable_ir_optim_ = x; }
-  /** A boolean state telling whether the ir graph optimization is activated.
-   */
+  ///
+  /// \brief A boolean state telling whether the ir graph optimization is
+  /// activated.
+  ///
+  /// \return bool Whether to use ir graph optimization.
+  ///
   bool ir_optim() const { return enable_ir_optim_; }
-  /** \brief INTERNAL Determine whether to use the feed and fetch operators.
-   * Just for internal development, not stable yet.
-   * When ZeroCopyTensor is used, this should be turned off.
-   */
+  ///
+  /// \brief INTERNAL Determine whether to use the feed and fetch operators.
+  /// Just for internal development, not stable yet.
+  /// When ZeroCopyTensor is used, this should be turned off.
+  ///
+  /// \param x Whether to use the feed and fetch operators.
+  ///
   void SwitchUseFeedFetchOps(int x = true) { use_feed_fetch_ops_ = x; }
-  /** A boolean state telling whether to use the feed and fetch operators.
-   */
+  ///
+  /// \brief A boolean state telling whether to use the feed and fetch
+  /// operators.
+  ///
+  /// \return bool Whether to use the feed and fetch operators.
+  ///
   bool use_feed_fetch_ops_enabled() const { return use_feed_fetch_ops_; }
-  /** \brief Control whether to specify the inputs' names.
-   *
-   * The PaddleTensor type has a `name` member, assign it with the corresponding
-   * variable name. This is used only when the input PaddleTensors passed to the
-   * `PaddlePredictor.Run(...)` cannot follow the order in the training phase.
-   */
+  ///
+  /// \brief Control whether to specify the inputs' names.
+  /// The ZeroCopyTensor type has a name member, assign it with the
+  /// corresponding
+  /// variable name. This is used only when the input ZeroCopyTensors passed to
+  /// the
+  /// AnalysisPredictor.ZeroCopyRun() cannot follow the order in the training
+  /// phase.
+  ///
+  /// \param x Whether to specify the inputs' names.
+  ///
   void SwitchSpecifyInputNames(bool x = true) { specify_input_name_ = x; }
-  /** A boolean state telling whether the input PaddleTensor names specified
-   * should be used to reorder the inputs in `PaddlePredictor.Run(...)`.
-   */
+  ///
+  /// \brief A boolean state telling whether the input ZeroCopyTensor names
+  /// specified should
+  /// be used to reorder the inputs in AnalysisPredictor.ZeroCopyRun().
+  ///
+  /// \return bool Whether to specify the inputs' names.
+  ///
   bool specify_input_name() const { return specify_input_name_; }
-  /**
-   * \brief Turn on the TensorRT engine.
-   *
-   * The TensorRT engine will accelerate some subgraphs in the original Fluid
-   * computation graph. In some models such as resnet50, GoogleNet and so on,
-   * it gains significant performance acceleration.
-   *
-   * @param workspace_size the memory size(in byte) used for TensorRT workspace.
-   * @param max_batch_size the maximum batch size of this prediction task,
-   * better set as small as possible, or performance loss.
-   * @param min_subgraph_size the minimum TensorRT subgraph size needed, if a
-   * subgraph is less than this, it will not transfer to TensorRT engine.
-   */
+  ///
+  /// \brief Turn on the TensorRT engine.
+  /// The TensorRT engine will accelerate some subgraphs in the original Fluid
+  /// computation graph. In some models such as resnet50, GoogleNet and so on,
+  /// it gains significant performance acceleration.
+  ///
+  /// \param workspace_size The memory size(in byte) used for TensorRT
+  /// workspace.
+  /// \param max_batch_size The maximum batch size of this prediction task,
+  /// better set as small as possible for less performance loss.
+  /// \param min_subgraph_size The minimum TensorRT subgraph size needed, if a
+  /// subgraph is smaller than this, it will not be transferred to TensorRT
+  /// engine.
+  /// \param precision The precision used in TensorRT.
+  /// \param use_static Serialize optimization information to disk for reusing.
+  /// \param use_calib_mode Use TRT int8 calibration(post training
+  /// quantization).
+  ///
+  ///
   void EnableTensorRtEngine(int workspace_size = 1 << 20,
                             int max_batch_size = 1, int min_subgraph_size = 3,
                             Precision precision = Precision::kFloat32,
                             bool use_static = false,
                             bool use_calib_mode = true);
-  /** A boolean state telling whether the TensorRT engine is used.
-   */
+  ///
+  /// \brief A boolean state telling whether the TensorRT engine is used.
+  ///
+  /// \return bool Whether the TensorRT engine is used.
+  ///
   bool tensorrt_engine_enabled() const { return use_tensorrt_; }
-  /**
-   * \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
-   * @param min_input_shape the min input shape of the subgraph input
-   * @param max_input_shape the max input shape of the subgraph input
-   * @param opt_input_shape the opt input shape of the subgraph input
-   * @param disable_trt_plugin_fp16 setting this variable to true
-   * means that TRT plugin will not run fp16
-   */
+  ///
+  /// \brief Set min, max, opt shape for TensorRT Dynamic shape mode.
+  /// \param min_input_shape The min input shape of the subgraph input.
+  /// \param max_input_shape The max input shape of the subgraph input.
+  /// \param opt_input_shape The opt input shape of the subgraph input.
+  /// \param disable_trt_plugin_fp16 Setting this parameter to true means that
+  /// TRT plugin will not run fp16.
+  ///
   void SetTRTDynamicShapeInfo(
       std::map<std::string, std::vector<int>> min_input_shape,
       std::map<std::string, std::vector<int>> max_input_shape,
      std::map<std::string, std::vector<int>> optim_input_shape,
      bool disable_trt_plugin_fp16 = false);
-  /**
-   * \brief Turn on the usage of Lite sub-graph engine.
-   */
+  ///
+  /// \brief Turn on the usage of Lite sub-graph engine.
+  ///
+  /// \param precision_mode Precision used in Lite sub-graph engine.
+  /// \param passes_filter Set the passes used in Lite sub-graph engine.
+  /// \param ops_filter Operators not supported by Lite.
+  ///
   void EnableLiteEngine(
       AnalysisConfig::Precision precision_mode = Precision::kFloat32,
       const std::vector<std::string>& passes_filter = {},
       const std::vector<std::string>& ops_filter = {});
-  /** A boolean state indicating whether the Lite sub-graph engine is used.
-   */
+  ///
+  /// \brief A boolean state indicating whether the Lite sub-graph engine is
+  /// used.
+  ///
+  /// \return bool Whether the Lite sub-graph engine is used.
+  ///
   bool lite_engine_enabled() const { return use_lite_; }
-  /** \brief Control whether to debug IR graph analysis phase.
-   *
-   * This will generate DOT files for visualizing the computation graph after
-   * each analysis pass applied.
-   */
+  ///
+  /// \brief Control whether to debug IR graph analysis phase.
+  /// This will generate DOT files for visualizing the computation graph after
+  /// each analysis pass applied.
+  ///
+  /// \param x Whether to debug IR graph analysis phase.
+  ///
   void SwitchIrDebug(int x = true);
-  /** Turn on NGRAPH.
-   */
+  ///
+  /// \brief Turn on NGRAPH.
+  ///
+  ///
   void EnableNgraph();
-  /** A boolean state telling whether to use the NGRAPH.
-   */
+  ///
+  /// \brief A boolean state telling whether to use the NGRAPH.
+  ///
+  /// \return bool Whether to use the NGRAPH.
+  ///
   bool ngraph_enabled() const { return use_ngraph_; }
-  /** Turn on MKLDNN.
-   */
+  ///
+  /// \brief Turn on MKLDNN.
+  ///
+  ///
   void EnableMKLDNN();
-  /** Set the cache capacity of different input shapes for MKLDNN.
-   * Default 0 means don't cache any shape.
-   */
+  ///
+  /// \brief Set the cache capacity of different input shapes for MKLDNN.
+  /// Default value 0 means not caching any shape.
+  ///
+  /// \param capacity The cache capacity.
+  ///
   void SetMkldnnCacheCapacity(int capacity);
-  /** A boolean state telling whether to use the MKLDNN.
-   */
+  ///
+  /// \brief A boolean state telling whether to use the MKLDNN.
+  ///
+  /// \return bool Whether to use the MKLDNN.
+  ///
   bool mkldnn_enabled() const { return use_mkldnn_; }
-  /** Set and get the number of cpu math library threads.
-   */
+  ///
+  /// \brief Set the number of cpu math library threads.
+  ///
+  /// \param cpu_math_library_num_threads The number of cpu math library
+  /// threads.
+  ///
   void SetCpuMathLibraryNumThreads(int cpu_math_library_num_threads);
-  /** An int state telling how many threads are used in the CPU math library.
-   */
+  ///
+  /// \brief An int state telling how many threads are used in the CPU math
+  /// library.
+  ///
+  /// \return int The number of threads used in the CPU math library.
+  ///
   int cpu_math_library_num_threads() const {
     return cpu_math_library_num_threads_;
   }
-  /** Transform the AnalysisConfig to NativeConfig.
-   */
+  ///
+  /// \brief Transform the AnalysisConfig to NativeConfig.
+  ///
+  /// \return NativeConfig The NativeConfig transformed.
+  ///
   NativeConfig ToNativeConfig() const;
-  /** Specify the operator type list to use MKLDNN acceleration.
-   * @param op_list the operator type list.
-   */
+  ///
+  /// \brief Specify the operator type list to use MKLDNN acceleration.
+  ///
+  /// \param op_list The operator type list.
+  ///
   void SetMKLDNNOp(std::unordered_set<std::string> op_list) {
     mkldnn_enabled_op_types_ = op_list;
   }
-  /** Turn on quantization.
-   */
+  ///
+  /// \brief Turn on MKLDNN quantization.
+  ///
+  ///
   void EnableMkldnnQuantizer();
-  /** A boolean state telling whether the quantization is enabled.
-   */
+  ///
+  /// \brief A boolean state telling whether the MKLDNN quantization is enabled.
+  ///
+  /// \return bool Whether the MKLDNN quantization is enabled.
+  ///
   bool mkldnn_quantizer_enabled() const { return use_mkldnn_quantizer_; }
+  ///
+  /// \brief Get MKLDNN quantizer config.
+  ///
+  /// \return MkldnnQuantizerConfig* MKLDNN quantizer config.
+  ///
   MkldnnQuantizerConfig* mkldnn_quantizer_config() const;
-  /** Specify the memory buffer of program and parameter.
-   * @param prog_buffer the memory buffer of program.
-   * @param prog_buffer_size the size of the data.
-   * @param params_buffer the memory buffer of the composed parameters file.
-   * @param params_buffer_size the size of the composed parameters data.
-   */
+  ///
+  /// \brief Specify the memory buffer of program and parameter.
+  /// Used when model and params are loaded directly from memory.
+  ///
+  /// \param prog_buffer The memory buffer of program.
+  /// \param prog_buffer_size The size of the model data.
+  /// \param params_buffer The memory buffer of the combined parameters file.
+  /// \param params_buffer_size The size of the combined parameters data.
+  ///
   void SetModelBuffer(const char* prog_buffer, size_t prog_buffer_size,
                       const char* params_buffer, size_t params_buffer_size);
-  /** A boolean state telling whether the model is set from the CPU memory.
-   */
+  ///
+  /// \brief A boolean state telling whether the model is set from the CPU
+  /// memory.
+  ///
+  /// \return bool Whether model and params are loaded directly from memory.
+  ///
   bool model_from_memory() const { return model_from_memory_; }
-  /** Turn on memory optimize
-   * NOTE still in development, will release later.
-   */
+  ///
+  /// \brief Turn on memory optimize
+  /// NOTE still in development.
+  ///
   void EnableMemoryOptim();
-  /** Tell whether the memory optimization is activated. */
+  ///
+  /// \brief A boolean state telling whether the memory optimization is
+  /// activated.
+  ///
+  /// \return bool Whether the memory optimization is activated.
+  ///
   bool enable_memory_optim() const;
-  /** \brief Turn on profiling report.
-   *
-   * If not turned on, no profiling report will be generated.
-   */
+  ///
+  /// \brief Turn on profiling report.
+  /// If not turned on, no profiling report will be generated.
+  ///
   void EnableProfile();
-  /** A boolean state telling whether the profiler is activated.
-   */
+  ///
+  /// \brief A boolean state telling whether the profiler is activated.
+  ///
+  /// \return bool Whether the profiler is activated.
+  ///
   bool profile_enabled() const { return with_profile_; }
-  /** \brief Disable GLOG information output for security.
-   *
-   * If called, no LOG(INFO) logs will be generated.
-   */
+  ///
+  /// \brief Mute all logs in Paddle inference.
+  ///
   void DisableGlogInfo();
-  /** A boolean state telling whether the GLOG info is disabled.
-   */
+  ///
+  /// \brief A boolean state telling whether logs in Paddle inference are muted.
+  ///
+  /// \return bool Whether logs in Paddle inference are muted.
+  ///
   bool glog_info_disabled() const { return !with_glog_info_; }
+  ///
+  /// \brief Set the AnalysisConfig to be invalid.
+  /// This is to ensure that an AnalysisConfig can only be used in one
+  /// AnalysisPredictor.
+  ///
   void SetInValid() const { is_valid_ = false; }
+  ///
+  /// \brief A boolean state telling whether the AnalysisConfig is valid.
+  ///
+  /// \return bool Whether the AnalysisConfig is valid.
+  ///
   bool is_valid() const { return is_valid_; }
   friend class ::paddle::AnalysisPredictor;
-  /** NOTE just for developer, not an official API, easily to be broken.
-   * Get a pass builder for customize the passes in IR analysis phase.
-   */
+  ///
+  /// \brief Get a pass builder for customize the passes in IR analysis phase.
+  /// NOTE: Just for developer, not an official API, easy to be broken.
+  ///
+  ///
   PassStrategy* pass_builder() const;
   void PartiallyRelease();
...
...