Unverified commit 9ecb7461, authored by Zhanlue Yang, committed by GitHub

Enabled Eager Dygraph AutoCodeGen for 500+ existing ops (#37753)

* Handled dispensable tensors in AutoCodeGen for Eager Dygraph

* Enabled Eager Dygraph AutoCodeGen for 500+ existing ops
Parent 7094251b
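Note on the first bullet: the dispensable-tensor handling itself is not among the hunks shown below. As a hypothetical, self-contained C++17 illustration of the general pattern such generated code follows (names and types are illustrative, not Paddle's real API): a dispensable (optional) input is only inserted into the op's input map when the caller actually supplies it.

```cpp
#include <iostream>
#include <map>
#include <optional>
#include <string>
#include <vector>

using Tensor = std::vector<float>;  // stand-in for a real tensor type

// A required input is always added; a dispensable input is added only
// when present, rather than being passed through as an empty tensor.
void BuildInputMap(const Tensor& x, const std::optional<Tensor>& bias,
                   std::map<std::string, Tensor>* ins) {
  (*ins)["X"] = x;                               // required: always present
  if (bias.has_value()) (*ins)["Bias"] = *bias;  // dispensable: conditional
}

int main() {
  std::map<std::string, Tensor> ins;
  BuildInputMap({1.0f, 2.0f}, std::nullopt, &ins);
  std::cout << "inputs: " << ins.size() << std::endl;  // prints 1
  return 0;
}
```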
@@ -47,12 +47,12 @@ if(WIN32)
endif()
add_custom_target(eager_codegen
COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated"
COMMAND "${eager_generator_path}/eager_generator.exe" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt"
DEPENDS ${EAGER_CODEGEN_DEPS}
VERBATIM)
else()
add_custom_target(eager_codegen
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/eager_generator" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated"
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/eager_generator" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/generated/fluid_generated" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt"
DEPENDS eager_generator
VERBATIM)
endif()
@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
+#include <algorithm>
#include <fstream>
#include <iostream>
@@ -26,6 +27,9 @@
#include "paddle/fluid/pybind/pybind.h"
#include "paddle/fluid/string/string_helper.h"
+DEFINE_bool(generate_all, false,
+            "Generate all operators currently registered in Paddle");
static std::unordered_set<std::string> operators_to_skip = {
"fused_elemwise_add_activation", // No Default Attr
"fused_elemwise_activation", // No Default Attr
@@ -40,12 +44,10 @@ static std::unordered_set<std::string> operators_to_skip = {
"pull_box_sparse",
"fused_attention",
"diag_v2",
-};
-static std::unordered_set<std::string> operators_to_codegen = {
-    "sigmoid", "matmul_v2", "reduce_sum", "elementwise_add",
-    "share_buffer", "var_conv_2d", "split"};
+    "transfer_dtype",
+    "c_split"};
+static std::unordered_set<std::string> operators_to_codegen = {};
static std::unordered_set<std::string> skipped_operators = {};
namespace paddle {
@@ -353,7 +355,10 @@ static bool CheckOpProto(proto::OpProto* op_proto) {
// Only handle matmul_v2 for now
VLOG(1) << "------ Analyzing Op ------: " << op_type;
+  if (!FLAGS_generate_all) {
    if (!operators_to_codegen.count(op_type)) return false;
+  }
if (operators_to_skip.count(op_type)) return false;
return true;
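Restated as a minimal standalone sketch (the helper name ShouldCodeGen is mine, not from this diff): the new filter admits an op when --generate_all is set or the op appears in op_list.txt, provided the op is not in the hardcoded skip list.

```cpp
#include <string>
#include <unordered_set>

// An op is generated when either generate_all is set or it is in the
// allowlist read from op_list.txt, and it is never in the skip list.
static bool ShouldCodeGen(const std::string& op_type, bool generate_all,
                          const std::unordered_set<std::string>& allow,
                          const std::unordered_set<std::string>& skip) {
  if (!generate_all && allow.count(op_type) == 0) return false;
  return skip.count(op_type) == 0;
}

int main() {
  std::unordered_set<std::string> allow = {"matmul_v2"};
  std::unordered_set<std::string> skip = {"diag_v2"};
  // matmul_v2 passes; diag_v2 is skipped even under --generate_all.
  return ShouldCodeGen("matmul_v2", false, allow, skip) &&
                 !ShouldCodeGen("diag_v2", true, allow, skip)
             ? 0
             : 1;
}
```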
@@ -976,7 +981,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, outnum);
dygraph_function_args_str += arg_str;
const char* FWD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::ConstructDuplicableOutput(%s) },";
"{ \"%s\", egr::EagerUtils::ConstructDuplicableOutput(%s) },";
outs_contents_str += paddle::string::Sprintf(FWD_OUTS_CONTENT_TEMPLATE,
output_name, outnum);
} else {
@@ -1253,7 +1258,7 @@ static std::string GenerateGradNodeCCContents(
if (duplicable_input_name_set.count(fwd_input_name)) {
const char* GRAD_OUTS_CONTENT_TEMPLATE =
"{ \"%s\", egr::ConstructDuplicableOutput( "
"{ \"%s\", egr::EagerUtils::ConstructDuplicableOutput( "
"this->OutputMeta()[%d].Size() ) },";
outs_contents_str += paddle::string::Sprintf(
GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, fwd_input_position);
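Both template hunks above rename the free function egr::ConstructDuplicableOutput to the static member egr::EagerUtils::ConstructDuplicableOutput inside printf-style code templates. A minimal sketch of this template-expansion technique, using std::snprintf in place of paddle::string::Sprintf so it compiles standalone (the helper name is illustrative):

```cpp
#include <cstdio>
#include <iostream>
#include <string>

// Expands the output-map template the generator emits for a duplicable
// output: the output name and count are spliced into one line of
// generated C++ source.
std::string EmitDuplicableOutputEntry(const std::string& output_name,
                                      const std::string& outnum) {
  const char* kTemplate =
      "{ \"%s\", egr::EagerUtils::ConstructDuplicableOutput(%s) },";
  char buf[256];
  std::snprintf(buf, sizeof(buf), kTemplate, output_name.c_str(),
                outnum.c_str());
  return std::string(buf);
}

int main() {
  std::cout << EmitDuplicableOutputEntry("Out", "outnum") << std::endl;
  return 0;
}
```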
@@ -1639,13 +1644,30 @@ static void DygraphCodeGeneration(const std::string& output_dir) {
} // namespace framework
} // namespace paddle
+static void CollectOperatorsToCodeGen(const std::string& op_list_path) {
+  std::string line;
+  std::ifstream op_list_file(op_list_path);
+  if (op_list_file.is_open()) {
+    while (getline(op_list_file, line)) {
+      operators_to_codegen.insert(line);
+    }
+    op_list_file.close();
+  } else {
+    PADDLE_THROW(
+        paddle::platform::errors::Fatal("Unable to open op_list.txt file"));
+  }
+}
int main(int argc, char* argv[]) {
if (argc != 2) {
if (argc != 3) {
std::cerr << "argc must be 2" << std::endl;
return -1;
}
std::string eager_root = argv[1];
+  std::string op_list_path = argv[2];
+  CollectOperatorsToCodeGen(op_list_path);
paddle::framework::DygraphCodeGeneration(eager_root);
return 0;
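One caveat with CollectOperatorsToCodeGen as written: each line of op_list.txt is inserted verbatim, so a trailing '\r' from a CRLF checkout or a blank line becomes a bogus op name that silently fails the allowlist lookup. A hedged, standalone sketch of a more defensive reader (the trimming policy is a suggestion, not part of this commit):

```cpp
#include <fstream>
#include <stdexcept>
#include <string>
#include <unordered_set>

// Reads one op name per line, trimming surrounding whitespace (including
// a stray '\r' from CRLF files) and skipping blank lines.
std::unordered_set<std::string> ReadOpList(const std::string& path) {
  std::ifstream file(path);
  if (!file.is_open()) {
    throw std::runtime_error("Unable to open op list file: " + path);
  }
  std::unordered_set<std::string> ops;
  std::string line;
  while (std::getline(file, line)) {
    const auto begin = line.find_first_not_of(" \t\r\n");
    if (begin == std::string::npos) continue;  // blank line
    const auto end = line.find_last_not_of(" \t\r\n");
    ops.insert(line.substr(begin, end - begin + 1));
  }
  return ops;
}
```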
@@ -2,3 +2,504 @@ sigmoid
matmul_v2
reduce_sum
elementwise_add
rsqrt
multihead_matmul
addmm
gru
round
rank_attention
fused_embedding_fc_lstm
where_index
bicubic_interp
arg_min
tile
bilinear_tensor_product
ctc_align
pow2_decay_with_linear_warmup
split
fc
clear_float_status
load
elementwise_max
adadelta
chunk_eval
check_finite_and_unscale
sparse_momentum
tan
adam
fsp
where
logical_xor
multiclass_nms3
one_hot_v2
sequence_softmax
affine_channel
triangular_solve
sequence_topk_avg_pooling
space_to_depth
reverse
fused_embedding_eltwise_layernorm
expand_v2
lgamma
solve
deformable_psroi_pooling
instance_norm
decode_jpeg
gather_nd
reduce_prod
matrix_rank
asin
lstmp
iou_similarity
huber_loss
one_hot
sequence_slice
lookup_table
softplus
depthwise_conv2d
fused_fc_elementwise_layernorm
sigmoid_cross_entropy_with_logits
exp
scatter
equal_all
searchsorted
fusion_squared_mat_sub
unique
log
conv_shift
smooth_l1_loss
linear_interp_v2
momentum
temporal_shift
nce
mv
proximal_gd
memcpy_h2d
add_position_encoding
cosh
hash
grad_add
sign
prelu
linspace
fill_diagonal
logsigmoid
load_combine
fetch_v2
randperm
sequence_scatter
partial_sum
relu6
conv3d
lstm_unit
not_equal
transpose2
uniform_random_batch_size_like
unfold
lrn
softmax_with_cross_entropy
isfinite_v2
bernoulli
max_pool3d_with_index
gaussian_random
flatten2
matmul
cvm
adamax
masked_select
range
bitwise_not
trace
multinomial
modified_huber_loss
roll
squared_l2_distance
conv3d_transpose
share_data
fake_quantize_abs_max
unique_with_counts
fill
concat
fill_zeros_like
hierarchical_sigmoid
isinf_v2
squeeze
multiclass_nms2
bpr_loss
fft_c2c
bicubic_interp_v2
reshape
coalesce_tensor
roi_align
reshape2
reduce_any
unstack
scatter_nd_add
sequence_reshape
bilateral_slice
fill_any_like
empty
pad_constant_like
pool2d
size
imag
eigh
stack
dgc_momentum
lamb
generate_proposals_v2
bitwise_or
gru_unit
fake_channel_wise_quantize_dequantize_abs_max
sampling_id
unsqueeze2
average_accumulates
sequence_enumerate
fusion_seqconv_eltadd_relu
bce_loss
generate_proposal_labels
im2sequence
isinf
adagrad
linear_chain_crf
retinanet_target_assign
fusion_group
teacher_student_sigmoid_loss
random_crop
lookup_table_v2
detection_map
l1_norm
sqrt
fused_elemwise_activation
slogdeterminant
share_buffer
bitwise_and
diag_embed
unbind
dropout
moving_average_abs_max_scale
beam_search
log_loss
greater_than
kron
sigmoid_focal_loss
rmsprop
conv2d
uniform_random_inplace
maxout
linear_interp
auc
logical_or
batch_norm
acos
unpool
cumprod
sample_logits
pull_box_extended_sparse
crop_tensor
fill_constant
deformable_conv
generate_mask_labels
locality_aware_nms
expand_as
matrix_power
greater_equal
generate_proposals
bilinear_interp
inplace_abn
softshrink
mul
data_norm
get_tensor_from_selected_rows
spp
floor
gelu
retinanet_detection_output
minus
push_dense
silu
sequence_erase
real
nearest_interp_v2
dgc_clip_by_norm
squeeze2
strided_slice
conj
precision_recall
save
fusion_seqexpand_concat_fc
fake_quantize_range_abs_max
depthwise_conv2d_transpose
positive_negative_pair
square
var_conv_2d
log1p
fused_softmax_mask_upper_triangle
clip_by_norm
atan2
box_decoder_and_assign
fft_r2c
roi_pool
overlap_add
fill_constant_batch_size_like
fill_any
dequantize_log
max_pool2d_with_index
pad3d
norm
viterbi_decode
mish
box_coder
flatten
elementwise_mod
margin_cross_entropy
pull_sparse
logical_and
pow
stanh
label_smooth
merged_momentum
ascend_trigger
fused_feedforward
rpn_target_assign
roi_perspective_transform
expand
prroi_pool
pool3d
memcpy
distribute_fpn_proposals
frame
bincount
shape
group_norm
resnet_unit
sequence_expand_as
cos_sim
eigvals
save_combine
class_center_sample
read_file
isfinite
arg_max
equal
fake_dequantize_max_abs
qr
anchor_generator
layer_norm
merge_selected_rows
less_equal
rnn
fusion_lstm
lars_momentum
hard_sigmoid
isnan
elementwise_floordiv
correlation
histogram
gather_tree
segment_pool
sync_batch_norm
fusion_repeated_fc_relu
nop
fused_attention
expand_as_v2
filter_by_instag
diag_v2
pull_box_sparse
nll_loss
dot
scale
ncclBcast
shuffle_batch
ncclReduce
diag
multiplex
leaky_relu
allclose
adamw
elementwise_pow
prior_box
p_norm
unique_consecutive
lod_reset
pad
sequence_conv
log10
set_value
bitwise_xor
center_loss
randint
attention_lstm
uniform_random
slice
meshgrid
hard_swish
sin
mean_iou
pad2d
inverse
spectral_norm
shuffle_channel
psroi_pool
seed
ceil
eig
reduce_min
cos
ncclAllReduce
cudnn_lstm
digamma
assign_value
increment
tdm_sampler
fused_softmax_mask
sequence_reverse
eigvalsh
diagonal
trunc
log2
marker
tanh
yolov3_loss
graph_send_recv
accuracy
atan
less_than
unsqueeze
crf_decoding
log_softmax
ftrl
matrix_nms
top_k_v2
cast
tanh_shrink
hard_shrink
multiclass_nms
fusion_transpose_flatten_concat
sequence_unpad
fused_elemwise_add_activation
pull_sparse_v2
frobenius_norm
crop
cross_entropy2
skip_layernorm
tdm_child
fused_embedding_seq_pool
erf
conv2d_inception_fusion
trilinear_interp
logsumexp
fusion_seqpool_concat
alloc_float_status
sequence_concat
fusion_seqpool_cvm_concat
similarity_focus
argsort
sequence_expand
sgd
fused_bn_add_activation
bilinear_interp_v2
clip
deformable_conv_v1
hinge_loss
determinant
conv2d_transpose
memcpy_d2h
softsign
fake_quantize_dequantize_abs_max
broadcast_tensors
grid_sampler
fft_c2r
pyramid_hash
fake_quantize_dequantize_moving_average_abs_max
multi_dot
sequence_pool
transpose
top_k
dist
affine_grid
gaussian_random_batch_size_like
fake_channel_wise_dequantize_max_abs
reciprocal
sequence_mask
fill_diagonal_tensor
abs
partial_concat
elu
index_select
row_conv
cross
elementwise_mul
decayed_adagrad
bipartite_match
run_program
fake_quantize_moving_average_abs_max
mine_hard_examples
target_assign
lstm
truncated_gaussian_random
match_matrix_tensor
elementwise_div
kldiv_loss
cumsum
sum
proximal_adagrad
update_loss_scaling
shard_index
selu
mean
gumbel_softmax
sequence_pad
tree_conv
assign
flatten_contiguous_range
tril_triu
brelu
celu
reduce_mean
sinh
rank_loss
reduce_max
fusion_gru
fill_zeros_like2
expm1
squared_l2_norm
elementwise_sub
margin_rank_loss
faster_tokenizer
relu
is_empty
reduce_all
edit_distance
bmm
yolo_box
soft_relu
density_prior_box
eye
swish
cross_entropy
dpsgd
cholesky
batch_fc
nearest_interp
gather
trilinear_interp_v2
box_clip
isnan_v2
softmax
conv2d_fusion
fused_batch_norm_act
get_float_status
index_sample
elementwise_min
logical_not
collect_fpn_proposals
pixel_shuffle
thresholded_relu
polygon_box_transform
lookup_table_dequant
warpctc
fake_channel_wise_quantize_abs_max
dequantize_abs_max
svd
flip
@@ -60,7 +60,7 @@ TEST(EagerUtils, AutoGradMeta) {
std::vector<AutogradMeta*> autograd_metas =
EagerUtils::multi_autograd_meta(&ets);
std::vector<AutogradMeta*> unsafe_autograd_metas =
-      EagerUtils::unsafe_autograd_meta(&ets);
+      EagerUtils::unsafe_autograd_meta(ets);
CHECK_NOTNULL(unsafe_autograd_metas[0]);
CHECK_NOTNULL(unsafe_autograd_metas[1]);
@@ -48,9 +48,9 @@ AutogradMeta* EagerUtils::unsafe_autograd_meta(const egr::EagerTensor& target) {
}
std::vector<AutogradMeta*> EagerUtils::unsafe_autograd_meta(
-    std::vector<egr::EagerTensor>* targets) {
+    const std::vector<egr::EagerTensor>& targets) {
std::vector<AutogradMeta*> metas;
-  for (const egr::EagerTensor& t : *targets) {
+  for (const egr::EagerTensor& t : targets) {
metas.push_back(unsafe_autograd_meta(t));
}
return metas;
@@ -114,7 +114,7 @@ class EagerUtils {
// This method will return an AutogradMeta pointer unsafely.
static AutogradMeta* unsafe_autograd_meta(const egr::EagerTensor& target);
static std::vector<AutogradMeta*> unsafe_autograd_meta(
-      std::vector<egr::EagerTensor>* targets);
+      const std::vector<egr::EagerTensor>& targets);
template <typename T, typename... Args>
static bool ComputeRequireGrad(T trace_backward, Args&&... args) {
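The signature change from std::vector<egr::EagerTensor>* to const std::vector<egr::EagerTensor>& follows the usual C++ guideline for read-only container parameters: a const reference documents that nothing is mutated, cannot be null, and lets call sites drop the address-of operator, as the test update above shows. A generic sketch of the pattern, independent of Paddle's types:

```cpp
#include <vector>

// Before: a mutable pointer, even though nothing is mutated; callers must
// write Sum(&v) and the function must worry about a null pointer.
// int Sum(std::vector<int>* v);

// After: a const reference; read-only by construction and never null.
int Sum(const std::vector<int>& v) {
  int total = 0;
  for (int x : v) total += x;
  return total;
}

int main() {
  std::vector<int> v = {1, 2, 3};
  return Sum(v) == 6 ? 0 : 1;  // call site: no address-of needed
}
```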