From aff7397bbf2290c8cdff2e2b2202cbb286d3ae93 Mon Sep 17 00:00:00 2001 From: wanghuancoder Date: Wed, 8 Dec 2021 23:09:27 +0800 Subject: [PATCH] [Eager] coreops to 495 (#37926) * refine a test case, test=develop * publish python c api for eager, test=develop * revert modify about test_allclose_layer.py, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * delete numpy includes, use pybind11 numpy.h, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * support eager error msg, and add grad test case, test=develop * refine, test=develop * refine, test=develop * generate eager core ops, only 4 ops, test=develop * Rearranged Eager AutoCodeGen directory structure * Removed USE_OP in Eager AutoCodeGen * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * open 500 list * refine, test=develop * refine, test=develop * refine, test=develop * fix auto code gen, test=develop * Enabled generation for Operators without Grad/Inputs/Outputs * refine, test=develop * refine, test=develop * refine, test=develop * add to pyobject, test=develop * Resolved operators without input * merge pr 37837 * refine * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine, test=develop * refine,test=develop Co-authored-by: jim19930609 --- .../tests/performance_tests/CMakeLists.txt | 2 +- .../pybind/eager_op_function_generator.cc | 518 +++++++++++++++++- paddle/fluid/pybind/eager_utils.cc | 9 + paddle/fluid/pybind/eager_utils.h | 1 + paddle/fluid/pybind/pybind.cc | 4 + 5 files changed, 517 insertions(+), 17 deletions(-) diff --git a/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt index 8811aa8ad3..516789cbb8 100644 --- 
a/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt +++ b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt @@ -1,4 +1,4 @@ -cc_library(performance_benchmark_utils SRCS benchmark_utils.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node scale_op matmul_v2_op) +cc_library(performance_benchmark_utils SRCS benchmark_utils.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node scale_op matmul_v2_op dygraph_function) cc_test(test_egr_performance_benchmark_eager_cpu SRCS benchmark_eager_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) cc_test(test_egr_performance_benchmark_fluid_cpu SRCS benchmark_fluid_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc index 46d0bdcb46..b0d8c77689 100644 --- a/paddle/fluid/pybind/eager_op_function_generator.cc +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -32,8 +32,502 @@ #endif #include "paddle/fluid/pybind/op_function_generator.h" -std::set gen_list = {"elementwise_add", "reduce_sum", "matmul_v2", - "sigmoid"}; +std::set gen_list = { + "sigmoid", + "matmul_v2", + "reduce_sum", + "elementwise_add", + "rsqrt", + "multihead_matmul", + "addmm", + "gru", + "round", + "push_dense", + "rank_attention", + "fused_embedding_fc_lstm", + "where_index", + "bicubic_interp", + "arg_min", + "tile", + "bilinear_tensor_product", + "ctc_align", + "pow2_decay_with_linear_warmup", + "marker", + "split", + "fc", + "clear_float_status", + "load", + "elementwise_max", + "adadelta", + "sparse_momentum", + "tan", + "adam", + "fsp", + "where", + "logical_xor", + "multiclass_nms3", + "one_hot_v2", + "sequence_softmax", + "affine_channel", + "triangular_solve", + "sequence_topk_avg_pooling", + "space_to_depth", + "reverse", + "fused_embedding_eltwise_layernorm", + "expand_v2", + "lgamma", + "solve", + "deformable_psroi_pooling", + 
"instance_norm", + "decode_jpeg", + "gather_nd", + "reduce_prod", + "matrix_rank", + "asin", + "lstmp", + "iou_similarity", + "huber_loss", + "one_hot", + "sequence_slice", + "lookup_table", + "softplus", + "depthwise_conv2d", + "fused_fc_elementwise_layernorm", + "sigmoid_cross_entropy_with_logits", + "exp", + "scatter", + "equal_all", + "searchsorted", + "fusion_squared_mat_sub", + "unique", + "log", + "conv_shift", + "smooth_l1_loss", + "linear_interp_v2", + "momentum", + "temporal_shift", + "nce", + "mv", + "proximal_gd", + "memcpy_h2d", + "add_position_encoding", + "cosh", + "hash", + "grad_add", + "sign", + "prelu", + "linspace", + "fill_diagonal", + "logsigmoid", + "load_combine", + "fetch_v2", + "randperm", + "sequence_scatter", + "partial_sum", + "relu6", + "conv3d", + "lstm_unit", + "not_equal", + "transpose2", + "uniform_random_batch_size_like", + "unfold", + "lrn", + "softmax_with_cross_entropy", + "isfinite_v2", + "bernoulli", + "max_pool3d_with_index", + "gaussian_random", + "flatten2", + "matmul", + "cvm", + "adamax", + "masked_select", + "range", + "bitwise_not", + "trace", + "multinomial", + "modified_huber_loss", + "roll", + "squared_l2_distance", + "conv3d_transpose", + "share_data", + "fake_quantize_abs_max", + "unique_with_counts", + "fill", + "concat", + "fill_zeros_like", + "hierarchical_sigmoid", + "isinf_v2", + "squeeze", + "multiclass_nms2", + "bpr_loss", + "fft_c2c", + "bicubic_interp_v2", + "reshape", + "coalesce_tensor", + "roi_align", + "reshape2", + "reduce_any", + "unstack", + "scatter_nd_add", + "sequence_reshape", + "bilateral_slice", + "fill_any_like", + "empty", + "pad_constant_like", + "pool2d", + "size", + "imag", + "eigh", + "stack", + "dgc_momentum", + "lamb", + "generate_proposals_v2", + "bitwise_or", + "gru_unit", + "fake_channel_wise_quantize_dequantize_abs_max", + "sampling_id", + "unsqueeze2", + "average_accumulates", + "sequence_enumerate", + "fusion_seqconv_eltadd_relu", + "bce_loss", + "generate_proposal_labels", + 
"im2sequence", + "isinf", + "adagrad", + "linear_chain_crf", + "retinanet_target_assign", + "fusion_group", + "teacher_student_sigmoid_loss", + "random_crop", + "lookup_table_v2", + "detection_map", + "l1_norm", + "sqrt", + "fused_elemwise_activation", + "slogdeterminant", + "share_buffer", + "bitwise_and", + "diag_embed", + "unbind", + "dropout", + "moving_average_abs_max_scale", + "beam_search", + "log_loss", + "greater_than", + "kron", + "sigmoid_focal_loss", + "rmsprop", + "conv2d", + "uniform_random_inplace", + "maxout", + "linear_interp", + "auc", + "logical_or", + "batch_norm", + "acos", + "unpool", + "cumprod", + "sample_logits", + "crop_tensor", + "fill_constant", + "deformable_conv", + "generate_mask_labels", + "locality_aware_nms", + "expand_as", + "matrix_power", + "greater_equal", + "generate_proposals", + "bilinear_interp", + "inplace_abn", + "softshrink", + "mul", + "data_norm", + "get_tensor_from_selected_rows", + "spp", + "floor", + "gelu", + "retinanet_detection_output", + "push_dense", + "silu", + "sequence_erase", + "real", + "nearest_interp_v2", + "dgc_clip_by_norm", + "squeeze2", + "strided_slice", + "conj", + "precision_recall", + "save", + "fusion_seqexpand_concat_fc", + "fake_quantize_range_abs_max", + "depthwise_conv2d_transpose", + "positive_negative_pair", + "square", + "var_conv_2d", + "log1p", + "fused_softmax_mask_upper_triangle", + "clip_by_norm", + "atan2", + "box_decoder_and_assign", + "fft_r2c", + "roi_pool", + "overlap_add", + "fill_constant_batch_size_like", + "fill_any", + "dequantize_log", + "max_pool2d_with_index", + "pad3d", + "norm", + "viterbi_decode", + "mish", + "box_coder", + "flatten", + "elementwise_mod", + "margin_cross_entropy", + "logical_and", + "pow", + "stanh", + "label_smooth", + "merged_momentum", + "ascend_trigger", + "fused_feedforward", + "rpn_target_assign", + "roi_perspective_transform", + "expand", + "prroi_pool", + "pool3d", + "memcpy", + "distribute_fpn_proposals", + "frame", + "bincount", + "shape", + 
"group_norm", + "resnet_unit", + "sequence_expand_as", + "cos_sim", + "eigvals", + "save_combine", + "class_center_sample", + "read_file", + "isfinite", + "arg_max", + "equal", + "fake_dequantize_max_abs", + "qr", + "anchor_generator", + "layer_norm", + "merge_selected_rows", + "less_equal", + "rnn", + "fusion_lstm", + "lars_momentum", + "hard_sigmoid", + "isnan", + "elementwise_floordiv", + "correlation", + "histogram", + "gather_tree", + "segment_pool", + "sync_batch_norm", + "fusion_repeated_fc_relu", + "nop", + "expand_as_v2", + "filter_by_instag", + "nll_loss", + "dot", + "scale", + "ncclBcast", + "shuffle_batch", + "ncclReduce", + "diag", + "multiplex", + "leaky_relu", + "allclose", + "adamw", + "elementwise_pow", + "prior_box", + "p_norm", + "unique_consecutive", + "lod_reset", + "pad", + "sequence_conv", + "log10", + "set_value", + "bitwise_xor", + "center_loss", + "randint", + "attention_lstm", + "uniform_random", + "slice", + "meshgrid", + "hard_swish", + "sin", + "mean_iou", + "pad2d", + "inverse", + "spectral_norm", + "shuffle_channel", + "psroi_pool", + "seed", + "ceil", + "eig", + "reduce_min", + "cos", + "ncclAllReduce", + "cudnn_lstm", + "digamma", + "assign_value", + "increment", + "tdm_sampler", + "fused_softmax_mask", + "sequence_reverse", + "eigvalsh", + "diagonal", + "trunc", + "log2", + "tanh", + "yolov3_loss", + "graph_send_recv", + "accuracy", + "atan", + "less_than", + "unsqueeze", + "crf_decoding", + "log_softmax", + "ftrl", + "matrix_nms", + "top_k_v2", + "cast", + "tanh_shrink", + "hard_shrink", + "multiclass_nms", + "fusion_transpose_flatten_concat", + "sequence_unpad", + "fused_elemwise_add_activation", + "frobenius_norm", + "crop", + "cross_entropy2", + "skip_layernorm", + "tdm_child", + "fused_embedding_seq_pool", + "erf", + "conv2d_inception_fusion", + "trilinear_interp", + "logsumexp", + "fusion_seqpool_concat", + "alloc_float_status", + "sequence_concat", + "fusion_seqpool_cvm_concat", + "similarity_focus", + "argsort", + 
"sequence_expand", + "sgd", + "fused_bn_add_activation", + "bilinear_interp_v2", + "clip", + "deformable_conv_v1", + "hinge_loss", + "determinant", + "conv2d_transpose", + "memcpy_d2h", + "softsign", + "fake_quantize_dequantize_abs_max", + "broadcast_tensors", + "grid_sampler", + "fft_c2r", + "pyramid_hash", + "fake_quantize_dequantize_moving_average_abs_max", + "multi_dot", + "sequence_pool", + "transpose", + "top_k", + "dist", + "affine_grid", + "gaussian_random_batch_size_like", + "fake_channel_wise_dequantize_max_abs", + "reciprocal", + "sequence_mask", + "fill_diagonal_tensor", + "abs", + "partial_concat", + "elu", + "index_select", + "row_conv", + "cross", + "elementwise_mul", + "decayed_adagrad", + "bipartite_match", + "fake_quantize_moving_average_abs_max", + "mine_hard_examples", + "target_assign", + "lstm", + "truncated_gaussian_random", + "match_matrix_tensor", + "elementwise_div", + "kldiv_loss", + "cumsum", + "sum", + "proximal_adagrad", + "shard_index", + "selu", + "mean", + "gumbel_softmax", + "sequence_pad", + "tree_conv", + "assign", + "flatten_contiguous_range", + "tril_triu", + "brelu", + "celu", + "reduce_mean", + "sinh", + "rank_loss", + "reduce_max", + "fusion_gru", + "fill_zeros_like2", + "expm1", + "squared_l2_norm", + "elementwise_sub", + "margin_rank_loss", + "faster_tokenizer", + "relu", + "is_empty", + "reduce_all", + "edit_distance", + "bmm", + "yolo_box", + "soft_relu", + "density_prior_box", + "eye", + "swish", + "cross_entropy", + "dpsgd", + "cholesky", + "batch_fc", + "nearest_interp", + "gather", + "trilinear_interp_v2", + "box_clip", + "isnan_v2", + "softmax", + "conv2d_fusion", + "fused_batch_norm_act", + "get_float_status", + "index_sample", + "elementwise_min", + "logical_not", + "collect_fpn_proposals", + "pixel_shuffle", + "thresholded_relu", + "polygon_box_transform", + "lookup_table_dequant", + "warpctc", + "fake_channel_wise_quantize_abs_max", + "dequantize_abs_max", + "svd", + "flip"}; // clang-format off const char* 
OUT_INITIALIZER_TEMPLATE = @@ -178,15 +672,7 @@ std::string GenerateOpFunctionsBody( ins_cast_str += paddle::string::Sprintf(in_cast_type, in_name, op_type, in_name, arg_idx++, dispensable); - if (input.dispensable()) { - const auto in_template = input.duplicable() - ? INPUT_INITIALIZER_TEMPLATE_WITH_NULL_LIST - : INPUT_INITIALIZER_TEMPLATE_WITH_NULL; - ins_initializer_with_null += - paddle::string::Sprintf(in_template, in_name, in_name, in_name); - } else { - call_api_str += in_name + ", "; - } + call_api_str += in_name + ", "; } if (!input_args.empty() && input_args.back() == ',') { @@ -237,6 +723,8 @@ std::string GenerateOpFunctionsBody( auto dispensable = output.dispensable() ? "true" : "false"; ins_cast_str += paddle::string::Sprintf(in_cast_type, out_name, op_type, out_name, arg_idx++, dispensable); + + // call_api_str += out_name + ", "; } else { // There are few Operators that have duplicable output, like `Out` in // split op. We need to specify the number of variables for the @@ -281,11 +769,9 @@ std::string GenerateOpFunctionsBody( HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, viwe_input_name, viwe_output_name, viwe_input_name, viwe_output_name); } - if (outs_num == 0) { - return_str = "Py_INCREF(Py_None);\n return Py_None;"; - } else { - return_str = "return ToPyObject(out);"; - } + + return_str = "return ToPyObject(out);"; + std::string function_args = ""; if (input_args == "") { function_args = FUNCTION_ARGS_NO_INPUT; diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index eb53884186..3a0990c126 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -370,6 +370,15 @@ PyObject* ToPyObject(const platform::Place& value) { return obj.ptr(); } +PyObject* ToPyObject(const void* value) { + if (value == nullptr) { + Py_INCREF(Py_None); + return Py_None; + } + PADDLE_THROW( + platform::errors::Fatal("ToPyObject do not support void* with value.")); +} + egr::EagerTensor GetEagerTensorFromArgs(const 
std::string& op_type, const std::string& arg_name, PyObject* args, ssize_t arg_idx, diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index e72820c4db..bb1d247e59 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -51,6 +51,7 @@ PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const platform::Place& value); +PyObject* ToPyObject(const void* value); template struct TupleEagerTensorResult { diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 635f314977..a93ddb1a22 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -75,7 +75,9 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/cuda_streams_py.h" +#ifndef PADDLE_ON_INFERENCE #include "paddle/fluid/pybind/eager.h" +#endif #include "paddle/fluid/pybind/io.h" #include "paddle/utils/none.h" #ifdef PADDLE_WITH_ASCEND @@ -541,7 +543,9 @@ PYBIND11_MODULE(core_avx, m) { PYBIND11_MODULE(core_noavx, m) { #endif +#ifndef PADDLE_ON_INFERENCE BindEager(&m); +#endif BindCudaStream(&m); // Not used, just make sure cpu_info.cc is linked. -- GitLab