diff --git a/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt index 516789cbb8cf743b5c06ca6ff18b6a7780b4c133..8811aa8ad38a5e72c82e3d690eaaaa303487538d 100644 --- a/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt +++ b/paddle/fluid/eager/tests/performance_tests/CMakeLists.txt @@ -1,4 +1,4 @@ -cc_library(performance_benchmark_utils SRCS benchmark_utils.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node scale_op matmul_v2_op dygraph_function) +cc_library(performance_benchmark_utils SRCS benchmark_utils.cc DEPS ${eager_deps} ${fluid_deps} ${generated_deps} eager_scale scale_node scale_op matmul_v2_op) cc_test(test_egr_performance_benchmark_eager_cpu SRCS benchmark_eager_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) cc_test(test_egr_performance_benchmark_fluid_cpu SRCS benchmark_fluid_cpu.cc DEPS performance_benchmark_utils ${eager_deps} ${fluid_deps}) diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc index b0d8c7768968d3e1b271ed9472a82dd77596d788..46d0bdcb46de77709b649d385096aa8132e658e7 100644 --- a/paddle/fluid/pybind/eager_op_function_generator.cc +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -32,502 +32,8 @@ #endif #include "paddle/fluid/pybind/op_function_generator.h" -std::set gen_list = { - "sigmoid", - "matmul_v2", - "reduce_sum", - "elementwise_add", - "rsqrt", - "multihead_matmul", - "addmm", - "gru", - "round", - "push_dense", - "rank_attention", - "fused_embedding_fc_lstm", - "where_index", - "bicubic_interp", - "arg_min", - "tile", - "bilinear_tensor_product", - "ctc_align", - "pow2_decay_with_linear_warmup", - "marker", - "split", - "fc", - "clear_float_status", - "load", - "elementwise_max", - "adadelta", - "sparse_momentum", - "tan", - "adam", - "fsp", - "where", - "logical_xor", - "multiclass_nms3", - "one_hot_v2", - "sequence_softmax", - "affine_channel", - "triangular_solve", - "sequence_topk_avg_pooling", - "space_to_depth", - "reverse", - "fused_embedding_eltwise_layernorm", - "expand_v2", - "lgamma", - "solve", - "deformable_psroi_pooling", - "instance_norm", - "decode_jpeg", - "gather_nd", - "reduce_prod", - "matrix_rank", - "asin", - "lstmp", - "iou_similarity", - "huber_loss", - "one_hot", - "sequence_slice", - "lookup_table", - "softplus", - "depthwise_conv2d", - "fused_fc_elementwise_layernorm", - "sigmoid_cross_entropy_with_logits", - "exp", - "scatter", - "equal_all", - "searchsorted", - "fusion_squared_mat_sub", - "unique", - "log", - "conv_shift", - "smooth_l1_loss", - "linear_interp_v2", - "momentum", - "temporal_shift", - "nce", - "mv", - "proximal_gd", - "memcpy_h2d", - "add_position_encoding", - "cosh", - "hash", - "grad_add", - "sign", - "prelu", - "linspace", - "fill_diagonal", - "logsigmoid", - "load_combine", - "fetch_v2", - "randperm", - "sequence_scatter", - "partial_sum", - "relu6", - "conv3d", - "lstm_unit", - "not_equal", - "transpose2", - "uniform_random_batch_size_like", - "unfold", - "lrn", - "softmax_with_cross_entropy", - "isfinite_v2", - "bernoulli", - "max_pool3d_with_index", - "gaussian_random", - "flatten2", - "matmul", - "cvm", - "adamax", - "masked_select", - "range", - "bitwise_not", - "trace", - "multinomial", - "modified_huber_loss", - "roll", - "squared_l2_distance", - "conv3d_transpose", - "share_data", - "fake_quantize_abs_max", - "unique_with_counts", - "fill", - "concat", - "fill_zeros_like", - "hierarchical_sigmoid", - "isinf_v2", - "squeeze", - "multiclass_nms2", - "bpr_loss", - "fft_c2c", - "bicubic_interp_v2", - "reshape", - "coalesce_tensor", - "roi_align", - "reshape2", - "reduce_any", - "unstack", - "scatter_nd_add", - "sequence_reshape", - "bilateral_slice", - "fill_any_like", - "empty", - "pad_constant_like", - "pool2d", - "size", - "imag", - "eigh", - "stack", - "dgc_momentum", - "lamb", - "generate_proposals_v2", - "bitwise_or", - "gru_unit", - "fake_channel_wise_quantize_dequantize_abs_max", - "sampling_id", - "unsqueeze2", - "average_accumulates", - "sequence_enumerate", - "fusion_seqconv_eltadd_relu", - "bce_loss", - "generate_proposal_labels", - "im2sequence", - "isinf", - "adagrad", - "linear_chain_crf", - "retinanet_target_assign", - "fusion_group", - "teacher_student_sigmoid_loss", - "random_crop", - "lookup_table_v2", - "detection_map", - "l1_norm", - "sqrt", - "fused_elemwise_activation", - "slogdeterminant", - "share_buffer", - "bitwise_and", - "diag_embed", - "unbind", - "dropout", - "moving_average_abs_max_scale", - "beam_search", - "log_loss", - "greater_than", - "kron", - "sigmoid_focal_loss", - "rmsprop", - "conv2d", - "uniform_random_inplace", - "maxout", - "linear_interp", - "auc", - "logical_or", - "batch_norm", - "acos", - "unpool", - "cumprod", - "sample_logits", - "crop_tensor", - "fill_constant", - "deformable_conv", - "generate_mask_labels", - "locality_aware_nms", - "expand_as", - "matrix_power", - "greater_equal", - "generate_proposals", - "bilinear_interp", - "inplace_abn", - "softshrink", - "mul", - "data_norm", - "get_tensor_from_selected_rows", - "spp", - "floor", - "gelu", - "retinanet_detection_output", - "push_dense", - "silu", - "sequence_erase", - "real", - "nearest_interp_v2", - "dgc_clip_by_norm", - "squeeze2", - "strided_slice", - "conj", - "precision_recall", - "save", - "fusion_seqexpand_concat_fc", - "fake_quantize_range_abs_max", - "depthwise_conv2d_transpose", - "positive_negative_pair", - "square", - "var_conv_2d", - "log1p", - "fused_softmax_mask_upper_triangle", - "clip_by_norm", - "atan2", - "box_decoder_and_assign", - "fft_r2c", - "roi_pool", - "overlap_add", - "fill_constant_batch_size_like", - "fill_any", - "dequantize_log", - "max_pool2d_with_index", - "pad3d", - "norm", - "viterbi_decode", - "mish", - "box_coder", - "flatten", - "elementwise_mod", - "margin_cross_entropy", - "logical_and", - "pow", - "stanh", - "label_smooth", - "merged_momentum", - "ascend_trigger", - "fused_feedforward", - "rpn_target_assign", - "roi_perspective_transform", - "expand", - "prroi_pool", - "pool3d", - "memcpy", - "distribute_fpn_proposals", - "frame", - "bincount", - "shape", - "group_norm", - "resnet_unit", - "sequence_expand_as", - "cos_sim", - "eigvals", - "save_combine", - "class_center_sample", - "read_file", - "isfinite", - "arg_max", - "equal", - "fake_dequantize_max_abs", - "qr", - "anchor_generator", - "layer_norm", - "merge_selected_rows", - "less_equal", - "rnn", - "fusion_lstm", - "lars_momentum", - "hard_sigmoid", - "isnan", - "elementwise_floordiv", - "correlation", - "histogram", - "gather_tree", - "segment_pool", - "sync_batch_norm", - "fusion_repeated_fc_relu", - "nop", - "expand_as_v2", - "filter_by_instag", - "nll_loss", - "dot", - "scale", - "ncclBcast", - "shuffle_batch", - "ncclReduce", - "diag", - "multiplex", - "leaky_relu", - "allclose", - "adamw", - "elementwise_pow", - "prior_box", - "p_norm", - "unique_consecutive", - "lod_reset", - "pad", - "sequence_conv", - "log10", - "set_value", - "bitwise_xor", - "center_loss", - "randint", - "attention_lstm", - "uniform_random", - "slice", - "meshgrid", - "hard_swish", - "sin", - "mean_iou", - "pad2d", - "inverse", - "spectral_norm", - "shuffle_channel", - "psroi_pool", - "seed", - "ceil", - "eig", - "reduce_min", - "cos", - "ncclAllReduce", - "cudnn_lstm", - "digamma", - "assign_value", - "increment", - "tdm_sampler", - "fused_softmax_mask", - "sequence_reverse", - "eigvalsh", - "diagonal", - "trunc", - "log2", - "tanh", - "yolov3_loss", - "graph_send_recv", - "accuracy", - "atan", - "less_than", - "unsqueeze", - "crf_decoding", - "log_softmax", - "ftrl", - "matrix_nms", - "top_k_v2", - "cast", - "tanh_shrink", - "hard_shrink", - "multiclass_nms", - "fusion_transpose_flatten_concat", - "sequence_unpad", - "fused_elemwise_add_activation", - "frobenius_norm", - "crop", - "cross_entropy2", - "skip_layernorm", - "tdm_child", - "fused_embedding_seq_pool", - "erf", - "conv2d_inception_fusion", - "trilinear_interp", - "logsumexp", - "fusion_seqpool_concat", - "alloc_float_status", - "sequence_concat", - "fusion_seqpool_cvm_concat", - "similarity_focus", - "argsort", - "sequence_expand", - "sgd", - "fused_bn_add_activation", - "bilinear_interp_v2", - "clip", - "deformable_conv_v1", - "hinge_loss", - "determinant", - "conv2d_transpose", - "memcpy_d2h", - "softsign", - "fake_quantize_dequantize_abs_max", - "broadcast_tensors", - "grid_sampler", - "fft_c2r", - "pyramid_hash", - "fake_quantize_dequantize_moving_average_abs_max", - "multi_dot", - "sequence_pool", - "transpose", - "top_k", - "dist", - "affine_grid", - "gaussian_random_batch_size_like", - "fake_channel_wise_dequantize_max_abs", - "reciprocal", - "sequence_mask", - "fill_diagonal_tensor", - "abs", - "partial_concat", - "elu", - "index_select", - "row_conv", - "cross", - "elementwise_mul", - "decayed_adagrad", - "bipartite_match", - "fake_quantize_moving_average_abs_max", - "mine_hard_examples", - "target_assign", - "lstm", - "truncated_gaussian_random", - "match_matrix_tensor", - "elementwise_div", - "kldiv_loss", - "cumsum", - "sum", - "proximal_adagrad", - "shard_index", - "selu", - "mean", - "gumbel_softmax", - "sequence_pad", - "tree_conv", - "assign", - "flatten_contiguous_range", - "tril_triu", - "brelu", - "celu", - "reduce_mean", - "sinh", - "rank_loss", - "reduce_max", - "fusion_gru", - "fill_zeros_like2", - "expm1", - "squared_l2_norm", - "elementwise_sub", - "margin_rank_loss", - "faster_tokenizer", - "relu", - "is_empty", - "reduce_all", - "edit_distance", - "bmm", - "yolo_box", - "soft_relu", - "density_prior_box", - "eye", - "swish", - "cross_entropy", - "dpsgd", - "cholesky", - "batch_fc", - "nearest_interp", - "gather", - "trilinear_interp_v2", - "box_clip", - "isnan_v2", - "softmax", - "conv2d_fusion", - "fused_batch_norm_act", - "get_float_status", - "index_sample", - "elementwise_min", - "logical_not", - "collect_fpn_proposals", - "pixel_shuffle", - "thresholded_relu", - "polygon_box_transform", - "lookup_table_dequant", - "warpctc", - "fake_channel_wise_quantize_abs_max", - "dequantize_abs_max", - "svd", - "flip"}; +std::set gen_list = {"elementwise_add", "reduce_sum", "matmul_v2", + "sigmoid"}; // clang-format off const char* OUT_INITIALIZER_TEMPLATE = @@ -672,7 +178,15 @@ std::string GenerateOpFunctionsBody( ins_cast_str += paddle::string::Sprintf(in_cast_type, in_name, op_type, in_name, arg_idx++, dispensable); - call_api_str += in_name + ", "; + if (input.dispensable()) { + const auto in_template = input.duplicable() + ? INPUT_INITIALIZER_TEMPLATE_WITH_NULL_LIST + : INPUT_INITIALIZER_TEMPLATE_WITH_NULL; + ins_initializer_with_null += + paddle::string::Sprintf(in_template, in_name, in_name, in_name); + } else { + call_api_str += in_name + ", "; + } } if (!input_args.empty() && input_args.back() == ',') { @@ -723,8 +237,6 @@ std::string GenerateOpFunctionsBody( auto dispensable = output.dispensable() ? "true" : "false"; ins_cast_str += paddle::string::Sprintf(in_cast_type, out_name, op_type, out_name, arg_idx++, dispensable); - - // call_api_str += out_name + ", "; } else { // There are few Operators that have duplicable output, like `Out` in // split op. We need to specify the number of variables for the @@ -769,9 +281,11 @@ std::string GenerateOpFunctionsBody( HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, viwe_input_name, viwe_output_name, viwe_input_name, viwe_output_name); } - - return_str = "return ToPyObject(out);"; - + if (outs_num == 0) { + return_str = "Py_INCREF(Py_None);\n return Py_None;"; + } else { + return_str = "return ToPyObject(out);"; + } std::string function_args = ""; if (input_args == "") { function_args = FUNCTION_ARGS_NO_INPUT; diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 3a0990c1263918ea886ffe13f50474ec437e8b7e..eb53884186ffc0d021daafbc794bfc99fb6e1a18 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -370,15 +370,6 @@ PyObject* ToPyObject(const platform::Place& value) { return obj.ptr(); } -PyObject* ToPyObject(const void* value) { - if (value == nullptr) { - Py_INCREF(Py_None); - return Py_None; - } - PADDLE_THROW( - platform::errors::Fatal("ToPyObject do not support void* with value.")); -} - egr::EagerTensor GetEagerTensorFromArgs(const std::string& op_type, const std::string& arg_name, PyObject* args, ssize_t arg_idx, diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index bb1d247e59007badaaf2971e05140c61503835f4..e72820c4dbe8c56a4d0077da27b30c4beb1ff746 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -51,7 +51,6 @@ PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const platform::Place& value); -PyObject* ToPyObject(const void* value); template struct TupleEagerTensorResult { diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index a93ddb1a22f9cba8a2694769285a7d734394cd38..635f3149773e8f4add54cbd2b9ab4c88583c7695 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -75,9 +75,7 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/pybind/cuda_streams_py.h" -#ifndef PADDLE_ON_INFERENCE #include "paddle/fluid/pybind/eager.h" -#endif #include "paddle/fluid/pybind/io.h" #include "paddle/utils/none.h" #ifdef PADDLE_WITH_ASCEND @@ -543,9 +541,7 @@ PYBIND11_MODULE(core_avx, m) { PYBIND11_MODULE(core_noavx, m) { #endif -#ifndef PADDLE_ON_INFERENCE BindEager(&m); -#endif BindCudaStream(&m); // Not used, just make sure cpu_info.cc is linked.