From 77dfb2e8dd8f40461671d40e4d1d1cb608060f96 Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Wed, 15 Dec 2021 09:51:40 +0800 Subject: [PATCH] Synchronized auto-generated Python-C API with Dygraph Forward Functions (#38017) * Rearranged Eager AutoCodeGen directory structure * Removed USE_OP in Eager AutoCodeGen * Enabled generation for Operators without Grad/Inputs/Outputs * Resolved operators without input * Fixed merge conflicts * Enabled Eager AutoCodeGen for 10+ more operators * Refactored Eager AutoCodeGen with more organized helper objects * Enabled Eager AutoCodeGen for operators with multiple OpBases * Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input Argument * Handled Dispensable Inputs/Outputs in Eager AutoCodeGen * Adjusted function generation/call between Python-C API & Dygraph API * Synchronized auto-generated Python-C API with Dygraph Forward Functions * Added safe_initialized interface to EagerTensor for use in processing dispensable inputs --- .../auto_code_generator/eager_generator.cc | 37 ++--- paddle/fluid/eager/eager_tensor.h | 4 + paddle/fluid/eager/utils.cc | 24 +++ paddle/fluid/eager/utils.h | 5 + paddle/fluid/pybind/CMakeLists.txt | 4 +- .../pybind/eager_op_function_generator.cc | 143 +++--------------- 6 files changed, 66 insertions(+), 151 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index ca60c8ee9a..e5b2ebce03 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1174,7 +1174,7 @@ static std::pair GenerateForwardFunctionContents( FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name); } else { const char* FWD_INS_CONTENT_TEMPLATE = - " if(%s.initialized()) " + " if(%s.safe_initialized()) " "ins[\"%s\"] = egr::EagerUtils::SyncToVars(%s)\n;"; generated_function_body += paddle::string::Sprintf( FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name); @@ -1196,25 +1196,21 @@ static std::pair GenerateForwardFunctionContents( // in form of shared_ptr/vector> if (output.duplicable()) { const char* FWD_NUM_ARG_TEMPLATE = - ", std::vector>& %s"; + ", std::vector& %s"; std::string arg_str = paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name); dygraph_function_args_str += arg_str; - const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", %s },"; - outs_contents_str += paddle::string::Sprintf( - FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name); } else { - const char* FWD_NUM_ARG_TEMPLATE = - ", std::shared_ptr& %s"; + const char* FWD_NUM_ARG_TEMPLATE = ", egr::EagerTensor& %s"; std::string arg_str = paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name); dygraph_function_args_str += arg_str; - - const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", {%s} },"; - outs_contents_str += paddle::string::Sprintf( - FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name); } + const char* FWD_OUTS_CONTENT_TEMPLATE = + "{ \"%s\", egr::EagerUtils::TrySyncToVars(&%s) },"; + outs_contents_str += paddle::string::Sprintf( + FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name); } else { if (output.duplicable()) { @@ -1557,22 +1553,11 @@ static std::string GenerateGradNodeCCContents( "fwd_outputs_name_pos_map")); size_t grads_position = fwd_outputs_name_pos_map.at(fwd_name); - std::string grad_ptr_name = fwd_name + "_ptrs"; - const char* GET_GRADS_PTR_TEMPLATE = - " std::vector> %s;\n" - " for(const auto& t : grads[%d]) {\n " - "%s.emplace_back(std::move(std::make_shared(t))" - ");" - "\n }\n"; - std::string grads_ptr_str = - paddle::string::Sprintf(GET_GRADS_PTR_TEMPLATE, grad_ptr_name, - grads_position, grad_ptr_name); - generated_grad_function_body += grads_ptr_str; - generated_grad_function_body += "\n"; - - const char* GRAD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", %s },"; + + const char* GRAD_OUTS_CONTENT_TEMPLATE = + "{ \"%s\", egr::EagerUtils::SyncToVars(grads[%d]) },"; outs_contents_str += paddle::string::Sprintf( - GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grad_ptr_name); + GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grads_position); } else { size_t fwd_input_position = fwd_inputs_name_pos_map.at(fwd_name); diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index 7ade0a9848..b175f07cab 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -152,6 +152,10 @@ class EagerTensor final { */ bool initialized() const { return tensor_->initialized(); } + bool safe_initialized() const { + return initialized() || var_.IsInitialized(); + } + /** * @description: Reset the Tensor implementation * @param None diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index a275954687..2e52753bcc 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -135,6 +135,30 @@ std::vector> EagerUtils::SyncToVars( return res; } +static std::shared_ptr TrySyncToVar( + egr::EagerTensor* tensor) { + if (tensor->initialized() || tensor->Var().IsInitialized()) { + tensor->SyncToVar(paddle::framework::proto::VarType_Type_LOD_TENSOR); + } + return std::make_shared(*tensor); +} + +std::vector> EagerUtils::TrySyncToVars( + egr::EagerTensor* tensor) { + return {TrySyncToVar(tensor)}; +} + +std::vector> EagerUtils::TrySyncToVars( + std::vector* tensors) { + std::vector> res; + size_t num = tensors->size(); + res.reserve(num); + for (size_t i = 0; i < num; i++) { + res.emplace_back(TrySyncToVar(&(*tensors)[i])); + } + return res; +} + /* ---- VarBase -> Tensor ---- */ std::vector> EagerUtils::SyncToTensors( const egr::EagerTensor& tensor) { diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index 851f665bba..843b6404af 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -145,6 +145,11 @@ class EagerUtils { const std::shared_ptr& grad_node); // Intermidate needed remove this once we don't need legacy + static std::vector> TrySyncToVars( + egr::EagerTensor* tensor); + static std::vector> TrySyncToVars( + std::vector* tensors); + static std::vector> SyncToVars( const egr::EagerTensor& tensor); static std::vector> SyncToVars( diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index e6d0a096b2..4ae1bb8215 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -181,7 +181,7 @@ if(WITH_PYTHON) ":retry\n" "ECHO eager_op_function_generator run %build_times% time\n" "taskkill /f /im eager_op_function_generator.exe 2>NUL\n" - "${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file}\n" + "${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file} ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt\n" "if %ERRORLEVEL% NEQ 0 (\n" " set /a build_times=%build_times%+1\n" " if %build_times% GEQ 10 (\n" @@ -256,7 +256,7 @@ if(WITH_PYTHON) add_custom_command(OUTPUT ${eager_impl_file} COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:." "${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator" - "${tmp_eager_impl_file}" + "${tmp_eager_impl_file}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt" COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file} COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}" DEPENDS ${EAGER_OP_IMPL_DEPS} diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc index 0bfc883730..7eb29fe92a 100644 --- a/paddle/fluid/pybind/eager_op_function_generator.cc +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -32,126 +32,7 @@ #endif #include "paddle/fluid/pybind/op_function_generator.h" -std::set gen_list = { - "sigmoid", "matmul_v2", "reduce_sum", "elementwise_add", "rsqrt", - "multihead_matmul", "addmm", "gru", "round", "push_dense", "rank_attention", - "fused_embedding_fc_lstm", "where_index", "bicubic_interp", "arg_min", - "tile", "bilinear_tensor_product", "ctc_align", - "pow2_decay_with_linear_warmup", "marker", "split", "fc", - "load", "elementwise_max", "adadelta", - "tan", - "fsp", "where", "logical_xor", "multiclass_nms3", "one_hot_v2", - "sequence_softmax", "affine_channel", "triangular_solve", - "sequence_topk_avg_pooling", "space_to_depth", "reverse", - "fused_embedding_eltwise_layernorm", "expand_v2", "lgamma", "solve", - "deformable_psroi_pooling", "instance_norm", "decode_jpeg", "gather_nd", - "reduce_prod", "matrix_rank", "asin", "lstmp", "iou_similarity", - "huber_loss", "one_hot", "sequence_slice", "lookup_table", "softplus", - "depthwise_conv2d", "fused_fc_elementwise_layernorm", - "sigmoid_cross_entropy_with_logits", "exp", "scatter", "equal_all", - "searchsorted", "fusion_squared_mat_sub", "unique", "log", "conv_shift", - "smooth_l1_loss", "linear_interp_v2", - "temporal_shift", "nce", "mv", "proximal_gd", "memcpy_h2d", - "add_position_encoding", "cosh", "hash", "grad_add", "sign", "prelu", - "linspace", "fill_diagonal", "logsigmoid", "load_combine", "fetch_v2", - "randperm", "sequence_scatter", "partial_sum", "relu6", "conv3d", - "lstm_unit", "not_equal", "transpose2", "uniform_random_batch_size_like", - "unfold", "lrn", "softmax_with_cross_entropy", "isfinite_v2", "bernoulli", - "max_pool3d_with_index", "gaussian_random", "flatten2", - "cvm", "adamax", "masked_select", "range", "bitwise_not", "trace", - "multinomial", "modified_huber_loss", "roll", "squared_l2_distance", - "conv3d_transpose", "share_data", "fake_quantize_abs_max", - "unique_with_counts", "fill", "concat", "fill_zeros_like", - "hierarchical_sigmoid", "isinf_v2", "squeeze", "multiclass_nms2", - "bpr_loss", "fft_c2c", "bicubic_interp_v2", "reshape", "coalesce_tensor", - "roi_align", "reshape2", "reduce_any", "unstack", "scatter_nd_add", - "sequence_reshape", "bilateral_slice", "fill_any_like", "empty", - "pad_constant_like", "pool2d", "size", "imag", "eigh", "stack", - "dgc_momentum", - "generate_proposals_v2", "bitwise_or", "gru_unit", - "sampling_id", "unsqueeze2", - "sequence_enumerate", "fusion_seqconv_eltadd_relu", "bce_loss", - "generate_proposal_labels", "im2sequence", "isinf", "adagrad", - "linear_chain_crf", "retinanet_target_assign", "fusion_group", - "teacher_student_sigmoid_loss", "random_crop", "lookup_table_v2", - "detection_map", "l1_norm", "sqrt", "fused_elemwise_activation", - "slogdeterminant", "share_buffer", "bitwise_and", "diag_embed", "unbind", - "dropout", - "beam_search", "log_loss", "greater_than", "kron", "sigmoid_focal_loss", - "rmsprop", "conv2d", "uniform_random_inplace", "maxout", "linear_interp", - "auc", "logical_or", - "acos", "unpool", "cumprod", "sample_logits", "crop_tensor", - "deformable_conv", "generate_mask_labels", "locality_aware_nms", - "expand_as", "matrix_power", "greater_equal", "generate_proposals", - "bilinear_interp", "inplace_abn", "softshrink", "mul", "data_norm", - "get_tensor_from_selected_rows", "spp", "floor", "gelu", - "retinanet_detection_output", "push_dense", "silu", "sequence_erase", - "real", "nearest_interp_v2", "dgc_clip_by_norm", "squeeze2", - "strided_slice", "conj", "precision_recall", "save", - "fusion_seqexpand_concat_fc", "fake_quantize_range_abs_max", - "depthwise_conv2d_transpose", "positive_negative_pair", "square", - "var_conv_2d", "log1p", "fused_softmax_mask_upper_triangle", "clip_by_norm", - "atan2", "box_decoder_and_assign", "fft_r2c", "roi_pool", "overlap_add", - "fill_constant_batch_size_like", "fill_any", "dequantize_log", - "max_pool2d_with_index", "pad3d", "norm", "viterbi_decode", "mish", - "box_coder", "flatten", "elementwise_mod", "margin_cross_entropy", - "logical_and", "pow", "stanh", "label_smooth", "merged_momentum", - "ascend_trigger", "fused_feedforward", "rpn_target_assign", - "roi_perspective_transform", "expand", "prroi_pool", "pool3d", "memcpy", - "distribute_fpn_proposals", "frame", "bincount", "shape", "group_norm", - "resnet_unit", "sequence_expand_as", "cos_sim", "eigvals", "save_combine", - "class_center_sample", "read_file", "isfinite", "arg_max", "equal", - "fake_dequantize_max_abs", "qr", "anchor_generator", "layer_norm", - "merge_selected_rows", "less_equal", - "fusion_lstm", "lars_momentum", "hard_sigmoid", "isnan", - "elementwise_floordiv", "correlation", "histogram", "gather_tree", - "segment_pool", - "fusion_repeated_fc_relu", "nop", - "expand_as_v2", "filter_by_instag", "nll_loss", "dot", "scale", "ncclBcast", - "shuffle_batch", "ncclReduce", "diag", "multiplex", "leaky_relu", - "allclose", - "elementwise_pow", "prior_box", "p_norm", "unique_consecutive", "lod_reset", - "pad", "sequence_conv", "log10", "set_value", "bitwise_xor", "center_loss", - "randint", "attention_lstm", "uniform_random", "slice", "meshgrid", - "hard_swish", "sin", "mean_iou", "pad2d", "inverse", "spectral_norm", - "shuffle_channel", "psroi_pool", "seed", "ceil", "eig", "reduce_min", "cos", - "ncclAllReduce", "cudnn_lstm", "digamma", "assign_value", "increment", - "tdm_sampler", "fused_softmax_mask", "sequence_reverse", "eigvalsh", - "diagonal", "trunc", "log2", "tanh", "yolov3_loss", "graph_send_recv", - "atan", "less_than", "unsqueeze", "crf_decoding", "log_softmax", "ftrl", - "matrix_nms", "top_k_v2", "cast", "tanh_shrink", "hard_shrink", - "multiclass_nms", "fusion_transpose_flatten_concat", "sequence_unpad", - "fused_elemwise_add_activation", "frobenius_norm", "crop", "cross_entropy2", - "skip_layernorm", "tdm_child", "fused_embedding_seq_pool", "erf", - "conv2d_inception_fusion", "trilinear_interp", "logsumexp", - "fusion_seqpool_concat", "alloc_float_status", "sequence_concat", - "fusion_seqpool_cvm_concat", "similarity_focus", "argsort", - "sequence_expand", - "fused_bn_add_activation", "bilinear_interp_v2", "clip", - "deformable_conv_v1", "hinge_loss", "determinant", "conv2d_transpose", - "memcpy_d2h", "softsign", - "broadcast_tensors", "grid_sampler", "fft_c2r", "pyramid_hash", - "multi_dot", "sequence_pool", "transpose", "top_k", "dist", "affine_grid", - "gaussian_random_batch_size_like", "fake_channel_wise_dequantize_max_abs", - "reciprocal", "sequence_mask", "fill_diagonal_tensor", "abs", - "partial_concat", "elu", "index_select", "row_conv", "cross", - "elementwise_mul", "decayed_adagrad", "bipartite_match", - "fake_quantize_moving_average_abs_max", "mine_hard_examples", - "target_assign", "lstm", "truncated_gaussian_random", "match_matrix_tensor", - "elementwise_div", "kldiv_loss", "cumsum", "sum", "proximal_adagrad", - "shard_index", "selu", "mean", "gumbel_softmax", "sequence_pad", - "tree_conv", "assign", "flatten_contiguous_range", "tril_triu", "brelu", - "celu", "reduce_mean", "sinh", "rank_loss", "reduce_max", "fusion_gru", - "fill_zeros_like2", "expm1", "squared_l2_norm", "elementwise_sub", - "margin_rank_loss", "faster_tokenizer", "relu", "is_empty", "reduce_all", - "edit_distance", "bmm", "yolo_box", "soft_relu", "density_prior_box", "eye", - "swish", "cross_entropy", "dpsgd", "cholesky", "batch_fc", "nearest_interp", - "gather", "trilinear_interp_v2", "box_clip", "isnan_v2", "softmax", - "conv2d_fusion", "fused_batch_norm_act", - "index_sample", "elementwise_min", "logical_not", "collect_fpn_proposals", - "pixel_shuffle", "thresholded_relu", "polygon_box_transform", - "lookup_table_dequant", "warpctc", "fake_channel_wise_quantize_abs_max", - "dequantize_abs_max", "svd", "flip"}; +std::set gen_list = {}; // clang-format off const char* OUT_INITIALIZER_TEMPLATE = @@ -348,7 +229,7 @@ std::string GenerateOpFunctionsBody( ins_cast_str += paddle::string::Sprintf(in_cast_type, out_name, op_type, out_name, arg_idx++, dispensable); - // call_api_str += out_name + ", "; + call_api_str += out_name + ", "; } else { // There are few Operators that have duplicable output, like `Out` in // split op. We need to specify the number of variables for the @@ -448,12 +329,28 @@ GenerateOpFunctions() { return std::make_tuple(op_function_list, bind_function_list); } +static void CollectOperatorsToCodeGen(const std::string& op_list_path) { + std::string line; + std::ifstream op_list_file(op_list_path); + if (op_list_file.is_open()) { + while (getline(op_list_file, line)) { + gen_list.insert(line); + } + op_list_file.close(); + } else { + PADDLE_THROW( + paddle::platform::errors::Fatal("Unable to open op_list.txt file")); + } +} + int main(int argc, char* argv[]) { - if (argc != 2) { - std::cerr << "argc must be 2" << std::endl; + if (argc != 3) { + std::cerr << "argc must be 3" << std::endl; return -1; } + CollectOperatorsToCodeGen(argv[2]); + #ifdef PADDLE_WITH_ASCEND_CL auto ascend_ptr = paddle::framework::AscendInstance::GetInstance(); ascend_ptr->InitGEForUT(); -- GitLab