Unverified · Commit 77dfb2e8 · Authored by Zhanlue Yang · Committed by GitHub

Synchronized auto-generated Python-C API with Dygraph Forward Functions (#38017)

* Rearranged Eager AutoCodeGen directory structure

* Removed USE_OP in Eager AutoCodeGen

* Enabled generation for Operators without Grad/Inputs/Outputs

* Resolved operators without input

* Fixed merge conflicts

* Enabled Eager AutoCodeGen for 10+ more operators

* Refactored Eager AutoCodeGen with more organized helper objects

* Enabled Eager AutoCodeGen for operators with multiple OpBases

* Adjusted Eager AutoCodeGen to Enable Passing Output Tensor as Input Argument

* Handled Dispensable Inputs/Outputs in Eager AutoCodeGen

* Adjusted function generation/call between Python-C API & Dygraph API

* Synchronized auto-generated Python-C API with Dygraph Forward Functions

* Added safe_initialized interface to EagerTensor for use in processing dispensable inputs
Parent commit: 472c9085
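In short, the generated dygraph forward functions now take output tensors by reference as egr::EagerTensor& (or std::vector<egr::EagerTensor>& for duplicable outputs) rather than std::shared_ptr<egr::EagerTensor>&, and the auto-generated Python-C wrappers are updated to call them with the matching signature. As a hedged sketch only (the op name, argument names, and return type below are hypothetical; the exact generated code may differ):

    // Shape of a generated forward declaration after this change, for a
    // hypothetical op "foo" with input X and a passable output Out:
    egr::EagerTensor foo_dygraph_function(egr::EagerTensor& X,
                                          egr::EagerTensor& Out);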
@@ -1174,7 +1174,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
           FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name);
     } else {
       const char* FWD_INS_CONTENT_TEMPLATE =
-          "  if(%s.initialized()) "
+          "  if(%s.safe_initialized()) "
           "ins[\"%s\"] = egr::EagerUtils::SyncToVars(%s)\n;";
       generated_function_body += paddle::string::Sprintf(
           FWD_INS_CONTENT_TEMPLATE, input_name, input_name, input_name);
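For a dispensable input, the template now guards on safe_initialized() instead of initialized(). As a sketch, substituting a hypothetical input named Bias for the three %s placeholders, the generator emits:

    if(Bias.safe_initialized()) ins["Bias"] = egr::EagerUtils::SyncToVars(Bias)
    ;

(The template's trailing "\n;" places the semicolon on its own line.)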
@@ -1196,25 +1196,21 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
       // in form of shared_ptr<EagerTensor>/vector<shared_ptr<EagerTensor>>
       if (output.duplicable()) {
         const char* FWD_NUM_ARG_TEMPLATE =
-            ", std::vector<std::shared_ptr<egr::EagerTensor>>& %s";
+            ", std::vector<egr::EagerTensor>& %s";
         std::string arg_str =
             paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
         dygraph_function_args_str += arg_str;
-        const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", %s },";
-        outs_contents_str += paddle::string::Sprintf(
-            FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
       } else {
-        const char* FWD_NUM_ARG_TEMPLATE =
-            ", std::shared_ptr<egr::EagerTensor>& %s";
+        const char* FWD_NUM_ARG_TEMPLATE = ", egr::EagerTensor& %s";
         std::string arg_str =
             paddle::string::Sprintf(FWD_NUM_ARG_TEMPLATE, output_var_name);
         dygraph_function_args_str += arg_str;
-        const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", {%s} },";
-        outs_contents_str += paddle::string::Sprintf(
-            FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
       }
+      const char* FWD_OUTS_CONTENT_TEMPLATE =
+          "{ \"%s\", egr::EagerUtils::TrySyncToVars(&%s) },";
+      outs_contents_str += paddle::string::Sprintf(
+          FWD_OUTS_CONTENT_TEMPLATE, output_name, output_var_name);
     } else {
       if (output.duplicable()) {
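Both branches now share one outs entry built with egr::EagerUtils::TrySyncToVars(&...), since outputs arrive as plain EagerTensor references rather than shared_ptrs. Sketch of the substituted template for a hypothetical output variable Out:

    { "Out", egr::EagerUtils::TrySyncToVars(&Out) },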
@@ -1557,22 +1553,11 @@ static std::string GenerateGradNodeCCContents(
               "fwd_outputs_name_pos_map"));
       size_t grads_position = fwd_outputs_name_pos_map.at(fwd_name);
-      std::string grad_ptr_name = fwd_name + "_ptrs";
-      const char* GET_GRADS_PTR_TEMPLATE =
-          "  std::vector<std::shared_ptr<egr::EagerTensor>> %s;\n"
-          "  for(const auto& t : grads[%d]) {\n    "
-          "%s.emplace_back(std::move(std::make_shared<egr::EagerTensor>(t))"
-          ");"
-          "\n  }\n";
-      std::string grads_ptr_str =
-          paddle::string::Sprintf(GET_GRADS_PTR_TEMPLATE, grad_ptr_name,
-                                  grads_position, grad_ptr_name);
-      generated_grad_function_body += grads_ptr_str;
-      generated_grad_function_body += "\n";
-      const char* GRAD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", %s },";
+      const char* GRAD_OUTS_CONTENT_TEMPLATE =
+          "{ \"%s\", egr::EagerUtils::SyncToVars(grads[%d]) },";
       outs_contents_str += paddle::string::Sprintf(
-          GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grad_ptr_name);
+          GRAD_OUTS_CONTENT_TEMPLATE, grad_output_name, grads_position);
     } else {
       size_t fwd_input_position = fwd_inputs_name_pos_map.at(fwd_name);
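The grad-node body no longer emits a loop copying each grad slot into a vector of shared_ptrs; the generated outs entry now calls SyncToVars on the grad slot directly. A before/after sketch for a hypothetical grad output named "Out@GRAD" at grads position 0:

    // before (emitted by GET_GRADS_PTR_TEMPLATE plus the old outs entry):
    //   std::vector<std::shared_ptr<egr::EagerTensor>> Out_ptrs;
    //   for(const auto& t : grads[0]) {
    //     Out_ptrs.emplace_back(std::move(std::make_shared<egr::EagerTensor>(t)));
    //   }
    //   ... { "Out@GRAD", Out_ptrs },
    // after:
    { "Out@GRAD", egr::EagerUtils::SyncToVars(grads[0]) },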
......
@@ -152,6 +152,10 @@ class EagerTensor final {
    */
   bool initialized() const { return tensor_->initialized(); }

+  bool safe_initialized() const {
+    return initialized() || var_.IsInitialized();
+  }
+
   /**
    * @description: Reset the Tensor implementation
    * @param None
......
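safe_initialized() exists because a dispensable input may reach the generated forward as a default-constructed EagerTensor holding neither an initialized tensor implementation nor an initialized framework Variable; checking both sides lets the generated guard skip such inputs cleanly. A minimal usage sketch (names hypothetical):

    egr::EagerTensor bias;            // dispensable input, never fed
    if (bias.safe_initialized()) {    // false: neither side holds data
      // would sync into the op's input map here
    }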
@@ -135,6 +135,30 @@ std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::SyncToVars(
   return res;
 }

+static std::shared_ptr<egr::EagerTensor> TrySyncToVar(
+    egr::EagerTensor* tensor) {
+  if (tensor->initialized() || tensor->Var().IsInitialized()) {
+    tensor->SyncToVar(paddle::framework::proto::VarType_Type_LOD_TENSOR);
+  }
+  return std::make_shared<EagerTensor>(*tensor);
+}
+
+std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::TrySyncToVars(
+    egr::EagerTensor* tensor) {
+  return {TrySyncToVar(tensor)};
+}
+
+std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::TrySyncToVars(
+    std::vector<egr::EagerTensor>* tensors) {
+  std::vector<std::shared_ptr<EagerTensor>> res;
+  size_t num = tensors->size();
+  res.reserve(num);
+  for (size_t i = 0; i < num; i++) {
+    res.emplace_back(TrySyncToVar(&(*tensors)[i]));
+  }
+  return res;
+}
+
 /* ---- VarBase -> Tensor ---- */
 std::vector<std::shared_ptr<egr::EagerTensor>> EagerUtils::SyncToTensors(
     const egr::EagerTensor& tensor) {
......
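TrySyncToVars is the tolerant counterpart of SyncToVars: it syncs a tensor into its Variable form only when the tensor or its Variable is already initialized, and otherwise just wraps the still-empty tensor. A usage sketch under that assumption:

    egr::EagerTensor out;  // output slot passed in by the caller, possibly empty
    std::vector<std::shared_ptr<egr::EagerTensor>> vars =
        egr::EagerUtils::TrySyncToVars(&out);  // safe even if 'out' is uninitialized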
@@ -145,6 +145,11 @@ class EagerUtils {
       const std::shared_ptr<GradNodeBase>& grad_node);

   // Intermediate state needed; remove this once we no longer need legacy
+  static std::vector<std::shared_ptr<egr::EagerTensor>> TrySyncToVars(
+      egr::EagerTensor* tensor);
+  static std::vector<std::shared_ptr<egr::EagerTensor>> TrySyncToVars(
+      std::vector<egr::EagerTensor>* tensors);
   static std::vector<std::shared_ptr<egr::EagerTensor>> SyncToVars(
       const egr::EagerTensor& tensor);
   static std::vector<std::shared_ptr<egr::EagerTensor>> SyncToVars(
......
@@ -181,7 +181,7 @@ if(WITH_PYTHON)
           ":retry\n"
           "ECHO eager_op_function_generator run %build_times% time\n"
           "taskkill /f /im eager_op_function_generator.exe 2>NUL\n"
-          "${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file}\n"
+          "${op_impl_path}/eager_op_function_generator.exe ${tmp_eager_impl_file} ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt\n"
          "if %ERRORLEVEL% NEQ 0 (\n"
          "  set /a build_times=%build_times%+1\n"
          "  if %build_times% GEQ 10 (\n"
@@ -256,7 +256,7 @@ if(WITH_PYTHON)
     add_custom_command(OUTPUT ${eager_impl_file}
       COMMAND ${CMAKE_COMMAND} -E env "LD_LIBRARY_PATH=$ENV{LD_LIBRARY_PATH}:."
               "${CMAKE_CURRENT_BINARY_DIR}/eager_op_function_generator"
-              "${tmp_eager_impl_file}"
+              "${tmp_eager_impl_file}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt"
       COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_eager_impl_file} ${eager_impl_file}
       COMMENT "copy_if_different ${tmp_eager_impl_file} to ${eager_impl_file}"
       DEPENDS ${EAGER_OP_IMPL_DEPS}
......
@@ -32,126 +32,7 @@
 #endif
 #include "paddle/fluid/pybind/op_function_generator.h"

-std::set<std::string> gen_list = {
-    "sigmoid", "matmul_v2", "reduce_sum", "elementwise_add", "rsqrt",
-    "multihead_matmul", "addmm", "gru", "round", "push_dense", "rank_attention",
-    "fused_embedding_fc_lstm", "where_index", "bicubic_interp", "arg_min",
-    "tile", "bilinear_tensor_product", "ctc_align",
-    "pow2_decay_with_linear_warmup", "marker", "split", "fc",
-    "load", "elementwise_max", "adadelta",
-    "tan",
-    "fsp", "where", "logical_xor", "multiclass_nms3", "one_hot_v2",
-    "sequence_softmax", "affine_channel", "triangular_solve",
-    "sequence_topk_avg_pooling", "space_to_depth", "reverse",
-    "fused_embedding_eltwise_layernorm", "expand_v2", "lgamma", "solve",
-    "deformable_psroi_pooling", "instance_norm", "decode_jpeg", "gather_nd",
-    "reduce_prod", "matrix_rank", "asin", "lstmp", "iou_similarity",
-    "huber_loss", "one_hot", "sequence_slice", "lookup_table", "softplus",
-    "depthwise_conv2d", "fused_fc_elementwise_layernorm",
-    "sigmoid_cross_entropy_with_logits", "exp", "scatter", "equal_all",
-    "searchsorted", "fusion_squared_mat_sub", "unique", "log", "conv_shift",
-    "smooth_l1_loss", "linear_interp_v2",
-    "temporal_shift", "nce", "mv", "proximal_gd", "memcpy_h2d",
-    "add_position_encoding", "cosh", "hash", "grad_add", "sign", "prelu",
-    "linspace", "fill_diagonal", "logsigmoid", "load_combine", "fetch_v2",
-    "randperm", "sequence_scatter", "partial_sum", "relu6", "conv3d",
-    "lstm_unit", "not_equal", "transpose2", "uniform_random_batch_size_like",
-    "unfold", "lrn", "softmax_with_cross_entropy", "isfinite_v2", "bernoulli",
-    "max_pool3d_with_index", "gaussian_random", "flatten2",
-    "cvm", "adamax", "masked_select", "range", "bitwise_not", "trace",
-    "multinomial", "modified_huber_loss", "roll", "squared_l2_distance",
-    "conv3d_transpose", "share_data", "fake_quantize_abs_max",
-    "unique_with_counts", "fill", "concat", "fill_zeros_like",
-    "hierarchical_sigmoid", "isinf_v2", "squeeze", "multiclass_nms2",
-    "bpr_loss", "fft_c2c", "bicubic_interp_v2", "reshape", "coalesce_tensor",
-    "roi_align", "reshape2", "reduce_any", "unstack", "scatter_nd_add",
-    "sequence_reshape", "bilateral_slice", "fill_any_like", "empty",
-    "pad_constant_like", "pool2d", "size", "imag", "eigh", "stack",
-    "dgc_momentum",
-    "generate_proposals_v2", "bitwise_or", "gru_unit",
-    "sampling_id", "unsqueeze2",
-    "sequence_enumerate", "fusion_seqconv_eltadd_relu", "bce_loss",
-    "generate_proposal_labels", "im2sequence", "isinf", "adagrad",
-    "linear_chain_crf", "retinanet_target_assign", "fusion_group",
-    "teacher_student_sigmoid_loss", "random_crop", "lookup_table_v2",
-    "detection_map", "l1_norm", "sqrt", "fused_elemwise_activation",
-    "slogdeterminant", "share_buffer", "bitwise_and", "diag_embed", "unbind",
-    "dropout",
-    "beam_search", "log_loss", "greater_than", "kron", "sigmoid_focal_loss",
-    "rmsprop", "conv2d", "uniform_random_inplace", "maxout", "linear_interp",
-    "auc", "logical_or",
-    "acos", "unpool", "cumprod", "sample_logits", "crop_tensor",
-    "deformable_conv", "generate_mask_labels", "locality_aware_nms",
-    "expand_as", "matrix_power", "greater_equal", "generate_proposals",
-    "bilinear_interp", "inplace_abn", "softshrink", "mul", "data_norm",
-    "get_tensor_from_selected_rows", "spp", "floor", "gelu",
-    "retinanet_detection_output", "push_dense", "silu", "sequence_erase",
-    "real", "nearest_interp_v2", "dgc_clip_by_norm", "squeeze2",
-    "strided_slice", "conj", "precision_recall", "save",
-    "fusion_seqexpand_concat_fc", "fake_quantize_range_abs_max",
-    "depthwise_conv2d_transpose", "positive_negative_pair", "square",
-    "var_conv_2d", "log1p", "fused_softmax_mask_upper_triangle", "clip_by_norm",
-    "atan2", "box_decoder_and_assign", "fft_r2c", "roi_pool", "overlap_add",
-    "fill_constant_batch_size_like", "fill_any", "dequantize_log",
-    "max_pool2d_with_index", "pad3d", "norm", "viterbi_decode", "mish",
-    "box_coder", "flatten", "elementwise_mod", "margin_cross_entropy",
-    "logical_and", "pow", "stanh", "label_smooth", "merged_momentum",
-    "ascend_trigger", "fused_feedforward", "rpn_target_assign",
-    "roi_perspective_transform", "expand", "prroi_pool", "pool3d", "memcpy",
-    "distribute_fpn_proposals", "frame", "bincount", "shape", "group_norm",
-    "resnet_unit", "sequence_expand_as", "cos_sim", "eigvals", "save_combine",
-    "class_center_sample", "read_file", "isfinite", "arg_max", "equal",
-    "fake_dequantize_max_abs", "qr", "anchor_generator", "layer_norm",
-    "merge_selected_rows", "less_equal",
-    "fusion_lstm", "lars_momentum", "hard_sigmoid", "isnan",
-    "elementwise_floordiv", "correlation", "histogram", "gather_tree",
-    "segment_pool",
-    "fusion_repeated_fc_relu", "nop",
-    "expand_as_v2", "filter_by_instag", "nll_loss", "dot", "scale", "ncclBcast",
-    "shuffle_batch", "ncclReduce", "diag", "multiplex", "leaky_relu",
-    "allclose",
-    "elementwise_pow", "prior_box", "p_norm", "unique_consecutive", "lod_reset",
-    "pad", "sequence_conv", "log10", "set_value", "bitwise_xor", "center_loss",
-    "randint", "attention_lstm", "uniform_random", "slice", "meshgrid",
-    "hard_swish", "sin", "mean_iou", "pad2d", "inverse", "spectral_norm",
-    "shuffle_channel", "psroi_pool", "seed", "ceil", "eig", "reduce_min", "cos",
-    "ncclAllReduce", "cudnn_lstm", "digamma", "assign_value", "increment",
-    "tdm_sampler", "fused_softmax_mask", "sequence_reverse", "eigvalsh",
-    "diagonal", "trunc", "log2", "tanh", "yolov3_loss", "graph_send_recv",
-    "atan", "less_than", "unsqueeze", "crf_decoding", "log_softmax", "ftrl",
-    "matrix_nms", "top_k_v2", "cast", "tanh_shrink", "hard_shrink",
-    "multiclass_nms", "fusion_transpose_flatten_concat", "sequence_unpad",
-    "fused_elemwise_add_activation", "frobenius_norm", "crop", "cross_entropy2",
-    "skip_layernorm", "tdm_child", "fused_embedding_seq_pool", "erf",
-    "conv2d_inception_fusion", "trilinear_interp", "logsumexp",
-    "fusion_seqpool_concat", "alloc_float_status", "sequence_concat",
-    "fusion_seqpool_cvm_concat", "similarity_focus", "argsort",
-    "sequence_expand",
-    "fused_bn_add_activation", "bilinear_interp_v2", "clip",
-    "deformable_conv_v1", "hinge_loss", "determinant", "conv2d_transpose",
-    "memcpy_d2h", "softsign",
-    "broadcast_tensors", "grid_sampler", "fft_c2r", "pyramid_hash",
-    "multi_dot", "sequence_pool", "transpose", "top_k", "dist", "affine_grid",
-    "gaussian_random_batch_size_like", "fake_channel_wise_dequantize_max_abs",
-    "reciprocal", "sequence_mask", "fill_diagonal_tensor", "abs",
-    "partial_concat", "elu", "index_select", "row_conv", "cross",
-    "elementwise_mul", "decayed_adagrad", "bipartite_match",
-    "fake_quantize_moving_average_abs_max", "mine_hard_examples",
-    "target_assign", "lstm", "truncated_gaussian_random", "match_matrix_tensor",
-    "elementwise_div", "kldiv_loss", "cumsum", "sum", "proximal_adagrad",
-    "shard_index", "selu", "mean", "gumbel_softmax", "sequence_pad",
-    "tree_conv", "assign", "flatten_contiguous_range", "tril_triu", "brelu",
-    "celu", "reduce_mean", "sinh", "rank_loss", "reduce_max", "fusion_gru",
-    "fill_zeros_like2", "expm1", "squared_l2_norm", "elementwise_sub",
-    "margin_rank_loss", "faster_tokenizer", "relu", "is_empty", "reduce_all",
-    "edit_distance", "bmm", "yolo_box", "soft_relu", "density_prior_box", "eye",
-    "swish", "cross_entropy", "dpsgd", "cholesky", "batch_fc", "nearest_interp",
-    "gather", "trilinear_interp_v2", "box_clip", "isnan_v2", "softmax",
-    "conv2d_fusion", "fused_batch_norm_act",
-    "index_sample", "elementwise_min", "logical_not", "collect_fpn_proposals",
-    "pixel_shuffle", "thresholded_relu", "polygon_box_transform",
-    "lookup_table_dequant", "warpctc", "fake_channel_wise_quantize_abs_max",
-    "dequantize_abs_max", "svd", "flip"};
+std::set<std::string> gen_list = {};

 // clang-format off
 const char* OUT_INITIALIZER_TEMPLATE =
@@ -348,7 +229,7 @@ std::string GenerateOpFunctionsBody(
       ins_cast_str += paddle::string::Sprintf(in_cast_type, out_name, op_type,
                                               out_name, arg_idx++, dispensable);
-      // call_api_str += out_name + ", ";
+      call_api_str += out_name + ", ";
     } else {
       // There are few Operators that have duplicable output, like `Out` in
       // split op. We need to specify the number of variables for the
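Uncommenting this line makes the Python-C wrapper forward a dispensable output, parsed from the Python arguments, into the dygraph function call, matching the new forward signatures above. Sketch of the effect on the emitted call (op and argument names hypothetical):

    // before: auto out = foo_dygraph_function(X);
    // after:  auto out = foo_dygraph_function(X, Out);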
@@ -448,12 +329,28 @@ GenerateOpFunctions() {
   return std::make_tuple(op_function_list, bind_function_list);
 }

+static void CollectOperatorsToCodeGen(const std::string& op_list_path) {
+  std::string line;
+  std::ifstream op_list_file(op_list_path);
+  if (op_list_file.is_open()) {
+    while (getline(op_list_file, line)) {
+      gen_list.insert(line);
+    }
+    op_list_file.close();
+  } else {
+    PADDLE_THROW(
+        paddle::platform::errors::Fatal("Unable to open op_list.txt file"));
+  }
+}
+
 int main(int argc, char* argv[]) {
-  if (argc != 2) {
-    std::cerr << "argc must be 2" << std::endl;
+  if (argc != 3) {
+    std::cerr << "argc must be 3" << std::endl;
     return -1;
   }
+
+  CollectOperatorsToCodeGen(argv[2]);
+
 #ifdef PADDLE_WITH_ASCEND_CL
   auto ascend_ptr = paddle::framework::AscendInstance::GetInstance();
   ascend_ptr->InitGEForUT();
......
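With the hard-coded gen_list replaced by CollectOperatorsToCodeGen, both generator invocations in the CMake changes above pass ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/op_list.txt as the second argument, so the Eager and Python-C generators can share one operator list. Since the function inserts each getline result verbatim, the file is expected to hold one operator name per line; a plausible excerpt (contents assumed from the removed list):

    sigmoid
    matmul_v2
    reduce_sum
    elementwise_add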