diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index db1dbaa2aa7e1e1ad772c7bdd606b614f8a62153..da13b08b59f80aeb64002d0d2d38cda93e4c366c 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1107,10 +1107,12 @@ static std::string GenerateGradNodeCreationContent(
   size_t bwd_in_slot_num = out_vars.size();
   size_t bwd_out_slot_num = in_vars.size();
   const char* GRAD_OP_NODE_TEMPLATE =
-      "    auto grad_node = std::make_shared<GradNode%s>(%d, %d);\n";
+      "    auto grad_node = std::shared_ptr<GradNode%s>(new GradNode%s(%d, "
+      "%d));\n";
   grad_node_creation_str += "    // Create GradOpNode\n";
-  grad_node_creation_str += paddle::string::Sprintf(
-      GRAD_OP_NODE_TEMPLATE, op_type, bwd_in_slot_num, bwd_out_slot_num);
+  grad_node_creation_str +=
+      paddle::string::Sprintf(GRAD_OP_NODE_TEMPLATE, op_type, op_type,
+                              bwd_in_slot_num, bwd_out_slot_num);
   grad_node_creation_str += "\n";
 
   VLOG(6) << "Generated GradOpNode construction";
diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
index 7339f3581ac1fd5ec50a8fd161a2723d49ec78a1..8d061c8929ae6b040b5a723097d2eb5d906b8d3f 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py
@@ -650,8 +650,12 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
         num_backward_inputs = len(forward_outputs_position_map.keys())
         num_backward_outputs = len(forward_inputs_position_map.keys())
         grad_node_name = GetGradNodeName(forward_api_name)
-
-        node_construction_str = f"        auto grad_node = std::make_shared<{grad_node_name}>({num_backward_inputs}, {num_backward_outputs});"
+        # NOTE(Aurelius74): DO NOT use make_shared here. Because some Node contains experimental::Scalar
+        # which contains "complex128" as data. "complex128" is memory-aligned manually. But make_shared
+        # request MEMALIGN for allocation (Maybe).
+        # See https://stackoverflow.com/questions/31228656/how-can-shared-ptr-disrupt-alignment
+        # and https://github.com/MRtrix3/mrtrix3/issues/957
+        node_construction_str = f"        auto grad_node = std::shared_ptr<{grad_node_name}>(new {grad_node_name}({num_backward_inputs}, {num_backward_outputs}));"
 
         # SetAttributes
         set_attributes_list = []
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index 09bfe746271092e5bd5777671893b5930ff76bdb..9a6059c53a7e57932aa9304d1018b5d98ae8509b 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -1373,7 +1373,15 @@
   backward : tile_grad
   # no_need_buffer : x
 
-# trace
+- api : top_k
+  args : (Tensor x, Scalar k, int axis = -1, bool largest = true, bool sorted = true)
+  output : Tensor(out), Tensor(indices)
+  infer_meta :
+    func : TopKInferMeta
+  kernel :
+    func : top_k
+  backward : top_k_grad
+
 - api : trace
   args : (Tensor x, int offset, int axis1, int axis2)
   output : Tensor
diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml
index 43f512540ec4b24629b02dd0130831fb5387b97e..4f2b76db1a27f3428ad8a2d9f3172d02fae20657 100644
--- a/python/paddle/utils/code_gen/backward.yaml
+++ b/python/paddle/utils/code_gen/backward.yaml
@@ -866,6 +866,16 @@
   kernel :
     func : tile_grad
 
+- backward_api : top_k_grad
+  forward : top_k (Tensor x, Scalar k, int axis = -1, bool largest = true, bool sorted = true) -> Tensor(out), Tensor(indices)
+  args : (Tensor x, Tensor indices, Tensor out_grad, Scalar k = -1, int axis = -1, bool largest = true, bool sorted = true)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : top_k_grad
+
 - backward_api : trace_grad
   forward : trace (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
   args : (Tensor x, Tensor out_grad, int offset, int axis1, int axis2)
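Note on the make_shared -> shared_ptr(new ...) change in both generators: the generated grad nodes can hold paddle::experimental::Scalar, whose complex128 payload is aligned manually, and std::make_shared folds the object into a single allocation together with the control block, which on some toolchains does not honor extended alignment (see the two links in the NOTE above). Below is a minimal standalone sketch of the concern, not Paddle code; the OverAligned type and the 32-byte requirement are stand-ins.

// Sketch only: compile with -std=c++17; whether make_shared mis-aligns the
// object depends on the standard library implementation.
#include <cstdint>
#include <cstdio>
#include <memory>

struct alignas(32) OverAligned {  // stand-in for a grad node holding an
  double payload[4];              // over-aligned Scalar/complex128 member
};

static bool IsAligned(const void* p, std::size_t alignment) {
  return reinterpret_cast<std::uintptr_t>(p) % alignment == 0;
}

int main() {
  // make_shared: object and control block share one allocation; some
  // implementations do not request the extended (32-byte) alignment for it.
  auto a = std::make_shared<OverAligned>();
  // shared_ptr(new T): the object comes from T's own (aligned) operator new.
  auto b = std::shared_ptr<OverAligned>(new OverAligned());
  std::printf("make_shared aligned to 32:     %d\n", IsAligned(a.get(), 32));
  std::printf("shared_ptr(new) aligned to 32: %d\n", IsAligned(b.get(), 32));
  return 0;
}

The trade-off is one extra allocation for the control block, but the object itself is guaranteed to go through the type's aligned operator new, which is why the generated code now spells out shared_ptr<GradNodeX>(new GradNodeX(...)).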
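On the new top_k / top_k_grad entries: TopKInferMeta gives out and indices the shape of x with the axis dimension replaced by k, while the backward entry reuses UnchangedInferMeta with param [x], i.e. x_grad simply takes x's shape. A rough sketch of the forward shape rule, assuming the usual top-k semantics rather than Paddle's actual TopKInferMeta code:

// Sketch only: the shape contract implied by the top_k yaml entry.
#include <cstdint>
#include <vector>

std::vector<int64_t> TopKOutShape(std::vector<int64_t> x_dims, int64_t k, int axis) {
  if (axis < 0) axis += static_cast<int>(x_dims.size());  // e.g. axis = -1 -> last dim
  x_dims[axis] = k;  // both out and indices take this shape
  return x_dims;
}

// Example: x_dims = {2, 3, 10}, k = 4, axis = -1  ->  {2, 3, 4}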