From a3730dc87bc61593514b830727e36e5d19e753cd Mon Sep 17 00:00:00 2001
From: Sing_chan <51314274+betterpig@users.noreply.github.com>
Date: Sun, 5 Jun 2022 11:11:31 +0800
Subject: [PATCH] [code format check upgrade] step2: clang-format (#42840)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .pre-commit-config.yaml | 4 +
 .../fluid/distributed/collective/HCCLTools.cc | 1 +
 .../fluid/distributed/collective/HCCLTools.h | 1 +
 .../fluid/distributed/collective/NCCLTools.cc | 1 +
 .../fluid/distributed/collective/NCCLTools.h | 4 +-
 .../distributed/collective/ProcessGroup.h | 1 -
 .../collective/ProcessGroupGloo.cc | 6 +-
 .../collective/ProcessGroupHCCL.cc | 20 +-
 .../distributed/collective/ProcessGroupHCCL.h | 5 +-
 .../collective/ProcessGroupHeter.cc | 27 +-
 .../collective/ProcessGroupNCCL.cc | 132 +--
 .../distributed/collective/ProcessGroupNCCL.h | 3 +-
 .../fluid/distributed/collective/reducer.cc | 5 +-
 paddle/fluid/distributed/collective/reducer.h | 1 +
 .../fluid/distributed/common/afs_warpper.cc | 8 +-
 paddle/fluid/distributed/common/afs_warpper.h | 1 +
 paddle/fluid/distributed/common/cost_timer.h | 1 +
 .../fluid/distributed/common/local_random.h | 1 +
 paddle/fluid/distributed/common/registerer.h | 1 +
 .../distributed/fleet_executor/carrier.cc | 8 +-
 .../distributed/fleet_executor/carrier.h | 2 +-
 .../fleet_executor/compute_interceptor.cc | 2 +-
 .../distributed/fleet_executor/dist_model.cc | 9 +-
 .../distributed/fleet_executor/dist_model.h | 2 +-
 .../dist_model_tensor_wrapper.cc | 1 +
 .../dist_model_tensor_wrapper.h | 1 +
 .../fleet_executor/fleet_executor.cc | 3 +-
 .../fleet_executor/fleet_executor.h | 2 +-
 .../distributed/fleet_executor/interceptor.cc | 1 +
 .../distributed/fleet_executor/interceptor.h | 2 +-
 .../distributed/fleet_executor/message_bus.cc | 8 +-
 .../fleet_executor/message_service.cc | 1 +
 .../fleet_executor/runtime_graph.cc | 1 +
 .../fleet_executor/runtime_graph.h | 1 +
 .../fleet_executor/sink_interceptor.cc | 1 +
 .../fleet_executor/source_interceptor.cc | 1 +
 .../fleet_executor/task_loop_thread.cc | 5 +-
 .../fleet_executor/task_loop_thread_pool.cc | 20 +-
 .../distributed/fleet_executor/task_node.cc | 11 +-
 .../distributed/fleet_executor/task_node.h | 2 +-
 .../test/compute_interceptor_run_op_test.cc | 1 -
 .../test/compute_interceptor_test.cc | 1 -
 .../test/interceptor_ping_pong_test.cc | 1 -
 .../interceptor_ping_pong_with_brpc_test.cc | 2 +-
 .../interceptor_pipeline_long_path_test.cc | 1 -
 .../interceptor_pipeline_short_path_test.cc | 1 -
 .../test/sink_interceptor_test.cc | 1 -
 .../test/source_interceptor_test.cc | 1 -
 .../index_dataset/index_sampler.cc | 1 +
 .../distributed/index_dataset/index_sampler.h | 1 +
 .../index_dataset/index_wrapper.cc | 5 +-
 .../distributed/index_dataset/index_wrapper.h | 10 +-
 .../distributed/ps/service/brpc_ps_client.cc | 13 +-
 .../distributed/ps/service/brpc_ps_client.h | 1 +
 .../distributed/ps/service/brpc_ps_server.cc | 2 +
 .../fluid/distributed/ps/service/brpc_utils.h | 1 +
 .../ps/service/communicator/communicator.cc | 2 +
 .../ps/service/communicator/communicator.h | 9 +-
 paddle/fluid/distributed/ps/service/env.h | 2 +
 .../ps/service/graph_brpc_client.cc | 8 +-
 .../ps/service/graph_brpc_client.h | 3 +-
 .../ps/service/graph_brpc_server.cc | 3 +-
 .../ps/service/graph_brpc_server.h | 6 +-
 .../fluid/distributed/ps/service/ps_client.cc | 1 +
 .../fluid/distributed/ps/service/ps_client.h | 1 +
 .../distributed/ps/service/ps_local_client.cc | 5 +-
 .../distributed/ps/service/ps_local_client.h | 4 +-
 .../distributed/ps/service/ps_local_server.h | 5 +-
 .../ps/service/ps_service/graph_py_service.cc | 6 +-
 .../ps/service/ps_service/graph_py_service.h | 16 +-
 .../ps/service/ps_service/service.cc | 2 +
 paddle/fluid/distributed/ps/service/server.h | 1 +
 paddle/fluid/distributed/ps/table/accessor.h | 2 +
 .../ps/table/common_graph_table.cc | 5 +-
 .../distributed/ps/table/common_graph_table.h | 4 +-
 .../fluid/distributed/ps/table/common_table.h | 3 +-
 .../distributed/ps/table/ctr_accessor.cc | 2 +
 .../fluid/distributed/ps/table/ctr_accessor.h | 2 +
 .../ps/table/ctr_double_accessor.cc | 2 +
 .../ps/table/ctr_double_accessor.h | 2 +
 .../distributed/ps/table/ctr_dymf_accessor.cc | 2 +
 .../distributed/ps/table/ctr_dymf_accessor.h | 2 +
 .../distributed/ps/table/depends/dense.h | 3 +-
 .../ps/table/depends/feature_value.h | 4 +-
 .../ps/table/depends/geo_recorder.h | 1 +
 .../ps/table/depends/initializers.h | 3 +-
 .../ps/table/depends/rocksdb_warpper.h | 5 +-
 .../distributed/ps/table/graph/graph_edge.cc | 5 +-
 .../distributed/ps/table/graph/graph_edge.h | 4 +-
 .../distributed/ps/table/graph/graph_node.cc | 1 +
 .../distributed/ps/table/graph/graph_node.h | 1 +
 .../ps/table/graph/graph_weighted_sampler.cc | 2 +
 .../ps/table/graph/graph_weighted_sampler.h | 1 +
 .../distributed/ps/table/memory_dense_table.h | 2 +
 .../ps/table/memory_sparse_geo_table.h | 1 +
 .../ps/table/memory_sparse_table.cc | 14 +-
 .../ps/table/memory_sparse_table.h | 2 +
 .../distributed/ps/table/sparse_accessor.cc | 2 +
 .../distributed/ps/table/sparse_accessor.h | 2 +
 .../distributed/ps/table/sparse_sgd_rule.cc | 2 +
 .../distributed/ps/table/sparse_sgd_rule.h | 2 +
 .../distributed/ps/table/ssd_sparse_table.cc | 11 +-
 paddle/fluid/distributed/ps/table/table.cc | 4 +-
 paddle/fluid/distributed/ps/table/table.h | 2 +
 .../distributed/ps/table/tensor_accessor.cc | 1 +
 .../distributed/ps/table/tensor_accessor.h | 1 +
 paddle/fluid/distributed/ps/wrapper/fleet.cc | 3 +-
 paddle/fluid/distributed/ps/wrapper/fleet.h | 2 +-
 .../fluid/distributed/ps/wrapper/ps_wrapper.h | 2 +-
 paddle/fluid/distributed/store/tcp_store.cc | 3 +-
 paddle/fluid/distributed/store/tcp_utils.cc | 24 +-
 paddle/fluid/distributed/store/tcp_utils.h | 15 +-
 .../distributed/test/barrier_table_test.cc | 2 +
 .../test/brpc_service_dense_sgd_test.cc | 1 +
 .../test/brpc_service_sparse_sgd_test.cc | 1 +
 .../fluid/distributed/test/brpc_utils_test.cc | 4 +-
 .../distributed/test/ctr_accessor_test.cc | 2 +
 .../test/ctr_dymf_accessor_test.cc | 2 +
 .../distributed/test/dense_table_test.cc | 2 +
 .../distributed/test/feature_value_test.cc | 2 +
 .../distributed/test/graph_node_split_test.cc | 3 +-
 .../fluid/distributed/test/graph_node_test.cc | 6 +-
 .../test/graph_table_sample_test.cc | 5 +-
 .../distributed/test/memory_geo_table_test.cc | 2 +-
 .../test/memory_sparse_table_test.cc | 5 +-
 .../distributed/test/sparse_sgd_rule_test.cc | 2 +
 paddle/fluid/distributed/test/table_test.cc | 2 +-
 .../eager/accumulation/accumulation_node.cc | 13 +-
 .../eager/accumulation/accumulation_node.h | 3 +-
 paddle/fluid/eager/amp_utils.h | 1 +
 .../eager_generated/backwards/scale_node.cc | 11 +-
 .../eager_generated/backwards/scale_node.h | 3 +-
 .../eager_generated/forwards/scale.cc | 2 +-
 paddle/fluid/eager/api/utils/global_utils.h | 6 +-
 paddle/fluid/eager/api/utils/hook_utils.cc | 1 +
 paddle/fluid/eager/api/utils/tensor_utils.cc | 5 +-
 paddle/fluid/eager/backward.cc | 18 +-
 .../custom_operator/custom_operator_node.cc | 1 +
 paddle/fluid/eager/grad_node_info.cc | 15 +-
 paddle/fluid/eager/grad_node_info.h | 19 +-
 paddle/fluid/eager/grad_tensor_holder.cc | 2 +-
 paddle/fluid/eager/hooks.h | 1 +
 paddle/fluid/eager/pylayer/py_layer_node.cc | 13 +-
 paddle/fluid/eager/pylayer/py_layer_node.h | 3 +-
 .../accumulation_node_test.cc | 4 +-
 .../autograd_meta_test.cc | 4 +-
 .../data_structure_tests/eager_tensor_test.cc | 6 +-
 .../grad_node_info_test.cc | 8 +-
 .../data_structure_tests/grad_node_test.h | 4 +-
 .../grad_tensor_holder_test.cc | 7 +-
 .../tensor_wrapper_test.cc | 4 +-
 .../performance_tests/benchmark_eager_cpu.cc | 8 +-
 .../performance_tests/benchmark_eager_cuda.cc | 8 +-
 .../performance_tests/benchmark_fluid_cpu.cc | 1 -
 .../performance_tests/benchmark_fluid_cuda.cc | 1 -
 .../tests/performance_tests/benchmark_utils.h | 1 +
 .../eager/tests/task_tests/backward_test.cc | 11 +-
 .../cross_batch_accumulation_test.cc | 11 +-
 .../tests/task_tests/eager_utils_test.cc | 2 -
 .../tests/task_tests/forward_autograd_test.cc | 5 +-
 .../tests/task_tests/fwd_bwd_joint_test.cc | 8 +-
 .../eager/tests/task_tests/generated_test.cc | 7 +-
 .../fluid/eager/tests/task_tests/grad_test.cc | 5 +-
 .../fluid/eager/tests/task_tests/hook_test.cc | 11 +-
 .../task_tests/hook_test_intermidiate.cc | 6 +-
 .../tests/task_tests/nan_inf_utils_test.cc | 4 +-
 .../tests/task_tests/tensor_utils_test.cc | 5 +-
 paddle/fluid/eager/tests/test_utils.h | 8 +-
 .../eager/to_static/run_program_op_node.h | 6 +-
 paddle/fluid/eager/utils.h | 14 +-
 paddle/fluid/framework/archive.h | 2 +
 paddle/fluid/framework/async_executor.cc | 4 +-
 paddle/fluid/framework/async_executor.h | 2 +
 paddle/fluid/framework/attribute.h | 1 +
 paddle/fluid/framework/attribute_test.cc | 6 +-
 paddle/fluid/framework/channel.h | 2 +
 paddle/fluid/framework/convert_utils_test.cc | 1 +
 .../fluid/framework/copy_same_tensor_test.cc | 1 +
 paddle/fluid/framework/custom_operator.cc | 60 +-
 .../framework/data_device_transform_test.cu | 4 +-
 paddle/fluid/framework/data_feed.cc | 19 +-
 paddle/fluid/framework/data_feed_factory.cc | 1 +
 paddle/fluid/framework/data_feed_test.cc | 3 +
 paddle/fluid/framework/data_set.cc | 1 +
 paddle/fluid/framework/data_set.h | 2 +
 paddle/fluid/framework/data_type_test.cc | 2 +-
 .../framework/data_type_transform_test.cu | 3 +-
 .../bind_threaded_ssa_graph_executor.cc | 2 +
 .../bind_threaded_ssa_graph_executor.h | 2 +
 .../fluid/framework/details/bkcl_op_handle.h | 3 +-
 .../fluid/framework/details/build_strategy.cc | 1 +
 .../framework/details/build_strategy_test.cc | 4 +-
 .../fluid/framework/details/cow_ptr_test.cc | 1 +
 .../framework/details/execution_strategy.h | 1 +
 .../fast_threaded_ssa_graph_executor.h | 2 +
 .../details/fused_all_reduce_op_handle.cc | 16 +-
 .../grad_merge_all_reduce_op_handle.cc | 1 +
 .../fluid/framework/details/graph_test_base.h | 1 +
 .../framework/details/nan_inf_utils_detail.cc | 10 +-
 .../framework/details/nan_inf_utils_detail.cu | 5 +-
 paddle/fluid/framework/details/op_registry.h | 38 +-
 .../details/parallel_ssa_graph_executor.cc | 7 +-
 .../details/parallel_ssa_graph_executor.h | 1 +
 .../framework/details/reduce_op_handle.cc | 17 +-
 .../fluid/framework/details/rpc_op_handle.cc | 1 +
 .../details/scope_buffered_monitor.cc | 1 +
 .../scope_buffered_ssa_graph_executor.h | 2 +
 .../details/sparse_all_reduce_op_handle.cc | 16 +-
 paddle/fluid/framework/device_worker.cc | 7 +-
 .../fluid/framework/device_worker_factory.cc | 1 +
 paddle/fluid/framework/dlpack_tensor.cc | 1 +
 paddle/fluid/framework/dlpack_tensor_test.cc | 3 +-
 .../fluid/framework/downpour_lite_worker.cc | 8 +-
 paddle/fluid/framework/downpour_worker.cc | 23 +-
 paddle/fluid/framework/eigen_test.cc | 3 +-
 paddle/fluid/framework/executor.cc | 7 +-
 paddle/fluid/framework/executor_cache.cc | 1 +
 .../fluid/framework/executor_thread_worker.cc | 19 +-
 .../fluid/framework/executor_thread_worker.h | 1 +
 paddle/fluid/framework/feed_fetch_method.cc | 2 +-
 paddle/fluid/framework/fleet/ascend_wrapper.h | 9 +-
 paddle/fluid/framework/fleet/box_wrapper.cc | 22 +-
 paddle/fluid/framework/fleet/box_wrapper.cu | 15 +-
 paddle/fluid/framework/fleet/box_wrapper.h | 37 +-
 .../fluid/framework/fleet/box_wrapper_impl.h | 10 +-
 paddle/fluid/framework/fleet/fleet_wrapper.h | 1 +
 paddle/fluid/framework/fleet/gloo_wrapper.cc | 1 +
 paddle/fluid/framework/fleet/gloo_wrapper.h | 5 +-
 paddle/fluid/framework/fleet/heter_context.h | 1 +
 .../cudf/concurrent_unordered_map.cuh.h | 3 +-
 .../framework/fleet/heter_ps/gpu_graph_node.h | 5 +-
 .../fleet/heter_ps/graph_gpu_ps_table.h | 6 +-
 .../fleet/heter_ps/graph_gpu_ps_table_inl.cu | 40 +-
 .../fleet/heter_ps/graph_gpu_wrapper.cu | 4 +-
 .../fleet/heter_ps/graph_gpu_wrapper.h | 5 +-
 .../framework/fleet/heter_ps/graph_sampler.h | 6 +-
 .../fleet/heter_ps/graph_sampler_inl.h | 4 +-
 .../framework/fleet/heter_ps/hashtable.h | 2 +
 .../fleet/heter_ps/hashtable_kernel.cu | 38 +-
 .../framework/fleet/heter_ps/heter_comm.h | 2 +
 .../framework/fleet/heter_ps/heter_comm_inl.h | 1 +
 .../fleet/heter_ps/heter_comm_kernel.cu | 8 +-
 .../framework/fleet/heter_ps/heter_ps.cc | 1 +
 .../framework/fleet/heter_ps/heter_ps.cu | 1 +
 .../fluid/framework/fleet/heter_ps/heter_ps.h | 1 +
 .../framework/fleet/heter_ps/heter_ps_base.h | 1 +
 .../framework/fleet/heter_ps/heter_resource.h | 1 +
 .../framework/fleet/heter_ps/optimizer.cuh.h | 1 +
 .../framework/fleet/heter_ps/test_comm.cu | 2 +
 .../fleet/heter_ps/test_cpu_graph_sample.cu | 2 +
 .../fleet/heter_ps/test_cpu_query.cu | 2 +
 .../framework/fleet/heter_ps/test_graph.cu | 2 +
 .../fleet/heter_ps/test_sample_rate.cu | 20 +-
 paddle/fluid/framework/fleet/metrics.cc | 21 +-
 paddle/fluid/framework/fleet/metrics.h | 2 +
 .../fluid/framework/fleet/ps_gpu_wrapper.cu | 1 +
 paddle/fluid/framework/fleet/ps_gpu_wrapper.h | 1 +
 paddle/fluid/framework/fleet/test_fleet.cc | 1 +
 paddle/fluid/framework/generator.cc | 1 +
 paddle/fluid/framework/generator.h | 1 +
 paddle/fluid/framework/gpu_utils.h | 17 +-
 paddle/fluid/framework/grad_op_desc_maker.h | 6 +-
 paddle/fluid/framework/heter_service.h | 1 +
 paddle/fluid/framework/hetercpu_worker.cc | 23 +-
 paddle/fluid/framework/heterxpu_trainer.cc | 1 +
 .../fluid/framework/infershape_utils_test.cc | 4 +-
 paddle/fluid/framework/inplace_op_inference.h | 1 +
 .../framework/io/crypto/aes_cipher_test.cc | 3 +
 paddle/fluid/framework/io/crypto/cipher.cc | 1 +
 .../fluid/framework/io/crypto/cipher_utils.cc | 1 +
 .../framework/io/crypto/cipher_utils_test.cc | 5 +-
 paddle/fluid/framework/io/fs.cc | 1 +
 paddle/fluid/framework/io/fs.h | 1 +
 paddle/fluid/framework/io/test_fs.cc | 2 +
 ...ptive_pool2d_convert_global_pass_tester.cc | 4 +-
 .../framework/ir/add_support_int8_pass.cc | 5 +-
 .../framework/ir/coalesce_grad_tensor_pass.cc | 2 +
 .../framework/ir/conv_bn_fuse_pass_tester.cc | 4 +-
 paddle/fluid/framework/ir/cost_model.cc | 1 +
 paddle/fluid/framework/ir/cost_model_test.cc | 1 +
 .../ir/cudnn_placement_pass_tester.cc | 4 +-
 .../framework/ir/delete_dropout_op_pass.cc | 4 +-
 .../ir/delete_fill_constant_op_pass.cc | 1 +
 .../ir/delete_quant_dequant_filter_op_pass.cc | 7 +-
 ...ding_eltwise_layernorm_fuse_pass_tester.cc | 4 +-
 .../ir/embedding_fc_lstm_fuse_pass.cc | 1 +
 ..._elementwise_layernorm_fuse_pass_tester.cc | 3 +-
 paddle/fluid/framework/ir/fc_fuse_pass.cc | 1 +
 .../fluid/framework/ir/fc_fuse_pass_tester.cc | 4 +-
 .../framework/ir/fc_gru_fuse_pass_tester.h | 4 +-
 .../fluid/framework/ir/fc_lstm_fuse_pass.cc | 1 +
 .../framework/ir/fc_lstm_fuse_pass_tester.h | 4 +-
 .../ir/fillconstant_elementwisemul_fuse.h | 1 +
 paddle/fluid/framework/ir/fuse_bn_act_pass.cc | 2 +
 .../framework/ir/fuse_bn_add_act_pass.cc | 2 +
 .../framework/ir/fuse_elewise_add_act_pass.cc | 2 +
 .../framework/ir/fuse_gemm_epilogue_pass.cc | 2 +
 .../fuse_adam_op_pass.cc | 1 +
 .../fuse_momentum_op_pass.cc | 5 +-
 .../fuse_optimizer_op_pass.cc | 1 +
 .../ir/fuse_relu_depthwise_conv_pass.cc | 2 +
 .../ir/fusion_group/code_generator.cc | 1 +
 .../ir/fusion_group/code_generator_helper.cc | 1 +
 .../ir/fusion_group/code_generator_tester.cc | 1 +
 .../elementwise_group_detector.cc | 1 +
 .../ir/fusion_group/fusion_group_pass.cc | 1 +
 .../fusion_group/fusion_group_pass_tester.cc | 4 +-
 .../framework/ir/fusion_group/operation.cc | 1 +
 .../framework/ir/fusion_group/subgraph.h | 1 +
 paddle/fluid/framework/ir/generate_pass.cc | 320 +++---
 .../framework/ir/generate_pass_tester.cc | 10 +-
 .../ir/gpu_cpu_map_matmul_to_mul_pass.cc | 2 +-
 paddle/fluid/framework/ir/graph.cc | 3 +-
 paddle/fluid/framework/ir/graph.h | 1 +
 paddle/fluid/framework/ir/graph_helper.cc | 7 +-
 .../fluid/framework/ir/graph_helper_test.cc | 5 +-
 .../framework/ir/graph_pattern_detector.cc | 15 +-
 .../ir/graph_pattern_detector_tester.cc | 12 +-
 paddle/fluid/framework/ir/graph_printer.h | 2 +
 paddle/fluid/framework/ir/graph_test.cc | 1 +
 .../framework/ir/graph_to_program_pass.cc | 1 +
 paddle/fluid/framework/ir/graph_traits.cc | 17 +-
 paddle/fluid/framework/ir/graph_viz_pass.cc | 2 +
 .../ir/identity_scale_op_clean_pass.cc | 67 +-
 .../fluid/framework/ir/ipu/avg_shard_pass.cc | 3 +-
 .../framework/ir/ipu/infer_shape_pass.cc | 1 +
 .../ir/ipu/inference_process_pass.cc | 5 +-
 .../ir/ipu/optimizer_state_align_pass.cc | 1 +
 .../fluid/framework/ir/is_test_pass_tester.cc | 4 +-
 .../framework/ir/layer_norm_fuse_pass.cc | 3 +-
 .../framework/ir/lock_free_optimize_pass.h | 50 +-
 .../framework/ir/matmul_scale_fuse_pass.cc | 2 +-
 ...uffer_shared_cross_op_memory_reuse_pass.cc | 10 +-
 .../buffer_shared_inplace_op_pass.cc | 5 +-
 .../memory_optimization_var_info.h | 1 +
 .../ir/memory_optimize_pass/op_graph_view.h | 10 +-
 .../recurrent_op_eager_deletion_pass.cc | 2 +-
 .../share_varinfo_into_cinn_pass.cc | 1 +
 .../share_varinfo_into_cinn_pass_test.cc | 1 +
 .../ir/mixed_precision_configure_pass.cc | 8 +-
 .../ir/mkldnn/batch_norm_act_fuse_pass.cc | 1 +
 .../mkldnn/batch_norm_act_fuse_pass_tester.cc | 57 +-
 .../compute_propagate_scales_mkldnn_pass.cc | 4 +-
 .../compute_propagate_scales_mkldnn_pass.h | 1 +
 ...conv_activation_mkldnn_fuse_pass_tester.cc | 5 +-
 .../ir/mkldnn/conv_bias_mkldnn_fuse_pass.h | 8 +-
 .../conv_bias_mkldnn_fuse_pass_tester.cc | 6 +-
 ...onv_concat_relu_mkldnn_fuse_pass_tester.cc | 4 +-
 .../framework/ir/mkldnn/cpu_bfloat16_pass.cc | 2 +-
 .../framework/ir/mkldnn/cpu_quantize_pass.cc | 3 +-
 .../ir/mkldnn/cpu_quantize_pass_tester.cc | 3 +-
 .../cpu_quantize_placement_pass_tester.cc | 4 +-
 .../depthwise_conv_mkldnn_pass_tester.cc | 3 +-
 .../ir/mkldnn/elt_act_mkldnn_fuse_pass.cc | 1 +
 .../ir/mkldnn/fc_act_mkldnn_fuse_pass.cc | 1 +
 .../mkldnn/fc_act_mkldnn_fuse_pass_tester.cc | 32 +-
 .../fc_elementwise_add_mkldnn_fuse_pass.cc | 1 +
 .../int8_scale_calculation_mkldnn_pass.cc | 16 +-
 ...t8_scale_calculation_mkldnn_pass_tester.cc | 3 +-
 .../ir/mkldnn/interpolate_mkldnn_pass.cc | 2 +
 .../matmul_transpose_reshape_fuse_pass.cc | 3 +
 ...tmul_transpose_reshape_fuse_pass_tester.cc | 1 +
 .../matmul_v2_transpose_reshape_fuse_pass.cc | 2 +
 .../mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc | 3 +-
 .../mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc | 1 +
 .../ir/mkldnn/mkldnn_inplace_pass.cc | 2 +
 .../framework/ir/mkldnn/mkldnn_inplace_pass.h | 1 +
 .../ir/mkldnn/mkldnn_inplace_pass_tester.cc | 5 +-
 .../framework/ir/mkldnn/mkldnn_pass_util.h | 1 +
 .../ir/mkldnn/mkldnn_placement_pass_tester.cc | 5 +-
 .../ir/mkldnn/multi_gru_fuse_pass.cc | 2 +
 .../framework/ir/mkldnn/multi_gru_fuse_pass.h | 1 +
 .../ir/mkldnn/multi_gru_fuse_pass_tester.cc | 3 +-
 .../ir/mkldnn/multi_gru_seq_fuse_pass.cc | 2 +
 .../ir/mkldnn/multi_gru_seq_fuse_pass.h | 1 +
 .../mkldnn/multi_gru_seq_fuse_pass_tester.cc | 4 +-
 .../ir/mkldnn/quant_dequant_mkldnn_pass.cc | 11 +-
 .../ir/mkldnn/quant_dequant_mkldnn_pass.h | 1 +
 ...shape_transpose_matmul_mkldnn_fuse_pass.cc | 2 +
 ...ranspose_matmul_mkldnn_fuse_pass_tester.cc | 4 +-
 ...pe_transpose_matmul_v2_mkldnn_fuse_pass.cc | 2 +
 .../mkldnn/scale_matmul_fuse_pass_tester.cc | 3 +-
 .../shuffle_channel_mkldnn_detect_pass.cc | 3 +-
 ...uffle_channel_mkldnn_detect_pass_tester.cc | 1 +
 .../softplus_activation_mkldnn_fuse_pass.cc | 1 +
 ...plus_activation_mkldnn_fuse_pass_tester.cc | 45 +-
 .../framework/ir/multi_batch_merge_pass.cc | 1 +
 .../add_reader_dependency_pass.cc | 1 +
 .../fix_op_run_order_pass.cc | 1 +
 .../fuse_all_reduce_op_pass.cc | 1 +
 .../multi_devices_graph_pass.cc | 32 +-
 .../multi_devices_graph_pass.h | 2 +-
 .../set_reader_device_info_utils.cc | 1 +
 .../ir/multihead_matmul_fuse_pass.cc | 37 +-
 .../ir/multihead_matmul_fuse_pass_tester.cc | 3 +-
 paddle/fluid/framework/ir/node_test.cc | 1 +
 .../framework/ir/op_compat_sensible_pass.cc | 2 +
 .../framework/ir/op_compat_sensible_pass.h | 1 +
 .../ir/op_compat_sensible_pass_tester.cc | 2 +-
 paddle/fluid/framework/ir/pass.cc | 8 +-
 paddle/fluid/framework/ir/pass_test.cc | 10 +-
 paddle/fluid/framework/ir/pass_test_util.cc | 3 +-
 .../fluid/framework/ir/pass_tester_helper.h | 1 +
 .../fluid/framework/ir/placement_pass_base.cc | 2 +
 .../ir/preln_skip_layernorm_fuse_pass.cc | 4 +-
 .../ir/repeated_fc_relu_fuse_pass.cc | 13 +-
 .../ir/repeated_fc_relu_fuse_pass_tester.cc | 4 +-
 .../ir/runtime_context_cache_pass.cc | 1 +
 .../ir/seqconv_eltadd_relu_fuse_pass.cc | 1 +
 .../framework/ir/seqpool_concat_fuse_pass.cc | 4 +-
 .../ir/seqpool_concat_fuse_pass_tester.cc | 3 +-
 .../ir/seqpool_cvm_concat_fuse_pass.cc | 36 +-
 .../ir/seqpool_cvm_concat_fuse_pass_tester.cc | 3 +-
 .../ir/shuffle_channel_detect_pass.cc | 3 +-
 .../ir/simplify_with_basic_ops_pass_tester.cc | 4 +-
 .../framework/ir/skip_layernorm_fuse_pass.cc | 4 +-
 .../ir/skip_layernorm_fuse_pass_tester.cc | 4 +-
 .../framework/ir/squared_mat_sub_fuse_pass.cc | 10 +-
 .../ir/sync_batch_norm_pass_tester.cc | 1 +
 .../ir/transpose_flatten_concat_fuse_pass.cc | 1 +
 .../ir/trt_map_matmul_to_mul_pass.cc | 2 +-
 .../ir/trt_multihead_matmul_fuse_pass.cc | 37 +-
 .../ir/trt_skip_layernorm_fuse_pass.cc | 4 +-
 .../ir/unsqueeze2_eltwise_fuse_pass_tester.cc | 4 +-
 .../fluid/framework/ir/yolo_box_fuse_pass.cc | 2 +
 paddle/fluid/framework/lod_tensor.h | 1 +
 paddle/fluid/framework/lod_tensor_array.h | 1 +
 paddle/fluid/framework/lod_tensor_test.cc | 3 +-
 paddle/fluid/framework/naive_executor.cc | 2 +
 paddle/fluid/framework/naive_executor_test.cc | 3 +
 .../framework/new_executor/data_transfer.cc | 7 +-
 .../framework/new_executor/event_manager.cc | 1 +
 .../new_executor/executor_statistics.cc | 6 +-
 .../new_executor/executor_statistics.h | 1 +
 .../event_garbage_collector.cc | 4 +-
 .../event_garbage_collector.h | 1 +
 .../garbage_collector/garbage_collector.cc | 1 +
 .../garbage_collector/garbage_collector.h | 1 +
 .../framework/new_executor/interpretercore.cc | 20 +-
 .../new_executor/interpretercore_util.cc | 28 +-
 .../new_executor/interpretercore_util.h | 3 +-
 .../new_executor/new_executor_defs.cc | 22 +-
 .../new_executor/standalone_executor.cc | 1 +
 .../new_executor/standalone_executor_test.cc | 1 +
 .../framework/new_executor/stream_analyzer.cc | 1 +
 .../framework/new_executor/stream_analyzer.h | 1 +
 .../new_executor/workqueue/event_count.h | 1 +
 .../new_executor/workqueue/events_waiter.cc | 2 +
 .../new_executor/workqueue/events_waiter.h | 1 +
 .../workqueue/nonblocking_threadpool.h | 1 +
 .../new_executor/workqueue/run_queue.h | 26 +-
 .../new_executor/workqueue/workqueue.cc | 15 +-
 .../new_executor/workqueue/workqueue.h | 16 +-
 .../new_executor/workqueue/workqueue_test.cc | 14 +-
 .../new_executor/workqueue/workqueue_utils.cc | 1 +
 .../new_executor/workqueue/workqueue_utils.h | 1 +
 .../no_need_buffer_vars_inference.cc | 2 +
 .../no_need_buffer_vars_inference_test.cc | 1 +
 paddle/fluid/framework/op_def_api.cc | 2 +
 paddle/fluid/framework/op_def_api.h | 4 +-
 paddle/fluid/framework/op_desc.cc | 28 +-
 paddle/fluid/framework/op_proto_maker.h | 1 +
 paddle/fluid/framework/op_registry_test.cc | 9 +-
 paddle/fluid/framework/op_version_proto.h | 1 +
 .../framework/op_version_registry_test.cc | 4 +-
 paddle/fluid/framework/operator.cc | 19 +-
 paddle/fluid/framework/operator.h | 17 +-
 .../framework/operator_exception_test.cc | 3 +-
 .../fluid/framework/operator_kernel_configs.h | 1 +
 paddle/fluid/framework/operator_test.cc | 4 +-
 .../framework/paddle2cinn/build_cinn_pass.cc | 26 +-
 .../paddle2cinn/build_cinn_pass_test.cc | 1 -
 .../framework/paddle2cinn/cinn_cache_key.cc | 2 +-
 .../paddle2cinn/cinn_cache_key_test.cc | 2 +
 .../framework/paddle2cinn/cinn_compiler.cc | 6 +-
 .../framework/paddle2cinn/cinn_compiler.h | 1 +
 .../paddle2cinn/cinn_compiler_test.cc | 2 +-
 .../paddle2cinn/cinn_graph_symbolization.cc | 2 +
 .../paddle2cinn/cinn_graph_symbolization.h | 2 +
 .../cinn_graph_symbolization_test.cc | 4 +-
 .../framework/paddle2cinn/transform_desc.h | 3 +
 .../paddle2cinn/transform_desc_test.cc | 2 +
 .../framework/paddle2cinn/transform_type.cc | 1 +
 .../framework/paddle2cinn/transform_type.h | 2 +-
 .../paddle2cinn/transform_type_test.cc | 1 +
 paddle/fluid/framework/parallel_executor.cc | 5 +-
 paddle/fluid/framework/parallel_executor.h | 2 +-
 paddle/fluid/framework/phi_utils.cc | 4 +-
 paddle/fluid/framework/phi_utils.h | 3 +-
 paddle/fluid/framework/phi_utils_test.cc | 1 +
 paddle/fluid/framework/program_desc.cc | 1 +
 paddle/fluid/framework/program_desc.h | 1 +
 paddle/fluid/framework/program_processing.cc | 1 +
 paddle/fluid/framework/prune.cc | 1 +
 paddle/fluid/framework/prune_test.cc | 1 +
 paddle/fluid/framework/ps_gpu_trainer.cc | 1 +
 paddle/fluid/framework/pull_dense_worker.cc | 1 +
 paddle/fluid/framework/reader.cc | 1 +
 paddle/fluid/framework/save_load_util.cc | 5 +-
 paddle/fluid/framework/save_load_util_test.cc | 3 +-
 paddle/fluid/framework/scope_guard.h | 13 +-
 paddle/fluid/framework/scope_guard_test.cc | 1 +
 paddle/fluid/framework/section_worker.cc | 1 +
 paddle/fluid/framework/selected_rows_utils.h | 3 +-
 .../framework/selected_rows_utils_test.cc | 4 +-
 paddle/fluid/framework/string_array.cc | 3 +-
 paddle/fluid/framework/tensor.h | 5 +-
 paddle/fluid/framework/tensor_impl.h | 13 +-
 paddle/fluid/framework/tensor_test.cc | 1 +
 paddle/fluid/framework/tensor_util.cc | 14 +-
 paddle/fluid/framework/tensor_util_test.cc | 98 +-
 paddle/fluid/framework/threadpool.cc | 5 +-
 paddle/fluid/framework/threadpool_test.cc | 2 +
 paddle/fluid/framework/trainer.cc | 1 +
 paddle/fluid/framework/trainer_factory.cc | 1 +
 paddle/fluid/framework/trainer_test.cc | 5 +-
 paddle/fluid/framework/type_defs.h | 1 +
 paddle/fluid/framework/unused_var_check.cc | 1 +
 paddle/fluid/framework/unused_var_check.h | 1 +
 paddle/fluid/framework/var_desc.cc | 14 +-
 .../framework/var_type_inference_test.cc | 9 +-
 paddle/fluid/framework/var_type_traits.cc | 2 +
 .../fluid/framework/var_type_traits_test.cc | 3 +-
 paddle/fluid/framework/version.cc | 4 +-
 paddle/fluid/framework/version_test.cc | 1 +
 paddle/fluid/imperative/all_reduce.cc | 1 +
 paddle/fluid/imperative/amp_auto_cast.cc | 7 +-
 paddle/fluid/imperative/basic_engine.h | 1 +
 paddle/fluid/imperative/bkcl_context.cc | 12 +-
 paddle/fluid/imperative/cncl_context.cc | 18 +-
 paddle/fluid/imperative/data_loader.cc | 1 +
 paddle/fluid/imperative/data_loader.h | 1 +
 paddle/fluid/imperative/execution_context.h | 1 +
 paddle/fluid/imperative/flags.cc | 1 +
 paddle/fluid/imperative/gloo_context.cc | 1 +
 paddle/fluid/imperative/gloo_context.h | 1 +
 .../fluid/imperative/gradient_accumulator.cc | 12 +-
 .../fluid/imperative/gradient_accumulator.h | 1 +
 paddle/fluid/imperative/hccl_context.cc | 18 +-
 .../fluid/imperative/infer_var_type_context.h | 1 +
 .../imperative/jit/program_desc_tracer.cc | 1 +
 paddle/fluid/imperative/layer.cc | 8 +-
 paddle/fluid/imperative/layout_autotune.cc | 6 +-
 paddle/fluid/imperative/layout_autotune.h | 2 +
 paddle/fluid/imperative/nccl_context.cc | 11 +-
 paddle/fluid/imperative/op_base.h | 1 +
 .../fluid/imperative/partial_grad_engine.cc | 1 +
 paddle/fluid/imperative/partial_grad_engine.h | 1 +
 paddle/fluid/imperative/prepared_operator.cc | 4 +-
 paddle/fluid/imperative/prepared_operator.h | 3 +-
 paddle/fluid/imperative/profiler.cc | 2 +
 paddle/fluid/imperative/py_layer_fwd.h | 6 +-
 paddle/fluid/imperative/reducer.cc | 12 +-
 paddle/fluid/imperative/reducer.h | 1 +
 .../imperative/tests/bkcl_context_test.cc | 4 +-
 .../imperative/tests/cncl_context_test.cc | 6 +-
 .../tests/heter_ccl_context_test.cc | 6 +-
 .../imperative/tests/nccl_context_test.cc | 6 +-
 paddle/fluid/imperative/tests/test_eager.cc | 5 +-
 .../tests/test_gradient_accmulator.cc | 4 +-
 paddle/fluid/imperative/tests/test_group.cc | 2 +-
 .../fluid/imperative/tests/test_prepare_op.cc | 2 +
 paddle/fluid/imperative/tracer.cc | 2 +
 paddle/fluid/imperative/tracer.h | 1 +
 paddle/fluid/imperative/var_helper.h | 1 +
 .../fluid/inference/analysis/analysis_pass.h | 1 +
 paddle/fluid/inference/analysis/analyzer.cc | 2 +
 paddle/fluid/inference/analysis/analyzer.h | 1 +
 .../inference/analysis/analyzer_tester.cc | 4 +-
 paddle/fluid/inference/analysis/dot.h | 1 +
 paddle/fluid/inference/analysis/dot_tester.cc | 5 +-
 paddle/fluid/inference/analysis/helper.h | 6 +-
 .../inference/analysis/ir_pass_manager.cc | 4 +-
 .../inference/analysis/ir_pass_manager.h | 1 +
 .../analysis/ir_passes/dlnne_subgraph_pass.cc | 41 +-
 .../analysis/ir_passes/lite_subgraph_pass.cc | 20 +-
 .../analysis/ir_passes/lite_subgraph_pass.h | 2 +
 .../ir_passes/lite_subgraph_pass_tester.cc | 5 +-
 .../analysis/passes/ir_analysis_pass.cc | 2 +
 .../analysis/passes/ir_analysis_pass.h | 1 +
 .../analysis/passes/ir_graph_build_pass.cc | 2 +
 .../analysis/passes/ir_graph_build_pass.h | 1 +
 .../passes/ir_graph_to_program_pass.cc | 1 +
 .../passes/ir_graph_to_program_pass.h | 1 +
 .../ir_params_sync_among_devices_pass.cc | 1 +
 .../analysis/passes/memory_optimize_pass.cc | 3 +-
 .../analysis/passes/memory_optimize_pass.h | 19 +-
 .../fluid/inference/analysis/passes/passes.cc | 1 +
 .../fluid/inference/analysis/passes/passes.h | 1 +
 paddle/fluid/inference/analysis/ut_helper.h | 2 +
 paddle/fluid/inference/api/analysis_config.cc | 6 +-
 .../fluid/inference/api/analysis_predictor.cc | 7 +-
 .../fluid/inference/api/analysis_predictor.h | 8 +-
 .../api/analysis_predictor_tester.cc | 2 +
 paddle/fluid/inference/api/api.cc | 1 +
 paddle/fluid/inference/api/api_impl.cc | 9 +-
 paddle/fluid/inference/api/api_impl.h | 1 +
 paddle/fluid/inference/api/api_tester.cc | 1 +
 .../api/demo_ci/onnxruntime_mobilenet_demo.cc | 2 +
 .../api/demo_ci/trt_mobilenet_demo.cc | 1 +
 paddle/fluid/inference/api/demo_ci/utils.h | 2 +
 .../fluid/inference/api/demo_ci/vis_demo.cc | 1 +
 .../api/demo_ci/windows_mobilenet.cc | 3 +-
 .../inference/api/details/zero_copy_tensor.cc | 10 +-
 paddle/fluid/inference/api/helper.cc | 1 +
 paddle/fluid/inference/api/helper.h | 6 +-
 paddle/fluid/inference/api/infer_context.h | 10 +-
 .../fluid/inference/api/mkldnn_quantizer.cc | 17 +-
 paddle/fluid/inference/api/mkldnn_quantizer.h | 1 +
 .../inference/api/mkldnn_quantizer_tester.cc | 3 +-
 .../inference/api/onnxruntime_predictor.h | 6 +-
 .../api/onnxruntime_predictor_tester.cc | 5 +-
 .../inference/api/paddle_analysis_config.h | 13 +-
 paddle/fluid/inference/api/paddle_api.h | 11 +-
 .../inference/api/paddle_infer_contrib.cc | 1 +
 .../inference/api/paddle_pass_builder.cc | 1 +
 paddle/fluid/inference/api/resource_manager.h | 1 +
 paddle/fluid/inference/capi/c_api.cc | 1 +
 paddle/fluid/inference/capi/c_api_internal.h | 1 +
 paddle/fluid/inference/capi/pd_config.cc | 1 +
 paddle/fluid/inference/capi/pd_predictor.cc | 1 +
 paddle/fluid/inference/capi/pd_tensor.cc | 1 +
 paddle/fluid/inference/capi_exp/lod_demo.cc | 2 +
 paddle/fluid/inference/capi_exp/pd_config.cc | 1 +
 .../fluid/inference/capi_exp/pd_predictor.cc | 1 +
 paddle/fluid/inference/capi_exp/pd_tensor.cc | 1 +
 paddle/fluid/inference/capi_exp/pd_utils.cc | 3 +-
 .../com_baidu_paddle_inference_Config.cpp | 3 +-
 .../com_baidu_paddle_inference_Predictor.cpp | 2 +
 .../com_baidu_paddle_inference_Tensor.cpp | 2 +
 .../javaapi/native/jni_convert_util.h | 5 +-
 paddle/fluid/inference/io.h | 1 +
 paddle/fluid/inference/lite/engine.cc | 1 +
 paddle/fluid/inference/lite/op_teller.cc | 3 +-
 paddle/fluid/inference/lite/op_teller.h | 1 +
 paddle/fluid/inference/lite/tensor_utils.cc | 6 +-
 .../fluid/inference/lite/test_engine_lite.cc | 6 +-
 .../fluid/inference/lite/test_tensor_utils.cc | 5 +-
 .../tensorrt/convert/activation_op.cc | 1 +
 .../tensorrt/convert/deformable_conv_op.cc | 1 +
 .../convert/flatten_contiguous_range_op.cc | 9 +-
 .../tensorrt/convert/group_norm_op.cc | 1 +
 .../tensorrt/convert/io_converter.cc | 19 +-
 .../inference/tensorrt/convert/io_converter.h | 1 +
 .../tensorrt/convert/multiclass_nms3_op.cc | 1 +
 .../tensorrt/convert/multiclass_nms_op.cc | 1 +
 .../inference/tensorrt/convert/op_converter.h | 11 +-
 .../inference/tensorrt/convert/softmax_op.cc | 1 +
 .../tensorrt/convert/test_activation_op.cc | 1 +
 .../tensorrt/convert/test_batch_norm_op.cc | 1 +
 .../tensorrt/convert/test_concat_op.cc | 1 +
 .../tensorrt/convert/test_conv2d_op.cc | 1 +
 .../tensorrt/convert/test_dropout_op.cc | 1 +
 .../tensorrt/convert/test_elementwise_op.cc | 1 +
 .../inference/tensorrt/convert/test_fc_op.cc | 1 +
 .../tensorrt/convert/test_io_converter.cc | 1 +
 .../tensorrt/convert/test_leaky_relu_op.cc | 1 +
 .../tensorrt/convert/test_mish_op.cc | 1 +
 .../inference/tensorrt/convert/test_mul_op.cc | 1 +
 .../convert/test_nearest_interp_v2_op.cc | 1 +
 .../tensorrt/convert/test_op_converter.cc | 3 +-
 .../inference/tensorrt/convert/test_pad_op.cc | 1 +
 .../tensorrt/convert/test_pool2d_op.cc | 2 +
 .../tensorrt/convert/test_prelu_op.cc | 1 +
 .../convert/test_shuffle_channel_op.cc | 1 +
 .../tensorrt/convert/test_softmax_op.cc | 1 +
 .../tensorrt/convert/test_split_op.cc | 1 +
 .../tensorrt/convert/test_swish_op.cc | 1 +
 .../inference/tensorrt/convert/unary_op.cc | 2 +
 .../inference/tensorrt/convert/yolo_box_op.cc | 1 +
 paddle/fluid/inference/tensorrt/engine.cc | 1 +
 paddle/fluid/inference/tensorrt/engine.h | 3 +-
 paddle/fluid/inference/tensorrt/helper.h | 2 +
 paddle/fluid/inference/tensorrt/op_teller.cc | 2 +
 paddle/fluid/inference/tensorrt/op_teller.h | 1 +
 .../plugin/anchor_generator_op_plugin.cu | 1 +
 .../plugin/deformable_conv_op_plugin.cu | 35 +-
 .../tensorrt/plugin/elementwise_op_plugin.cu | 19 +-
 .../tensorrt/plugin/elementwise_op_plugin.h | 1 +
 .../plugin/emb_eltwise_layernorm_plugin.cu | 11 +-
 .../tensorrt/plugin/gather_nd_op_plugin.h | 2 +
 .../tensorrt/plugin/gelu_op_plugin.cu | 30 +-
 .../tensorrt/plugin/gelu_op_plugin.h | 2 +
 .../tensorrt/plugin/hard_swish_op_plugin.cu | 1 +
 .../tensorrt/plugin/hard_swish_op_plugin.h | 2 +
 .../plugin/instance_norm_op_plugin.cu | 2 +
 .../tensorrt/plugin/layer_norm_op_plugin.cu | 2 +
 .../tensorrt/plugin/layer_norm_op_plugin.h | 1 +
 .../tensorrt/plugin/matmul_op_int8_plugin.h | 2 +-
 .../tensorrt/plugin/mish_op_plugin.cu | 37 +-
 .../tensorrt/plugin/mish_op_plugin.h | 2 +
 .../tensorrt/plugin/pool3d_op_plugin.cu | 9 +-
 .../tensorrt/plugin/pool3d_op_plugin.h | 2 +
 .../tensorrt/plugin/pool_op_plugin.cu | 9 +-
 .../tensorrt/plugin/pool_op_plugin.h | 2 +
 .../tensorrt/plugin/prelu_op_plugin.cu | 9 +-
 .../tensorrt/plugin/prelu_op_plugin.h | 2 +-
 .../tensorrt/plugin/qkv_to_context_plugin.cu | 19 +-
 .../tensorrt/plugin/recover_padding_plugin.h | 2 +-
 .../tensorrt/plugin/remove_padding_plugin.h | 2 +-
 .../tensorrt/plugin/roi_align_op_plugin.cu | 14 +-
 .../plugin/skip_layernorm_op_plugin.cu | 7 +-
 .../tensorrt/plugin/slice_op_plugin.cu | 16 +-
 .../tensorrt/plugin/split_op_plugin.cu | 2 +
 .../tensorrt/plugin/split_op_plugin.h | 2 +
 .../tensorrt/plugin/stack_op_plugin.cu | 6 +-
 .../tensorrt/plugin/stack_op_plugin.h | 2 +
 .../tensorrt/plugin/swish_op_plugin.cu | 15 +-
 .../tensorrt/plugin/test_split_plugin.cc | 1 +
 .../plugin/transformer_input_convert_plugin.h | 2 +-
 .../inference/tensorrt/plugin/trt_plugin.h | 1 +
 .../tensorrt/plugin/trt_plugin_utils.h | 1 +
 .../tensorrt/plugin/yolo_box_head_op_plugin.h | 1 +
 .../fluid/inference/tensorrt/test_tensorrt.cc | 1 +
 .../inference/tensorrt/trt_int8_calibrator.h | 1 +
 .../tests/api/analyzer_capi_exp_gpu_tester.cc | 2 +
 .../tests/api/analyzer_capi_exp_int_tester.cc | 2 +
 .../tests/api/analyzer_capi_exp_ner_tester.cc | 2 +
 .../api/analyzer_capi_exp_pd_config_tester.cc | 2 +
 .../api/analyzer_capi_exp_pd_tensor_tester.cc | 2 +
 .../analyzer_capi_exp_pd_threads_tester.cc | 2 +
 .../tests/api/analyzer_capi_exp_xpu_tester.cc | 2 +
 .../tests/api/analyzer_capi_gpu_tester.cc | 2 +
 .../tests/api/analyzer_capi_int_tester.cc | 2 +
 .../tests/api/analyzer_capi_ner_tester.cc | 2 +
 .../api/analyzer_capi_pd_tensor_tester.cc | 7 +-
 .../tests/api/analyzer_capi_tester.cc | 2 +
 .../tests/api/analyzer_capi_xpu_tester.cc | 2 +
 .../tests/api/analyzer_dam_tester.cc | 1 +
 ...nalyzer_detect_functional_mkldnn_tester.cc | 2 +
 .../tests/api/analyzer_detect_tester.cc | 2 +
 .../analyzer_image_classification_tester.cc | 1 +
 ...alyzer_int8_image_classification_tester.cc | 1 +
 .../analyzer_int8_object_detection_tester.cc | 1 +
 .../tests/api/analyzer_lac_tester.cc | 5 +-
 .../analyzer_lexical_analysis_gru_tester.cc | 22 +-
 .../tests/api/analyzer_mmp_tester.cc | 9 +-
 .../api/analyzer_paddle_tensor_tester.cc | 3 +-
 ...lyzer_quant_image_classification_tester.cc | 1 +
 .../tests/api/analyzer_seq_conv1_tester.cc | 5 +-
 ...yzer_seq_pool1_compare_determine_tester.cc | 1 +
 .../api/analyzer_seq_pool1_compare_tester.cc | 1 +
 ...seq_pool1_fuse_compare_zero_copy_tester.cc | 1 +
 .../analyzer_seq_pool1_fuse_statis_tester.cc | 1 +
 .../api/analyzer_seq_pool1_profile_tester.cc | 1 +
 .../api/analyzer_seq_pool1_tester_helper.h | 1 +
 .../api/analyzer_transformer_tester_helper.h | 1 +
 .../tests/api/analyzer_vis_tester.cc | 2 +
 .../tests/api/analyzer_vit_ocr_tester.cc | 1 +
 .../api/analyzer_zerocopy_tensor_tester.cc | 3 +-
 .../inference/tests/api/config_printer.h | 1 +
 .../tests/api/ipu_resnet50_fp16_test.cc | 1 +
 .../inference/tests/api/ipu_resnet50_test.cc | 1 +
 .../tests/api/ipu_word2vec_sample.cc | 2 +-
 .../tests/api/lite_mul_model_test.cc | 3 +-
 .../inference/tests/api/lite_resnet50_test.cc | 1 +
 .../api/mkldnn_quantizer_config_tester.cc | 8 +-
 .../paddle_infer_api_copy_tensor_tester.cc | 2 +
 .../api/paddle_infer_api_errors_tester.cc | 1 -
 .../tests/api/paddle_infer_api_test.cc | 3 +-
 .../fluid/inference/tests/api/tester_helper.h | 4 +-
 .../tests/api/trt_cascade_rcnn_test.cc | 2 +-
 ...e_ernie_fp16_serialize_deserialize_test.cc | 2 +-
 ..._shape_ernie_serialize_deserialize_test.cc | 2 +-
 ...c_shape_ernie_serialize_deserialize_test.h | 2 +-
 .../tests/api/trt_dynamic_shape_ernie_test.cc | 79 +-
 .../tests/api/trt_dynamic_shape_test.cc | 2 +-
 ...rt_dynamic_shape_transformer_prune_test.cc | 2 +-
 .../inference/tests/api/trt_fc_prelu_test.cc | 2 +-
 .../api/trt_instance_norm_converter_test.cc | 2 +-
 .../inference/tests/api/trt_mobilenet_test.cc | 2 +-
 .../tests/api/trt_quant_int8_test.cc | 3 +-
 .../api/trt_quant_int8_yolov3_r50_test.cc | 3 +-
 .../inference/tests/api/trt_resnet50_test.cc | 2 +-
 .../inference/tests/api/trt_resnext_test.cc | 2 +-
 .../tests/api/trt_split_converter_test.cc | 2 +-
 .../inference/tests/api/trt_test_helper.h | 2 +-
 .../inference/tests/infer_ut/test_suite.h | 8 +-
 .../fluid/inference/utils/benchmark_tester.cc | 3 +-
 paddle/fluid/inference/utils/io_utils.cc | 5 +-
 .../fluid/inference/utils/io_utils_tester.cc | 4 +-
 paddle/fluid/inference/utils/singleton.h | 1 +
 .../inference/utils/table_printer_tester.cc | 3 +-
 .../memory/allocation/allocator_facade.cc | 1 +
 .../memory/allocation/allocator_facade.h | 1 +
 .../allocator_facade_abs_flags_test.cc | 3 +-
 .../auto_growth_best_fit_allocator.cc | 1 +
 ...o_growth_best_fit_allocator_facade_test.cc | 2 +
 .../auto_growth_best_fit_allocator_test.cc | 6 +-
 .../memory/allocation/best_fit_allocator.cc | 1 +
 .../memory/allocation/best_fit_allocator.h | 1 +
 .../fluid/memory/allocation/cuda_allocator.cc | 1 +
 .../fluid/memory/allocation/cuda_allocator.h | 1 +
 .../memory/allocation/cuda_ipc_allocator.cc | 3 +-
 .../allocation/cuda_managed_allocator.cc | 1 +
 .../allocation/cuda_virtual_mem_allocator.cc | 1 +
 .../allocation/cuda_virtual_mem_allocator.h | 2 +
 .../memory/allocation/custom_allocator.cc | 1 +
 .../memory/allocation/custom_allocator.h | 1 +
 .../fluid/memory/allocation/mmap_allocator.cc | 13 +-
 .../allocation/naive_best_fit_allocator.cc | 1 -
 .../allocation/naive_best_fit_allocator.h | 1 +
 .../fluid/memory/allocation/npu_allocator.cc | 2 +
 .../fluid/memory/allocation/npu_allocator.h | 1 +
 .../memory/allocation/pinned_allocator.cc | 1 +
 .../memory/allocation/retry_allocator.cc | 5 +-
 .../memory/allocation/retry_allocator_test.cc | 1 +
 .../allocation/stream_safe_cuda_allocator.cc | 1 +
 .../allocation/stream_safe_cuda_allocator.h | 1 +
 .../allocation/thread_local_allocator_test.cc | 2 +
 ...l_memory_auto_growth_best_fit_allocator.cc | 3 +-
 paddle/fluid/memory/buffer.h | 1 +
 .../fluid/memory/detail/system_allocator.cc | 30 +-
 paddle/fluid/memory/detail/system_allocator.h | 1 +
 paddle/fluid/memory/get_base_ptr_test.cu | 1 +
 paddle/fluid/memory/malloc.h | 4 +-
 paddle/fluid/memory/memory_stats_test.cc | 3 +-
 paddle/fluid/memory/pinned_memory_test.cu | 2 +-
 paddle/fluid/memory/stats.h | 20 +-
 paddle/fluid/memory/stats_test.cc | 2 +
 .../memory/stream_safe_cuda_alloc_test.cu | 7 +-
 paddle/fluid/operators/abs_op.cc | 1 +
 .../fluid/operators/activation_cudnn_op.cu.cc | 2 +-
 paddle/fluid/operators/activation_op.cc | 50 +-
 paddle/fluid/operators/activation_op.h | 10 +-
 paddle/fluid/operators/activation_op_xpu.cc | 20 +-
 .../operators/add_position_encoding_op.cc | 1 +
 paddle/fluid/operators/addmm_op.cc | 1 +
 paddle/fluid/operators/affine_channel_op.cc | 1 +
 paddle/fluid/operators/affine_channel_op.cu | 40 +-
 .../fluid/operators/affine_channel_op_xpu.cc | 1 +
 .../operators/affine_grid_cudnn_op.cu.cc | 5 +-
 paddle/fluid/operators/affine_grid_op.cc | 2 +
 paddle/fluid/operators/affine_grid_op.cu | 4 +-
 paddle/fluid/operators/affine_grid_op.h | 1 +
 .../amp/alloc_float_status_op_npu.cc | 1 +
 .../amp/check_finite_and_unscale_op.cu | 8 +-
 .../check_finite_and_unscale_op_npu_test.cc | 1 +
 .../amp/check_finite_and_unscale_op_xpu.cc | 61 +-
 .../amp/clear_float_status_op_npu.cc | 1 +
 .../operators/amp/get_float_status_op_npu.cc | 1 +
 .../operators/amp/update_loss_scaling_op.cc | 2 +
 .../operators/amp/update_loss_scaling_op.cu | 1 +
 .../operators/amp/update_loss_scaling_op.h | 1 +
 .../amp/update_loss_scaling_op_npu.cc | 3 +-
 .../amp/update_loss_scaling_op_xpu.cc | 12 +-
 paddle/fluid/operators/angle_op.h | 2 +-
 paddle/fluid/operators/arg_max_op.cc | 33 +-
 paddle/fluid/operators/arg_min_max_op_base.h | 1 +
 paddle/fluid/operators/arg_min_op.cc | 30 +-
 paddle/fluid/operators/array_operator.h | 1 +
 .../fluid/operators/array_to_lod_tensor_op.cc | 1 +
 paddle/fluid/operators/ascend_trigger_op.h | 1 +
 paddle/fluid/operators/assign_op_xpu.cc | 4 +-
 paddle/fluid/operators/attention_lstm_op.cc | 34 +-
 .../fluid/operators/average_accumulates_op.h | 1 +
 paddle/fluid/operators/batch_fc_op.cc | 6 +-
 paddle/fluid/operators/batch_fc_op.cu | 1 +
 paddle/fluid/operators/batch_norm_op.cc | 11 +-
 paddle/fluid/operators/batch_norm_op.h | 1 +
 paddle/fluid/operators/batch_norm_op_mlu.cc | 2 +-
 paddle/fluid/operators/batch_norm_op_npu.cc | 14 +-
 paddle/fluid/operators/batch_norm_op_xpu.cc | 44 +-
 paddle/fluid/operators/batch_size_like.h | 1 +
 .../fluid/operators/beam_search_decode_op.cc | 3 +-
 .../operators/beam_search_decode_op_test.cc | 8 +-
 paddle/fluid/operators/beam_search_op.cc | 1 +
 paddle/fluid/operators/beam_search_op.cu.cc | 1 +
 paddle/fluid/operators/beam_search_op_npu.cc | 2 +-
 paddle/fluid/operators/benchmark/op_tester.cc | 2 +
 paddle/fluid/operators/benchmark/op_tester.h | 1 +
 .../operators/benchmark/op_tester_config.cc | 2 +
 paddle/fluid/operators/bilateral_slice_op.cc | 2 +
 paddle/fluid/operators/bilateral_slice_op.cu | 41 +-
 paddle/fluid/operators/bilateral_slice_op.h | 1 +
 paddle/fluid/operators/bmm_op.cc | 1 +
 paddle/fluid/operators/bmm_op.h | 1 +
 paddle/fluid/operators/bmm_op_xpu.cc | 2 +-
 paddle/fluid/operators/bpr_loss_op.cc | 1 +
 paddle/fluid/operators/bpr_loss_op.h | 5 +-
 .../fluid/operators/broadcast_tensors_op.cc | 2 +-
 paddle/fluid/operators/cast_op.cc | 2 +
 paddle/fluid/operators/cast_op.h | 1 -
 paddle/fluid/operators/cast_op_xpu.cc | 3 +-
 paddle/fluid/operators/center_loss_op.cc | 1 +
 paddle/fluid/operators/center_loss_op.cu | 1 +
 paddle/fluid/operators/center_loss_op.h | 1 +
 paddle/fluid/operators/chunk_eval_op.cc | 12 +-
 .../operators/cinn/cinn_instruction_run_op.cc | 13 +-
 .../cinn/cinn_instruction_run_op.cu.cc | 1 +
 .../operators/cinn/cinn_instruction_run_op.h | 1 +
 .../cinn/cinn_instruction_run_op_test.cc | 2 +
 .../operators/cinn/cinn_launch_context.cc | 6 +-
 .../operators/cinn/cinn_launch_context.h | 1 +
 .../cinn/cinn_launch_context_test.cc | 6 +-
 paddle/fluid/operators/cinn/cinn_launch_op.cc | 2 +
 .../fluid/operators/cinn/cinn_launch_op.cu.cc | 1 +
 paddle/fluid/operators/cinn/cinn_launch_op.h | 20 +-
 .../operators/cinn/cinn_launch_op_test.cc | 3 +
 paddle/fluid/operators/cinn/cinn_op_helper.cc | 1 +
 paddle/fluid/operators/cinn/cinn_op_helper.h | 1 +
 paddle/fluid/operators/cinn/test_helper.h | 1 +
 .../fluid/operators/class_center_sample_op.cu | 3 +
 .../fluid/operators/class_center_sample_op.h | 1 +
 paddle/fluid/operators/clip_by_norm_op.h | 9 +-
 paddle/fluid/operators/clip_by_norm_op_xpu.cc | 3 +-
 paddle/fluid/operators/clip_op.cc | 20 +-
 paddle/fluid/operators/clip_op_xpu.cc | 9 +-
 paddle/fluid/operators/coalesce_tensor_op.cc | 21 +-
 .../operators/collective/allreduce_op.cc | 4 +-
 .../fluid/operators/collective/barrier_op.h | 1 +
 .../operators/collective/broadcast_op.cc | 1 +
 .../operators/collective/c_allgather_op.cc | 5 +-
 .../operators/collective/c_allgather_op.h | 1 +
 .../collective/c_allgather_op_npu.cc | 4 +-
 .../collective/c_allgather_op_npu_test.cc | 7 +-
 .../collective/c_allreduce_max_op_npu_test.cc | 7 +-
 .../operators/collective/c_allreduce_op.h | 10 +-
 .../collective/c_allreduce_sum_op_npu_test.cc | 7 +-
 .../operators/collective/c_broadcast_op.h | 1 +
 .../collective/c_broadcast_op_npu_test.cc | 7 +-
 .../collective/c_comm_init_all_op.cc | 7 +-
 .../collective/c_comm_init_multitrainer_op.cc | 1 +
 .../operators/collective/c_comm_init_op.cc | 5 +-
 .../fluid/operators/collective/c_concat_op.cc | 14 +-
 .../operators/collective/c_concat_op.cu.cc | 3 +-
 .../operators/collective/c_gen_bkcl_id_op.cc | 3 +-
 .../operators/collective/c_gen_cncl_id_op.cc | 4 +-
 .../operators/collective/c_gen_hccl_id_op.cc | 5 +-
 .../operators/collective/c_gen_nccl_id_op.cc | 3 +-
 .../fluid/operators/collective/c_reduce_op.h | 17 +-
 .../collective/c_reduce_sum_op_npu_test.cc | 7 +-
 .../collective/c_reducescatter_op_npu_test.cc | 7 +-
 .../fluid/operators/collective/c_scatter_op.h | 1 +
 .../c_softmax_with_cross_entropy_op.cu | 16 +-
 .../fluid/operators/collective/c_split_op.cc | 14 +-
 .../fluid/operators/collective/c_split_op.cu | 9 +-
 .../c_sync_comm_stream_op_npu_test.cc | 5 +-
 .../collective/checknumeric_npu_test.cc | 7 +-
 .../operators/collective/gen_bkcl_id_op.cc | 10 +-
 .../operators/collective/gen_hccl_id_op.cc | 10 +-
 .../collective/gen_hccl_id_op_helper.cc | 1 +
 .../operators/collective/gen_nccl_id_op.cc | 7 +-
 .../collective/partial_allgather_op.cc | 5 +-
 .../collective/partial_allgather_op_npu.cc | 2 +-
 .../operators/collective/partial_recv_op.cc | 1 +
 .../collective/partial_recv_op_npu.cc | 6 +-
 .../collective/partial_send_op_npu.cc | 6 +-
 .../fluid/operators/collective/recv_v2_op.cc | 1 +
 .../operators/collective/recv_v2_op_npu.cc | 5 +-
 .../collective/recv_v2_op_npu_test.cc | 7 +-
 .../operators/collective/send_v2_op_npu.cc | 5 +-
 .../collective/send_v2_op_npu_test.cc | 8 +-
 .../operators/common_infer_shape_functions.cc | 13 +-
 paddle/fluid/operators/complex_op.cc | 1 +
 paddle/fluid/operators/complex_view_op.cc | 1 +
 paddle/fluid/operators/complex_view_op.cu | 3 +-
 paddle/fluid/operators/concat_op.cc | 3 +-
 paddle/fluid/operators/concat_op.h | 2 +-
 paddle/fluid/operators/concat_op_mlu.cc | 9 +-
 paddle/fluid/operators/concat_op_xpu.cc | 37 +-
 paddle/fluid/operators/conj_op.cc | 5 +-
 paddle/fluid/operators/conj_op.cu | 5 +-
 .../fluid/operators/controlflow/bitwise_op.cc | 1 +
 .../fluid/operators/controlflow/compare_op.cc | 14 +-
 .../controlflow/conditional_block_op.h | 9 +-
 .../fluid/operators/controlflow/fetch_op.cc | 20 +-
 .../operators/controlflow/fetch_v2_op.cc | 20 +-
 .../operators/controlflow/get_places_op.cc | 7 +-
 .../fluid/operators/controlflow/logical_op.cc | 1 +
 .../fluid/operators/controlflow/op_variant.h | 5 +-
 .../controlflow/recurrent_op_helper.cc | 1 +
 .../fluid/operators/controlflow/while_op.cc | 11 +-
 .../operators/controlflow/while_op_helper.cc | 1 +
 paddle/fluid/operators/conv_base_helper.h | 1 +
 paddle/fluid/operators/conv_cudnn_op_cache.h | 1 +
 paddle/fluid/operators/conv_op.cc | 34 +-
 paddle/fluid/operators/conv_op.h | 1 +
 paddle/fluid/operators/conv_op_npu.cc | 98 +-
 paddle/fluid/operators/conv_op_xpu.cc | 3 +-
 paddle/fluid/operators/conv_shift_op.cc | 2 +
 paddle/fluid/operators/conv_transpose_op.cc | 1 +
 .../fluid/operators/conv_transpose_op_npu.cc | 40 +-
 .../fluid/operators/conv_transpose_op_xpu.cc | 4 +-
 paddle/fluid/operators/correlation_op.cc | 1 +
 paddle/fluid/operators/correlation_op.cu | 31 +-
 paddle/fluid/operators/cos_sim_op.cc | 1 +
 paddle/fluid/operators/crf_decoding_op.cc | 11 +-
 paddle/fluid/operators/crf_decoding_op.h | 3 +-
 paddle/fluid/operators/crop_op.cc | 1 +
 paddle/fluid/operators/crop_op.h | 23 +-
 paddle/fluid/operators/crop_tensor_op.cc | 1 +
 paddle/fluid/operators/crop_tensor_op.h | 23 +-
 paddle/fluid/operators/cross_entropy_op.cc | 1 +
 paddle/fluid/operators/cross_op.cc | 3 +-
 paddle/fluid/operators/ctc_align_op.cu | 10 +-
 paddle/fluid/operators/ctc_align_op.h | 2 +
 paddle/fluid/operators/cudnn_lstm_cache.h | 1 +
 paddle/fluid/operators/cudnn_lstm_op.cc | 1 +
 paddle/fluid/operators/cudnn_rnn_cache.h | 1 +
 paddle/fluid/operators/cumsum_op.cc | 15 +-
 paddle/fluid/operators/cvm_op.cc | 2 +
 paddle/fluid/operators/data_norm_op.cc | 32 +-
 paddle/fluid/operators/data_norm_op.cu | 11 +-
 paddle/fluid/operators/decode_jpeg_op.cu | 1 +
 paddle/fluid/operators/deformable_conv_op.cc | 1 +
 .../fluid/operators/deformable_conv_op_xpu.cc | 46 +-
 .../fluid/operators/deformable_conv_v1_op.cc | 1 +
 .../operators/deformable_psroi_pooling_op.cc | 13 +-
 .../operators/deformable_psroi_pooling_op.cu | 2 +
 .../operators/deformable_psroi_pooling_op.h | 1 +
 paddle/fluid/operators/dequantize_op.cc | 11 +-
 paddle/fluid/operators/dequantize_op.h | 1 +
 paddle/fluid/operators/dequeue_op.cc | 1 +
 .../operators/detection/anchor_generator_op.h | 1 +
 paddle/fluid/operators/detection/bbox_util.h | 6 +-
 .../fluid/operators/detection/box_clip_op.cc | 1 +
 .../fluid/operators/detection/box_clip_op.cu | 1 +
 .../fluid/operators/detection/box_clip_op.h | 1 +
 .../fluid/operators/detection/box_coder_op.cc | 1 +
 .../fluid/operators/detection/box_coder_op.cu | 1 +
 .../fluid/operators/detection/box_coder_op.h | 1 +
 .../detection/box_decoder_and_assign_op.h | 1 +
 .../detection/collect_fpn_proposals_op.cc | 1 +
 .../detection/collect_fpn_proposals_op.cu | 1 +
 .../detection/collect_fpn_proposals_op.h | 1 +
 .../detection/density_prior_box_op.h | 1 +
 .../detection/distribute_fpn_proposals_op.cc | 1 +
 .../detection/distribute_fpn_proposals_op.cu | 1 +
 .../detection/distribute_fpn_proposals_op.h | 1 +
 .../detection/generate_mask_labels_op.cc | 2 +
 .../detection/generate_proposal_labels_op.cc | 2 +
 .../detection/generate_proposals_op.cc | 1 +
 .../detection/generate_proposals_op.cu | 2 +
 .../detection/generate_proposals_v2_op.cc | 1 +
 .../detection/generate_proposals_v2_op.cu | 2 +
 paddle/fluid/operators/detection/gpc.cc | 1 +
 .../detection/locality_aware_nms_op.cc | 22 +-
 paddle/fluid/operators/detection/mask_util.cc | 2 +
 paddle/fluid/operators/detection/mask_util.h | 1 +
 .../operators/detection/mask_util_test.cc | 2 +
 .../operators/detection/matrix_nms_op.cc | 7 +-
 .../operators/detection/multiclass_nms_op.cc | 26 +-
 paddle/fluid/operators/detection/nms_op.cc | 1 +
 paddle/fluid/operators/detection/nms_op.cu | 1 +
 paddle/fluid/operators/detection/nms_util.h | 1 +
 paddle/fluid/operators/detection/poly_util.cc | 3 +-
 paddle/fluid/operators/detection/poly_util.h | 1 +
 .../fluid/operators/detection/prior_box_op.h | 1 +
 .../retinanet_detection_output_op.cc | 34 +-
 .../detection/roi_perspective_transform_op.cc | 12 +-
 .../detection/roi_perspective_transform_op.cu | 7 +-
 .../detection/rpn_target_assign_op.cc | 1 +
 .../detection/sigmoid_focal_loss_op.cc | 1 +
 .../detection/sigmoid_focal_loss_op.h | 1 +
 .../fluid/operators/detection/yolo_box_op.cc | 20 +-
 .../operators/detection/yolov3_loss_op.cc | 1 +
 paddle/fluid/operators/detection_map_op.cc | 1 +
 paddle/fluid/operators/detection_map_op.h | 1 +
 paddle/fluid/operators/determinant_op.cc | 1 +
 paddle/fluid/operators/determinant_op.h | 1 +
 paddle/fluid/operators/dgc_clip_by_norm_op.cc | 4 +-
 paddle/fluid/operators/dgc_op.cc | 2 +
 paddle/fluid/operators/dgc_op.h | 12 +-
 paddle/fluid/operators/diag_embed_op.cu | 1 +
 paddle/fluid/operators/diag_embed_op.h | 1 +
 paddle/fluid/operators/dirichlet_op.h | 1 +
 paddle/fluid/operators/dist_op.cc | 1 +
 .../fluid/operators/dlnne/dlnne_engine_op.h | 12 +-
 .../operators/dlnne/dlnne_engine_op_test.cc | 2 +
 paddle/fluid/operators/dropout_impl.cu.h | 2 +
 paddle/fluid/operators/dropout_op.cc | 1 +
 paddle/fluid/operators/dropout_op_xpu.cc | 1 +
 paddle/fluid/operators/edit_distance_op.cc | 13 +-
 paddle/fluid/operators/edit_distance_op.cu | 1 +
 paddle/fluid/operators/edit_distance_op.h | 1 +
 paddle/fluid/operators/eig_op.cc | 11 +-
 paddle/fluid/operators/eig_op.h | 2 +
 paddle/fluid/operators/eigvals_op.cc | 1 +
 paddle/fluid/operators/eigvals_op.h | 17 +-
 paddle/fluid/operators/einsum_op.cc | 1 +
 .../elementwise/elementwise_add_op_xpu.cc | 2 +-
 .../elementwise/elementwise_div_op.cc | 1 +
 .../elementwise/elementwise_div_op.h | 1 +
 .../elementwise/elementwise_heaviside_op.cc | 1 +
 .../operators/elementwise/elementwise_mlu.h | 1 +
 .../elementwise/elementwise_mod_op_xpu.cc | 2 +-
 .../elementwise/elementwise_mul_op.cc | 2 +
 .../elementwise/elementwise_mul_op.h | 2 +-
 .../elementwise/elementwise_op_function.h | 115 +--
 .../operators/elementwise/elementwise_xpu.h | 6 +-
 .../test_elementwise_div_grad_grad.cc | 12 +-
 .../test_elementwise_op_grad_grad.h | 1 +
 paddle/fluid/operators/empty_op.cc | 3 +-
 paddle/fluid/operators/expand_as_op.cc | 1 +
 paddle/fluid/operators/expand_as_v2_op.cc | 9 +-
 paddle/fluid/operators/expand_as_v2_op_npu.cc | 9 +-
 paddle/fluid/operators/expand_as_v2_op_xpu.cc | 9 +-
 paddle/fluid/operators/expand_op.cc | 1 +
 paddle/fluid/operators/expand_op.h | 13 +-
 paddle/fluid/operators/expand_v2_op.cc | 1 +
 paddle/fluid/operators/expand_v2_op_npu.cc | 2 +-
 paddle/fluid/operators/expand_v2_op_xpu.cc | 11 +-
 paddle/fluid/operators/fake_dequantize_op.cc | 2 +
 .../fluid/operators/fake_dequantize_op.cu.h | 8 +-
 paddle/fluid/operators/fake_dequantize_op.h | 1 +
 paddle/fluid/operators/fake_quantize_op.cc | 10 +-
 paddle/fluid/operators/fake_quantize_op.cu.h | 39 +-
 paddle/fluid/operators/fake_quantize_op.h | 1 +
 paddle/fluid/operators/fc_op.cc | 1 +
 paddle/fluid/operators/fc_op.h | 1 +
 .../fluid/operators/fill_any_like_op_xpu.cc | 1 -
 paddle/fluid/operators/fill_constant_op.cc | 2 +
 .../fluid/operators/fill_constant_op_npu.cc | 7 +-
 .../fluid/operators/fill_diagonal_tensor_op.h | 1 +
 paddle/fluid/operators/fill_op.cc | 1 +
 paddle/fluid/operators/fill_op.h | 2 +-
 paddle/fluid/operators/fill_zeros_like_op.cc | 1 +
 .../fluid/operators/fill_zeros_like_op.cu.cc | 1 +
 paddle/fluid/operators/filter_by_instag_op.cc | 1 +
 paddle/fluid/operators/filter_by_instag_op.cu | 8 +-
 paddle/fluid/operators/filter_by_instag_op.h | 1 +
 paddle/fluid/operators/flatten_op.cc | 2 +
 paddle/fluid/operators/flatten_op.h | 1 +
 paddle/fluid/operators/flip_op.cc | 13 +-
 paddle/fluid/operators/fold_op.h | 1 +
 paddle/fluid/operators/frame_op.cc | 9 +-
 paddle/fluid/operators/fsp_op.cc | 1 +
 .../operators/fused/attention_layer_norm.h | 9 +-
 .../fluid/operators/fused/attn_bias_add.cu.h | 32 +-
 paddle/fluid/operators/fused/attn_gemm.h | 6 +-
 .../fluid/operators/fused/conv_fusion_op.cc | 1 +
 .../fluid/operators/fused/conv_fusion_op.cu | 1 +
 .../operators/fused/cudnn_bn_add_relu_test.cc | 42 +-
 .../operators/fused/cudnn_fusion_helper.h | 1 +
 .../operators/fused/cudnn_norm_conv_test.cc | 7 +-
 paddle/fluid/operators/fused/fmha_ref.h | 5 +-
 .../operators/fused/fused_attention_op.cc | 14 +-
 .../operators/fused/fused_attention_op.cu | 22 +-
 ...sed_bias_dropout_residual_layer_norm_op.cc | 1 +
 ...sed_bias_dropout_residual_layer_norm_op.cu | 2 +
 .../operators/fused/fused_bn_activation_op.cc | 68 +-
 .../operators/fused/fused_bn_activation_op.cu | 16 +-
 .../operators/fused/fused_bn_activation_op.h | 1 +
 .../fused/fused_bn_add_activation_op.cc | 32 +-
 .../fused/fused_bn_add_activation_op.cu | 6 +-
 .../fused/fused_bn_add_activation_op.h | 1 +
 .../operators/fused/fused_dropout_act_bias.h | 44 +-
 .../operators/fused/fused_dropout_helper.h | 6 +-
 .../fused/fused_elemwise_activation_op.cc | 1 +
 .../fused/fused_elemwise_activation_op.h | 16 +-
 .../fused_embedding_eltwise_layernorm_op.cc | 1 +
 .../fused_embedding_eltwise_layernorm_op.cu | 2 +
 .../fused/fused_embedding_fc_lstm_op.cc | 41 +-
 .../fused/fused_embedding_seq_pool_op.cc | 2 +
 .../fused_fc_elementwise_layernorm_op.cu | 22 +-
 .../operators/fused/fused_feedforward_op.cc | 1 +
 .../operators/fused/fused_feedforward_op.cu | 32 +-
 .../fused/fused_gate_attention_op.cc | 1 +
 .../fused/fused_gate_attention_op.cu | 6 +-
 .../operators/fused/fused_gemm_epilogue_op.cc | 6 +-
 .../operators/fused/fused_gemm_epilogue_op.h | 2 +
 .../fused_layernorm_residual_dropout_bias.h | 42 +-
 .../fused/fused_multi_transformer_op.cc | 14 +-
 .../fused/fused_multi_transformer_op.cu | 17 +-
 .../fused/fused_residual_dropout_bias.h | 42 +-
 .../operators/fused/fused_seqpool_cvm_op.cc | 8 +-
 .../operators/fused/fused_seqpool_cvm_op.cu | 1 +
 .../operators/fused/fused_seqpool_cvm_op.h | 1 +
 .../operators/fused/fused_softmax_mask.cu.h | 7 +-
 .../operators/fused/fused_transformer_op.cc | 5 +-
 .../operators/fused/fused_transformer_op.h | 4 +-
 .../fused/fusion_conv_inception_op.cc | 6 +-
 .../operators/fused/fusion_group_op.cu.cc | 1 +
 .../fluid/operators/fused/fusion_group_op.h | 1 +
 paddle/fluid/operators/fused/fusion_gru_op.cc | 2 +
 .../fluid/operators/fused/fusion_lstm_op.cc | 2 +
 .../fused/fusion_repeated_fc_relu_op.cc | 11 +-
 .../fused/fusion_seqconv_eltadd_relu_op.cc | 2 +
 .../fused/fusion_seqexpand_concat_fc_op.cc | 7 +-
 .../fused/fusion_seqpool_concat_op.cc | 20 +-
 .../fused/fusion_seqpool_cvm_concat_op.cc | 25 +-
 .../fused/fusion_squared_mat_sub_op.cc | 2 +
 .../fusion_transpose_flatten_concat_op.cc | 2 +
 .../fusion_transpose_flatten_concat_op.cu.cc | 1 +
 .../fusion_transpose_flatten_concat_op.h | 1 +
 .../fused/mkldnn/multi_gru_mkldnn_op.cc | 3 +-
 paddle/fluid/operators/fused/multi_gru_op.cc | 1 +
 paddle/fluid/operators/fused/multi_gru_op.h | 2 +-
 .../operators/fused/multihead_matmul_op.cc | 1 +
 .../operators/fused/multihead_matmul_op.cu | 14 +-
 .../fluid/operators/fused/resnet_unit_op.cc | 33 +-
 .../operators/fused/skip_layernorm_op.cc | 1 +
 .../operators/fused/skip_layernorm_op.cu | 2 +
 .../fluid/operators/fused_softmax_mask_op.cc | 1 +
 .../fluid/operators/fused_softmax_mask_op.cu | 1 +
 .../fused_softmax_mask_upper_triangle_op.cc | 1 +
 .../fused_softmax_mask_upper_triangle_op.cu | 127 +--
 paddle/fluid/operators/gather_op.cc | 8 +-
 paddle/fluid/operators/gather_op_xpu.cc | 9 +-
 .../fluid/operators/gather_scatter_kernel.cu | 9 +-
 paddle/fluid/operators/gather_test.cc | 3 +-
 paddle/fluid/operators/gaussian_random_op.cu | 1 +
 .../fluid/operators/gaussian_random_op_xpu.cc | 1 +
 paddle/fluid/operators/gelu_op.cc | 1 +
 paddle/fluid/operators/gelu_op_xpu.cc | 1 +
 .../fluid/operators/graph_khop_sampler_op.cc | 9 +-
 .../fluid/operators/graph_khop_sampler_op.cu | 99 +-
 .../fluid/operators/graph_khop_sampler_op.h | 2 +
 paddle/fluid/operators/group_norm_op.cc | 1 +
 paddle/fluid/operators/group_norm_op.cu | 28 +-
 paddle/fluid/operators/group_norm_op.h | 1 +
 paddle/fluid/operators/group_norm_op_npu.cc | 3 +-
 paddle/fluid/operators/gru_op.cc | 2 +
 paddle/fluid/operators/gru_op.h | 1 +
 paddle/fluid/operators/gru_unit_op.cc | 1 +
 paddle/fluid/operators/gru_unit_op.h | 6 +-
 paddle/fluid/operators/hinge_loss_op.cc | 1 +
 paddle/fluid/operators/huber_loss_op_xpu.cc | 9 +-
 paddle/fluid/operators/im2sequence_op.cc | 1 +
 paddle/fluid/operators/im2sequence_op.h | 1 +
 paddle/fluid/operators/index_impl.cu.h | 13 +-
 paddle/fluid/operators/index_sample_op.cc | 4 +-
 paddle/fluid/operators/index_select_op.h | 1 +
 paddle/fluid/operators/inplace_abn_op.cc | 34 +-
 paddle/fluid/operators/inplace_abn_op.cu | 5 +-
 paddle/fluid/operators/inplace_abn_op.h | 1 +
 paddle/fluid/operators/instance_norm_op.cc | 20 +-
 paddle/fluid/operators/instance_norm_op.h | 1 +
 paddle/fluid/operators/interpolate_op.cc | 13 +-
 paddle/fluid/operators/interpolate_op.cu | 79 +-
 paddle/fluid/operators/interpolate_op.h | 43 +-
 paddle/fluid/operators/interpolate_op_npu.cc | 3 +-
 paddle/fluid/operators/interpolate_op_xpu.cc | 18 +-
 paddle/fluid/operators/interpolate_v2_op.cc | 27 +-
 .../fluid/operators/interpolate_v2_op_npu.cc | 3 +-
 .../fluid/operators/interpolate_v2_op_xpu.cc | 18 +-
 paddle/fluid/operators/inverse_op.cc | 1 +
 paddle/fluid/operators/isfinite_op.cc | 19 +-
 paddle/fluid/operators/isfinite_op.cu | 10 +-
 paddle/fluid/operators/jit/benchmark.cc | 5 +-
 paddle/fluid/operators/jit/gen/act.cc | 5 +-
paddle/fluid/operators/jit/gen/jitcode.h | 1 + paddle/fluid/operators/jit/gen/matmul.cc | 27 +- paddle/fluid/operators/jit/gen/matmul.h | 10 +- paddle/fluid/operators/jit/gen/seqpool.cc | 27 +- paddle/fluid/operators/jit/gen_base.cc | 1 + paddle/fluid/operators/jit/gen_base.h | 2 +- paddle/fluid/operators/jit/helper.cc | 11 +- paddle/fluid/operators/jit/kernel_base.h | 1 + paddle/fluid/operators/jit/kernel_key.cc | 1 + .../jit/more/intrinsic/crf_decoding.cc | 2 + .../jit/more/intrinsic/layer_norm.cc | 2 + paddle/fluid/operators/jit/more/mix/mix.cc | 1 + paddle/fluid/operators/jit/more/mkl/mkl.cc | 1 + paddle/fluid/operators/jit/more/mkl/mkl.h | 31 +- paddle/fluid/operators/jit/refer/refer.cc | 1 + paddle/fluid/operators/jit/refer/refer.h | 20 +- paddle/fluid/operators/jit/registry.h | 1 + paddle/fluid/operators/jit/test.cc | 244 ++--- .../kernel_primitives/kernel_primitives.h | 2 +- paddle/fluid/operators/kldiv_loss_op.cc | 1 + paddle/fluid/operators/kldiv_loss_op_npu.cc | 1 + paddle/fluid/operators/kthvalue_op.cc | 1 + paddle/fluid/operators/l1_norm_op.cc | 1 + paddle/fluid/operators/label_smooth_op.cc | 1 + paddle/fluid/operators/layer_norm_kernel.cu.h | 144 +-- paddle/fluid/operators/layer_norm_op.cc | 1 + paddle/fluid/operators/layer_norm_op_xpu.cc | 5 +- paddle/fluid/operators/layout_utils.h | 1 + paddle/fluid/operators/linear_chain_crf_op.h | 9 +- paddle/fluid/operators/linspace_op.cc | 9 +- paddle/fluid/operators/lite/lite_engine_op.cc | 1 + paddle/fluid/operators/lite/lite_engine_op.h | 3 +- .../operators/lite/lite_engine_op_test.cc | 5 +- paddle/fluid/operators/load_combine_op.cc | 4 +- paddle/fluid/operators/load_op.cc | 4 +- paddle/fluid/operators/lod_reset_op.cc | 1 + paddle/fluid/operators/lod_reset_op.h | 1 + paddle/fluid/operators/log_loss_op.cc | 1 + paddle/fluid/operators/log_loss_op_npu.cc | 1 + paddle/fluid/operators/log_loss_op_xpu.cc | 1 + paddle/fluid/operators/log_softmax_op.cc | 1 + paddle/fluid/operators/lookup_table_op.cu | 28 +- paddle/fluid/operators/lookup_table_v2_op.cc | 1 + .../fluid/operators/lookup_table_v2_op_npu.cc | 1 + .../fluid/operators/lookup_table_v2_op_xpu.cc | 3 +- paddle/fluid/operators/lrn_op.cc | 29 +- paddle/fluid/operators/lrn_op.h | 28 +- paddle/fluid/operators/lstm_op.cc | 1 + paddle/fluid/operators/lstm_op.h | 8 +- paddle/fluid/operators/lstm_unit_op.cc | 1 + paddle/fluid/operators/lstmp_op.cc | 1 + paddle/fluid/operators/lstmp_op.h | 8 +- paddle/fluid/operators/lstsq_op.cc | 4 +- paddle/fluid/operators/lstsq_op.cu | 1 + paddle/fluid/operators/lstsq_op.h | 2 + paddle/fluid/operators/lu_op.cc | 5 +- paddle/fluid/operators/lu_unpack_op.cc | 5 +- .../operators/margin_cross_entropy_op.cu | 39 +- paddle/fluid/operators/margin_rank_loss_op.cc | 2 + paddle/fluid/operators/marker_op.cu | 4 +- .../fluid/operators/match_matrix_tensor_op.cc | 3 +- paddle/fluid/operators/math.h | 3 +- paddle/fluid/operators/math/beam_search.cu | 17 +- paddle/fluid/operators/math/beam_search.h | 1 + .../fluid/operators/math/beam_search_test.cc | 1 + .../operators/math/bert_encoder_functor.cu | 28 +- .../operators/math/bert_encoder_functor.h | 2 + paddle/fluid/operators/math/bloomfilter.h | 4 +- .../fluid/operators/math/concat_and_split.cu | 1 - .../fluid/operators/math/concat_and_split.h | 1 + paddle/fluid/operators/math/concat_test.cc | 44 +- paddle/fluid/operators/math/cross_entropy.cc | 1 + paddle/fluid/operators/math/cross_entropy.h | 1 + .../operators/math/eigen_values_vectors.h | 7 +- paddle/fluid/operators/math/gru_compute.cu | 127 +-- 
paddle/fluid/operators/math/im2col.cc | 21 +- paddle/fluid/operators/math/im2col.cu | 45 +- paddle/fluid/operators/math/im2col.h | 1 + paddle/fluid/operators/math/im2col_cfo_cpu.h | 1 + paddle/fluid/operators/math/im2col_test.cc | 2 + paddle/fluid/operators/math/inclusive_scan.h | 17 +- paddle/fluid/operators/math/math_function.cc | 1 + paddle/fluid/operators/math/matrix_bit_code.h | 1 + paddle/fluid/operators/math/matrix_solve.cc | 1 + .../fluid/operators/math/matrix_solve.cu.cc | 1 + paddle/fluid/operators/math/matrix_solve.h | 1 + paddle/fluid/operators/math/sample_prob.cu | 1 + paddle/fluid/operators/math/sampler.cc | 1 + .../operators/math/selected_rows_functor.cc | 1 + .../operators/math/selected_rows_functor.cu | 38 +- .../math/selected_rows_functor_test.cc | 5 +- .../math/selected_rows_functor_test.cu.cc | 1 + .../fluid/operators/math/sequence_padding.cc | 1 + .../fluid/operators/math/sequence_padding.cu | 1 + .../fluid/operators/math/sequence_padding.h | 16 +- .../fluid/operators/math/sequence_pooling.cc | 3 +- .../fluid/operators/math/sequence_pooling.cu | 121 +-- .../fluid/operators/math/sequence_pooling.h | 1 + .../operators/math/sequence_pooling_test.cc | 1 + paddle/fluid/operators/math/sequence_scale.cc | 1 + paddle/fluid/operators/math/sequence_scale.cu | 16 +- paddle/fluid/operators/math/softmax.cc | 1 + paddle/fluid/operators/math/softmax_impl.h | 91 +- paddle/fluid/operators/math/sparse_impl.cu.h | 3 +- paddle/fluid/operators/math/tree2col.cc | 1 + paddle/fluid/operators/math/tree2col.cu | 1 + paddle/fluid/operators/math/tree2col.h | 1 + paddle/fluid/operators/math/vol2col.cu | 1 + paddle/fluid/operators/math/vol2col.h | 1 + paddle/fluid/operators/math/vol2col_test.cc | 1 + paddle/fluid/operators/matmul_op.cc | 61 +- paddle/fluid/operators/matmul_op_xpu.cc | 17 +- paddle/fluid/operators/matmul_v2_op.cc | 1 + paddle/fluid/operators/matmul_v2_op.h | 1 + paddle/fluid/operators/matmul_v2_op_xpu.cc | 2 +- paddle/fluid/operators/matrix_power_op.cc | 1 + paddle/fluid/operators/matrix_rank_op.cc | 1 + paddle/fluid/operators/mean_iou_op.h | 1 + paddle/fluid/operators/mean_op_xpu.cc | 5 +- paddle/fluid/operators/merge_lod_tensor_op.cc | 1 - .../fluid/operators/merge_selected_rows_op.cc | 1 + .../fluid/operators/merge_selected_rows_op.h | 1 + paddle/fluid/operators/meshgrid_op.cc | 3 +- paddle/fluid/operators/miopen_lstm_cache.h | 1 + paddle/fluid/operators/miopen_rnn_cache.h | 1 + .../operators/mkldnn/activation_mkldnn_op.cc | 4 +- paddle/fluid/operators/mkldnn/axpy_handler.cc | 5 +- .../operators/mkldnn/concat_mkldnn_op.cc | 9 +- .../fluid/operators/mkldnn/conv_mkldnn_op.cc | 18 +- .../operators/mkldnn/dequantize_mkldnn_op.cc | 2 +- .../operators/mkldnn/expand_v2_mkldnn_op.cc | 6 +- paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc | 16 +- .../operators/mkldnn/interpolate_mkldnn_op.cc | 11 +- .../operators/mkldnn/matmul_mkldnn_op.cc | 6 +- .../fluid/operators/mkldnn/matmul_mkldnn_op.h | 2 +- .../operators/mkldnn/matmul_v2_mkldnn_op.cc | 13 +- .../fluid/operators/mkldnn/pool_mkldnn_op.cc | 4 +- .../operators/mkldnn/quantize_mkldnn_op.cc | 2 +- .../operators/mkldnn/requantize_mkldnn_op.cc | 10 +- .../operators/mkldnn/reshape_mkldnn_op.cc | 2 +- .../fluid/operators/mkldnn/stack_mkldnn_op.cc | 8 +- .../fluid/operators/mkldnn/sum_mkldnn_op.cc | 5 +- .../operators/mkldnn/test_mkldnn_caching.cc | 6 +- .../mkldnn/test_mkldnn_op_inplace.cc | 1 + .../operators/mkldnn/test_mkldnn_op_nhwc.cc | 1 + .../operators/mkldnn/transpose_mkldnn_op.cc | 2 +- paddle/fluid/operators/mlu/mlu_baseop.cc | 1 + 
paddle/fluid/operators/mode_op.cc | 3 +- .../fluid/operators/modified_huber_loss_op.cc | 10 +- .../fluid/operators/modified_huber_loss_op.cu | 1 + paddle/fluid/operators/mul_op.cc | 1 + paddle/fluid/operators/mul_op_xpu.cc | 1 + paddle/fluid/operators/multiplex_op.cc | 1 - paddle/fluid/operators/nanmedian_op.cc | 1 + .../fluid/operators/nccl/nccl_gpu_common.cc | 2 +- paddle/fluid/operators/nccl/nccl_op.cu.cc | 2 +- .../fluid/operators/nccl/nccl_op_test.cu.cc | 1 + paddle/fluid/operators/nce_op.h | 2 + paddle/fluid/operators/nll_loss_op.cc | 1 + paddle/fluid/operators/norm_op.cc | 1 + paddle/fluid/operators/norm_utils.cu.h | 98 +- paddle/fluid/operators/norm_utils.h | 1 + paddle/fluid/operators/number_count_op.cu | 12 +- paddle/fluid/operators/one_hot_op.cc | 1 + paddle/fluid/operators/one_hot_op_npu.cc | 1 - paddle/fluid/operators/one_hot_v2_op.cc | 1 + paddle/fluid/operators/one_hot_v2_op_npu.cc | 1 - .../fluid/operators/optimizers/adagrad_op.cc | 5 +- .../fluid/operators/optimizers/adam_op_npu.cc | 29 +- .../fluid/operators/optimizers/adam_op_xpu.cc | 5 +- paddle/fluid/operators/optimizers/adamw_op.cc | 3 +- .../operators/optimizers/adamw_op_xpu.cc | 5 +- .../operators/optimizers/cast_with_ptr.h | 6 +- .../operators/optimizers/dgc_momentum_op.cc | 4 +- .../distributed_fused_lamb_init_op.cu | 29 +- .../optimizers/distributed_fused_lamb_op.cu | 99 +- paddle/fluid/operators/optimizers/dpsgd_op.h | 7 +- paddle/fluid/operators/optimizers/ftrl_op.h | 5 +- paddle/fluid/operators/optimizers/lamb_op.cc | 21 +- paddle/fluid/operators/optimizers/lamb_op.cu | 5 +- paddle/fluid/operators/optimizers/lamb_op.h | 22 +- .../fluid/operators/optimizers/lamb_op_xpu.cc | 2 +- .../operators/optimizers/lars_momentum_op.cu | 13 +- .../optimizers/merged_momentum_op_mlu.cc | 2 +- .../optimizers/merged_momentum_op_npu.cc | 10 +- .../fluid/operators/optimizers/momentum_op.cc | 43 +- .../fluid/operators/optimizers/momentum_op.h | 1 + .../operators/optimizers/momentum_op_mlu.cc | 7 +- .../operators/optimizers/momentum_op_npu.cc | 15 +- .../operators/optimizers/momentum_op_xpu.cc | 1 + .../operators/optimizers/multi_tensor_apply.h | 13 +- .../pow2_decay_with_linear_warmup_op.cc | 1 + .../pow2_decay_with_linear_warmup_op.h | 12 +- .../fluid/operators/optimizers/rmsprop_op.cc | 3 +- .../operators/optimizers/rmsprop_op_xpu.cc | 2 + paddle/fluid/operators/optimizers/sgd_op.cc | 13 +- paddle/fluid/operators/optimizers/sgd_op.cu | 11 +- .../fluid/operators/optimizers/sgd_op_xpu.cc | 3 +- .../optimizers/sparse_momentum_op.cc | 1 + .../operators/optimizers/sparse_momentum_op.h | 1 + paddle/fluid/operators/p_norm_op.cc | 12 +- paddle/fluid/operators/pad2d_op.cc | 1 + paddle/fluid/operators/pad2d_op.cu | 1 + paddle/fluid/operators/pad3d_op.cc | 1 + .../fluid/operators/pad_constant_like_op.cc | 1 + paddle/fluid/operators/pad_constant_like_op.h | 1 + paddle/fluid/operators/pad_op.cc | 1 + paddle/fluid/operators/partial_concat_op.cc | 6 +- paddle/fluid/operators/partial_concat_op.cu | 1 + paddle/fluid/operators/partial_concat_op.h | 1 + paddle/fluid/operators/partial_sum_op.cc | 6 +- paddle/fluid/operators/partial_sum_op.cu | 1 + paddle/fluid/operators/partial_sum_op.h | 1 + paddle/fluid/operators/pixel_shuffle_op.cc | 1 + paddle/fluid/operators/poisson_op.cc | 1 + paddle/fluid/operators/pool_op.cc | 4 +- paddle/fluid/operators/pool_op_xpu.cc | 12 +- paddle/fluid/operators/pool_with_index_op.cc | 1 + .../operators/positive_negative_pair_op.cc | 12 +- .../operators/positive_negative_pair_op.h | 1 + 
paddle/fluid/operators/prelu_op.cc | 1 + .../fluid/operators/prim_ops/prim_op_test.cc | 1 - paddle/fluid/operators/print_op.cc | 10 +- paddle/fluid/operators/prroi_pool_op.cc | 1 + paddle/fluid/operators/prroi_pool_op.h | 1 + .../operators/prune_gate_by_capacity_op.cu | 7 +- .../pscore/distributed_lookup_table_op.cc | 3 +- .../pscore/distributed_lookup_table_op.h | 1 + .../pscore/distributed_push_sparse_op.cc | 3 +- .../pscore/distributed_push_sparse_op.h | 1 + .../pscore/heter_listen_and_serv_op.cc | 6 +- .../pscore/heter_listen_and_serv_op.h | 1 + .../pscore/heter_listen_and_server_test.cc | 4 +- .../operators/pscore/heter_server_test.cc | 19 +- .../pscore/send_and_recv_op_cpu_test.cc | 10 +- .../pscore/send_and_recv_op_gpu_test.cc | 11 +- .../operators/pull_box_extended_sparse_op.h | 1 + paddle/fluid/operators/pull_gpups_sparse_op.h | 1 + paddle/fluid/operators/pull_sparse_op.cc | 1 + paddle/fluid/operators/pull_sparse_op.h | 1 + paddle/fluid/operators/pull_sparse_v2_op.cc | 1 + paddle/fluid/operators/pull_sparse_v2_op.h | 1 + paddle/fluid/operators/push_dense_op.cc | 1 + paddle/fluid/operators/push_dense_op.h | 1 + paddle/fluid/operators/py_func_op.cc | 1 + paddle/fluid/operators/py_layer_op.cc | 4 +- paddle/fluid/operators/py_layer_op.h | 1 + paddle/fluid/operators/pyramid_hash_op.cc | 7 +- paddle/fluid/operators/qr_op.cc | 2 + paddle/fluid/operators/qr_op.cu | 7 +- paddle/fluid/operators/qr_op.h | 11 +- paddle/fluid/operators/quantize_linear_op.cc | 2 + paddle/fluid/operators/quantize_linear_op.cu | 9 +- paddle/fluid/operators/quantize_linear_op.h | 1 + paddle/fluid/operators/quantize_op.cc | 21 +- paddle/fluid/operators/quantize_op.h | 1 + paddle/fluid/operators/queue_generator_op.cc | 7 +- paddle/fluid/operators/random_crop_op.h | 1 + paddle/fluid/operators/random_routing_op.cu | 6 +- paddle/fluid/operators/randperm_op.cc | 10 +- paddle/fluid/operators/randperm_op_npu.cc | 2 +- paddle/fluid/operators/range_op.cc | 2 + paddle/fluid/operators/range_op.h | 11 +- paddle/fluid/operators/range_op_xpu.cc | 2 +- paddle/fluid/operators/rank_attention_op.cc | 2 + paddle/fluid/operators/rank_attention_op.cu | 1 + .../fluid/operators/reader/blocking_queue.h | 7 +- .../fluid/operators/reader/buffered_reader.cc | 1 + .../operators/reader/create_ctr_reader_op.cc | 1 - paddle/fluid/operators/recurrent_op.cc | 53 +- .../operators/reduce_ops/frobenius_norm_op.cc | 1 + .../operators/reduce_ops/logsumexp_op.cc | 1 + .../operators/reduce_ops/reduce_amax_op.cc | 10 +- .../reduce_ops/reduce_amax_op.part.cu | 5 +- .../operators/reduce_ops/reduce_amin_op.cc | 10 +- .../reduce_ops/reduce_amin_op.part.cu | 5 +- .../operators/reduce_ops/reduce_max_op.cc | 3 +- .../operators/reduce_ops/reduce_max_op_xpu.cc | 1 + .../operators/reduce_ops/reduce_mean_op.cc | 1 + .../reduce_ops/reduce_mean_op_npu.cc | 2 +- .../operators/reduce_ops/reduce_min_op.cc | 3 +- .../fluid/operators/reduce_ops/reduce_op.cu.h | 1 - paddle/fluid/operators/reduce_ops/reduce_op.h | 6 +- .../operators/reduce_ops/reduce_op_function.h | 1 + .../operators/reduce_ops/reduce_op_mlu.h | 1 + .../operators/reduce_ops/reduce_op_xpu.h | 1 + .../operators/reduce_ops/reduce_sum_op_xpu.cc | 1 + paddle/fluid/operators/renorm_op.cu | 11 +- .../fluid/operators/repeat_interleave_op.cc | 12 +- .../fluid/operators/repeat_interleave_op.cu | 62 +- paddle/fluid/operators/repeat_interleave_op.h | 4 +- paddle/fluid/operators/requantize_op.cc | 1 + paddle/fluid/operators/requantize_op.h | 1 + paddle/fluid/operators/rnn_op.cc | 1 + 
paddle/fluid/operators/roi_align_op.cc | 1 + paddle/fluid/operators/roi_align_op_xpu.cc | 1 + paddle/fluid/operators/roi_pool_op.cc | 1 + paddle/fluid/operators/row_conv_op.cc | 2 + paddle/fluid/operators/row_conv_op.cu | 29 +- paddle/fluid/operators/rrelu_op.cc | 1 + paddle/fluid/operators/run_program_op.h | 13 +- paddle/fluid/operators/sample_logits_op.cc | 2 + paddle/fluid/operators/sample_logits_op.cu | 29 +- paddle/fluid/operators/sample_logits_op.h | 1 + paddle/fluid/operators/save_combine_op.cc | 4 +- paddle/fluid/operators/save_combine_op.h | 1 + .../operators/save_load_combine_op_test.cc | 1 + paddle/fluid/operators/save_op.cc | 5 +- paddle/fluid/operators/save_op.h | 1 + paddle/fluid/operators/scale_op.cc | 1 + paddle/fluid/operators/scale_op_xpu.cc | 1 + paddle/fluid/operators/scatter_nd_add_op.cc | 1 + paddle/fluid/operators/scatter_op.cc | 1 + paddle/fluid/operators/scatter_op_xpu.cc | 11 +- paddle/fluid/operators/seed_op.cc | 17 +- paddle/fluid/operators/segment_pool_op.cc | 1 + .../sequence_ops/sequence_concat_op.cc | 1 + .../sequence_ops/sequence_concat_op.cu.cc | 1 + .../sequence_ops/sequence_concat_op.h | 3 +- .../operators/sequence_ops/sequence_conv_op.h | 1 + .../sequence_ops/sequence_conv_op_xpu.cc | 30 +- .../sequence_ops/sequence_enumerate_op.cu | 1 + .../sequence_ops/sequence_erase_op.cc | 1 + .../sequence_ops/sequence_erase_op.cu | 1 + .../sequence_ops/sequence_erase_op.h | 1 + .../sequence_ops/sequence_expand_as_op.cc | 1 + .../sequence_ops/sequence_expand_as_op.cu | 1 + .../sequence_ops/sequence_expand_as_op.h | 1 + .../sequence_ops/sequence_expand_op.cc | 10 +- .../sequence_ops/sequence_expand_op.cu | 1 + .../sequence_ops/sequence_mask_op.cc | 1 + .../operators/sequence_ops/sequence_pad_op.cc | 1 + .../operators/sequence_ops/sequence_pad_op.h | 1 + .../sequence_ops/sequence_pool_op.cc | 12 +- .../operators/sequence_ops/sequence_pool_op.h | 8 +- .../sequence_ops/sequence_reshape_op.cc | 2 + .../sequence_ops/sequence_reverse_op.h | 1 + .../sequence_ops/sequence_scatter_op.cc | 2 + .../sequence_ops/sequence_slice_op.cc | 1 + .../sequence_ops/sequence_softmax_op.cc | 1 + .../sequence_ops/sequence_softmax_op.cu | 18 +- .../sequence_topk_avg_pooling_op.cc | 6 +- .../sequence_topk_avg_pooling_op.h | 1 + .../sequence_ops/sequence_unpad_op.cc | 1 + .../sequence_ops/sequence_unpad_op.h | 1 + paddle/fluid/operators/set_value_op.cc | 1 - paddle/fluid/operators/set_value_op_npu.cc | 1 - paddle/fluid/operators/shape_op.cc | 1 + paddle/fluid/operators/shape_op_xpu.cc | 1 + paddle/fluid/operators/share_buffer_op.h | 5 +- paddle/fluid/operators/share_data_op.cc | 6 +- .../fluid/operators/shrink_rnn_memory_op.cc | 3 +- paddle/fluid/operators/shuffle_batch_op.cc | 2 + paddle/fluid/operators/shuffle_batch_op.h | 1 + paddle/fluid/operators/shuffle_channel_op.cc | 6 +- paddle/fluid/operators/shuffle_channel_op.cu | 16 +- paddle/fluid/operators/shuffle_channel_op.h | 1 + .../sigmoid_cross_entropy_with_logits_op.cc | 1 + paddle/fluid/operators/similarity_focus_op.h | 19 +- paddle/fluid/operators/slice_op.cc | 7 +- paddle/fluid/operators/slice_op.h | 1 + paddle/fluid/operators/slice_op_mlu.cc | 3 +- paddle/fluid/operators/slice_op_npu.cc | 1 - paddle/fluid/operators/slice_op_xpu.cc | 8 +- paddle/fluid/operators/smooth_l1_loss_op.cc | 1 + .../fluid/operators/smooth_l1_loss_op_npu.cc | 2 +- paddle/fluid/operators/softmax_op.cc | 5 +- .../softmax_with_cross_entropy_op_xpu.cc | 10 +- paddle/fluid/operators/solve_op.cc | 2 + paddle/fluid/operators/solve_op.h | 7 +- 
paddle/fluid/operators/space_to_depth_op.cc | 5 +- paddle/fluid/operators/sparse_attention_op.cc | 1 + paddle/fluid/operators/sparse_attention_op.cu | 60 +- paddle/fluid/operators/spectral_norm_op.h | 1 + paddle/fluid/operators/spectral_op.cc | 1 + paddle/fluid/operators/spectral_op.h | 1 + paddle/fluid/operators/split_op.cc | 1 + paddle/fluid/operators/split_op.h | 1 + paddle/fluid/operators/split_op_mlu.cc | 2 +- paddle/fluid/operators/split_op_xpu.cc | 3 +- paddle/fluid/operators/spp_op.cc | 1 + paddle/fluid/operators/spp_op.h | 1 + paddle/fluid/operators/stack_op.cc | 1 + paddle/fluid/operators/stack_op_npu.cc | 10 +- paddle/fluid/operators/stack_op_xpu.cc | 1 + paddle/fluid/operators/stft_op.cc | 1 + paddle/fluid/operators/stft_op.h | 1 - .../fluid/operators/strided_slice_op_npu.cc | 20 +- .../operators/string/faster_tokenizer_op.cc | 11 +- .../operators/string/faster_tokenizer_op.h | 5 +- paddle/fluid/operators/sum_op.cc | 21 +- paddle/fluid/operators/sum_op.cu | 5 +- paddle/fluid/operators/sum_op.h | 1 + paddle/fluid/operators/sum_op_mlu.cc | 2 +- paddle/fluid/operators/sum_op_xpu.cc | 3 +- paddle/fluid/operators/svd_helper.h | 25 +- paddle/fluid/operators/svd_op.cc | 2 + paddle/fluid/operators/svd_op.cu | 2 + paddle/fluid/operators/svd_op.h | 1 + .../fluid/operators/sync_batch_norm_op.cu.h | 102 +-- .../fluid/operators/sync_batch_norm_op_npu.cc | 5 +- paddle/fluid/operators/tdm_child_op.cc | 2 + paddle/fluid/operators/tdm_child_op.h | 1 + paddle/fluid/operators/tdm_sampler_op.cc | 2 + paddle/fluid/operators/tdm_sampler_op.h | 1 + paddle/fluid/operators/temporal_shift_op.cc | 3 +- paddle/fluid/operators/temporal_shift_op.cu | 26 +- .../operators/tensor_array_to_tensor_op.cc | 18 +- paddle/fluid/operators/tensor_formatter.cc | 1 + paddle/fluid/operators/tensor_to_string.h | 3 +- .../operators/tensorrt/tensorrt_engine_op.h | 10 +- .../tensorrt/tensorrt_engine_op_test.cc | 2 + paddle/fluid/operators/tile_op_npu.cc | 9 +- paddle/fluid/operators/tile_op_xpu.cc | 9 +- paddle/fluid/operators/top_k_function_cuda.h | 1 + paddle/fluid/operators/top_k_op.cc | 6 +- paddle/fluid/operators/top_k_op.cu | 8 +- paddle/fluid/operators/top_k_op.h | 1 + paddle/fluid/operators/top_k_op_mlu.cc | 2 +- paddle/fluid/operators/top_k_v2_op_npu.cc | 1 + paddle/fluid/operators/trace_op.cc | 29 +- paddle/fluid/operators/transfer_layout_op.cc | 8 +- paddle/fluid/operators/transpose_op.cc | 1 + paddle/fluid/operators/transpose_op.cu.h | 57 +- paddle/fluid/operators/transpose_op.h | 1 + paddle/fluid/operators/transpose_op_mlu.cc | 2 +- paddle/fluid/operators/transpose_op_xpu.cc | 3 +- paddle/fluid/operators/tree_conv_op.h | 1 + paddle/fluid/operators/tril_indices_op.cc | 1 + paddle/fluid/operators/tril_triu_op.cc | 2 +- .../operators/truncated_gaussian_random_op.cc | 3 +- .../truncated_gaussian_random_op_npu.cc | 3 +- .../truncated_gaussian_random_op_xpu.cc | 3 +- paddle/fluid/operators/unbind_op.cc | 2 + paddle/fluid/operators/unbind_op.h | 1 + paddle/fluid/operators/uniform_random_op.h | 7 +- .../fluid/operators/uniform_random_op_mlu.cc | 2 +- .../fluid/operators/uniform_random_op_xpu.cc | 3 +- .../fluid/operators/unique_consecutive_op.cc | 1 + .../fluid/operators/unique_consecutive_op.cu | 2 + .../fluid/operators/unique_consecutive_op.h | 1 + paddle/fluid/operators/unique_op.cc | 2 + paddle/fluid/operators/unique_op.h | 1 + .../fluid/operators/unique_with_counts_op.h | 1 + paddle/fluid/operators/unpool_op.cc | 1 + paddle/fluid/operators/unpool_op.h | 1 + paddle/fluid/operators/unsqueeze_op.cc | 7 +- 
paddle/fluid/operators/unsqueeze_op.h | 8 +- paddle/fluid/operators/unstack_op.cc | 1 + paddle/fluid/operators/utils.h | 1 + paddle/fluid/operators/var_conv_2d_op.cc | 2 + paddle/fluid/platform/aligned_vector.h | 20 +- paddle/fluid/platform/bfloat16_test.cc | 1 + paddle/fluid/platform/bfloat16_test.cu | 2 + paddle/fluid/platform/collective_helper.cc | 1 + paddle/fluid/platform/complex_test.cc | 2 + paddle/fluid/platform/complex_test.cu | 1 + paddle/fluid/platform/cpu_info.cc | 1 + .../platform/cuda_graph_with_memory_pool.cc | 1 + .../platform/cuda_graph_with_memory_pool.h | 24 +- paddle/fluid/platform/denormal.cc | 1 + .../platform/device/gpu/cuda/cuda_graph.cc | 1 + .../platform/device/gpu/cuda/cuda_graph.h | 2 +- .../platform/device/gpu/cuda/cuda_helper.h | 2 +- .../device/gpu/cuda/cudnn_helper_test.cc | 10 +- .../platform/device/gpu/cuda_helper_test.cu | 6 +- .../platform/device/gpu/cudnn_desc_test.cc | 4 +- paddle/fluid/platform/device/gpu/gpu_info.cc | 6 +- paddle/fluid/platform/device/gpu/gpu_info.h | 1 + .../platform/device/gpu/gpu_launch_config.h | 26 +- .../platform/device/gpu/gpu_primitives.h | 1 + .../platform/device/gpu/gpu_resource_pool.cc | 1 + paddle/fluid/platform/device/gpu/gpu_types.h | 2 + .../fluid/platform/device/gpu/nccl_helper.h | 3 +- .../device/gpu/rocm/miopen_helper_test.cc | 6 +- .../platform/device/gpu/rocm/rocm_helper.h | 2 +- .../fluid/platform/device/ipu/ipu_device.cc | 7 +- .../fluid/platform/device/ipu/ipu_executor.cc | 6 +- paddle/fluid/platform/device/ipu/ipu_info.h | 1 + .../fluid/platform/device/ipu/ipu_strategy.h | 10 +- .../popart_canonicalization/activation_ops.cc | 16 +- .../ipu/popart_canonicalization/math_ops.cc | 15 +- .../ipu/popart_canonicalization/nn_ops.cc | 45 +- .../ipu/popart_canonicalization/op_builder.cc | 5 +- .../ipu/popart_canonicalization/search_ops.cc | 8 +- .../ipu/popart_canonicalization/tensor_ops.cc | 113 +-- .../fluid/platform/device/mlu/cncl_helper.h | 2 +- .../platform/device/mlu/device_context.h | 1 + .../device/mlu/device_context_test.cc | 10 +- .../device/mlu/mlu_collective_helper.cc | 1 + paddle/fluid/platform/device/mlu/mlu_info.cc | 7 +- .../fluid/platform/device/mlu/mlu_stream.cc | 1 + .../platform/device/npu/ascend_npu_info.cc | 2 + .../fluid/platform/device/npu/dynload/hccl.h | 1 + .../fluid/platform/device/npu/enforce_npu.h | 3 +- .../fluid/platform/device/npu/hccl_helper.h | 6 +- .../device/npu/npu_collective_helper.cc | 1 + paddle/fluid/platform/device/npu/npu_info.cc | 7 +- .../platform/device/npu/npu_op_runner.cc | 1 - .../platform/device/npu/npu_resource_pool.cc | 1 + .../fluid/platform/device/npu/npu_stream.cc | 1 + .../fluid/platform/device/xpu/bkcl_helper.h | 3 +- .../fluid/platform/device/xpu/enforce_xpu.h | 1 - .../device/xpu/tests/enforce_xpu_test.cc | 1 + paddle/fluid/platform/device/xpu/xpu_info.cc | 3 +- paddle/fluid/platform/device/xpu/xpu_info.h | 1 + .../fluid/platform/device/xpu/xpu_op_list.cc | 3 +- paddle/fluid/platform/device_code.cc | 4 +- paddle/fluid/platform/device_code_test.cc | 2 + paddle/fluid/platform/device_context.cc | 2 + paddle/fluid/platform/device_context.h | 3 +- paddle/fluid/platform/device_context_test.cu | 11 +- .../fluid/platform/device_context_xpu_test.cc | 7 +- paddle/fluid/platform/device_event.h | 2 +- paddle/fluid/platform/device_event_base.cc | 1 + paddle/fluid/platform/device_event_base.h | 1 + paddle/fluid/platform/device_event_cpu.h | 1 + paddle/fluid/platform/device_event_gpu.cc | 2 +- paddle/fluid/platform/device_event_test.cc | 5 +- 
paddle/fluid/platform/device_tracer.cc | 7 +- paddle/fluid/platform/dynload/cublas.h | 1 + paddle/fluid/platform/dynload/cublasLt.h | 1 + paddle/fluid/platform/dynload/cuda_driver.cc | 1 + paddle/fluid/platform/dynload/cuda_driver.h | 1 + paddle/fluid/platform/dynload/cudnn.cc | 1 + paddle/fluid/platform/dynload/cudnn.h | 1 + paddle/fluid/platform/dynload/cufft.cc | 1 + paddle/fluid/platform/dynload/cufft.h | 1 + paddle/fluid/platform/dynload/cupti.h | 1 + paddle/fluid/platform/dynload/curand.h | 1 + paddle/fluid/platform/dynload/cusolver.h | 1 + paddle/fluid/platform/dynload/cusparse.h | 1 + .../fluid/platform/dynload/dynamic_loader.cc | 1 + paddle/fluid/platform/dynload/hiprtc.cc | 1 + paddle/fluid/platform/dynload/hiprtc.h | 2 + paddle/fluid/platform/dynload/miopen.cc | 1 + paddle/fluid/platform/dynload/miopen.h | 3 +- paddle/fluid/platform/dynload/mklml.h | 1 + paddle/fluid/platform/dynload/mklrt.h | 1 + paddle/fluid/platform/dynload/nccl.h | 1 + paddle/fluid/platform/dynload/nvjpeg.h | 1 + paddle/fluid/platform/dynload/nvrtc.cc | 1 + paddle/fluid/platform/dynload/nvrtc.h | 1 + paddle/fluid/platform/dynload/nvtx.h | 1 + paddle/fluid/platform/dynload/rccl.h | 1 + paddle/fluid/platform/dynload/rocblas.h | 1 + paddle/fluid/platform/dynload/rocm_driver.cc | 1 + paddle/fluid/platform/dynload/rocm_driver.h | 1 + paddle/fluid/platform/dynload/tensorrt.cc | 1 + paddle/fluid/platform/enforce.h | 3 + paddle/fluid/platform/enforce_test.cc | 125 ++- paddle/fluid/platform/errors.h | 4 +- paddle/fluid/platform/errors_test.cc | 3 +- paddle/fluid/platform/fast_divmod.h | 1 + paddle/fluid/platform/flags.h | 1 + paddle/fluid/platform/float16_test.cu | 1 + paddle/fluid/platform/gen_comm_id_helper.cc | 1 + paddle/fluid/platform/init_test.cc | 1 + paddle/fluid/platform/lock_guard_ptr.h | 1 + paddle/fluid/platform/mkldnn_reuse.h | 18 +- paddle/fluid/platform/monitor.h | 1 + paddle/fluid/platform/os_info.cc | 1 + paddle/fluid/platform/os_info_test.cc | 10 +- paddle/fluid/platform/profiler.cc | 3 +- .../platform/profiler/chrometracing_logger.cc | 11 +- .../platform/profiler/chrometracing_logger.h | 1 + paddle/fluid/platform/profiler/common_event.h | 1 + .../platform/profiler/cpu_utilization.cc | 26 +- .../fluid/platform/profiler/cpu_utilization.h | 2 + paddle/fluid/platform/profiler/cuda_tracer.cc | 2 + paddle/fluid/platform/profiler/cuda_tracer.h | 1 + .../platform/profiler/cupti_data_process.cc | 2 + .../platform/profiler/cupti_data_process.h | 1 + .../profiler/dump/deserialization_reader.cc | 2 + .../profiler/dump/serialization_logger.cc | 4 +- .../dump/test_serialization_logger.cc | 15 +- paddle/fluid/platform/profiler/event_node.cc | 1 + .../fluid/platform/profiler/event_python.cc | 1 + .../fluid/platform/profiler/event_tracing.h | 10 +- .../platform/profiler/host_event_recorder.h | 11 +- paddle/fluid/platform/profiler/host_tracer.cc | 1 + .../profiler/mlu/cnpapi_data_process.cc | 2 + .../fluid/platform/profiler/mlu/mlu_tracer.cc | 2 + paddle/fluid/platform/profiler/profiler.cc | 1 + paddle/fluid/platform/profiler/profiler.h | 1 + .../fluid/platform/profiler/profiler_test.cc | 7 +- .../platform/profiler/test_event_node.cc | 17 +- .../platform/profiler/trace_event_collector.h | 1 + paddle/fluid/platform/profiler/utils.h | 6 +- paddle/fluid/platform/profiler_helper.h | 4 +- paddle/fluid/platform/profiler_test.cc | 20 +- paddle/fluid/platform/resource_pool.h | 1 + paddle/fluid/platform/stream/cuda_stream.cc | 1 + .../fluid/platform/stream_callback_manager.cc | 1 + paddle/fluid/platform/transform.h | 1 + 
paddle/fluid/platform/transform_test.cu | 5 +- paddle/fluid/pybind/ascend_wrapper_py.cc | 44 +- paddle/fluid/pybind/bind_cost_model.cc | 1 + paddle/fluid/pybind/bind_fleet_executor.cc | 66 +- paddle/fluid/pybind/communication.cc | 38 +- paddle/fluid/pybind/communicator_py.cc | 5 +- paddle/fluid/pybind/compatible.cc | 14 +- paddle/fluid/pybind/const_value.cc | 1 + paddle/fluid/pybind/crypto.cc | 11 +- paddle/fluid/pybind/cuda_streams_py.cc | 215 ++--- paddle/fluid/pybind/data_set_py.cc | 1 + paddle/fluid/pybind/distributed_py.cc | 299 +++--- paddle/fluid/pybind/eager.cc | 148 +-- paddle/fluid/pybind/eager.h | 4 +- paddle/fluid/pybind/eager_custom_python_api.h | 1 + paddle/fluid/pybind/eager_functions.cc | 5 +- paddle/fluid/pybind/eager_method.cc | 29 +- .../pybind/eager_op_function_generator.cc | 3 +- paddle/fluid/pybind/eager_py_layer.cc | 35 +- paddle/fluid/pybind/eager_utils.cc | 4 + paddle/fluid/pybind/eager_utils.h | 7 +- paddle/fluid/pybind/exception.cc | 1 + paddle/fluid/pybind/fleet_py.cc | 17 +- paddle/fluid/pybind/fleet_wrapper_py.cc | 6 +- paddle/fluid/pybind/generator_py.cc | 3 +- paddle/fluid/pybind/gloo_context_py.cc | 60 +- paddle/fluid/pybind/gloo_context_py.h | 1 + paddle/fluid/pybind/imperative.cc | 854 +++++++++--------- paddle/fluid/pybind/imperative.h | 1 + paddle/fluid/pybind/inference_api.cc | 38 +- paddle/fluid/pybind/io.cc | 1 + paddle/fluid/pybind/io.h | 1 + paddle/fluid/pybind/ir.cc | 40 +- paddle/fluid/pybind/ir.h | 1 + paddle/fluid/pybind/op_function_common.cc | 3 +- paddle/fluid/pybind/protobuf.cc | 86 +- paddle/fluid/pybind/pybind.cc | 817 +++++++++-------- paddle/fluid/pybind/reader_py.cc | 165 ++-- paddle/fluid/pybind/slice_utils.h | 1 + paddle/fluid/pybind/tensor_py.h | 2 + paddle/fluid/pybind/uva_utils.h | 1 + paddle/fluid/string/pretty_log.h | 2 +- paddle/infrt/api/infrt_api.cc | 2 + paddle/infrt/backends/host/phi_context.h | 4 +- .../tensorrt/plugin/pool_op_plugin.cu | 4 +- .../backends/tensorrt/plugin/pool_op_plugin.h | 8 +- .../backends/tensorrt/test_trt_engine.cc | 4 +- paddle/infrt/backends/tensorrt/trt_engine.cc | 1 + paddle/infrt/backends/tensorrt/trt_engine.h | 1 + paddle/infrt/backends/tensorrt/trt_options.h | 4 +- paddle/infrt/common/global.h | 1 + paddle/infrt/common/memory.h | 2 +- paddle/infrt/dialect/dense_tensor.h | 3 + paddle/infrt/dialect/diagnostic_utils.cc | 1 + .../infrt/dialect/infrt/ir/infrt_dialect.cc | 2 + paddle/infrt/dialect/infrt/ir/infrt_dialect.h | 2 +- .../dialect/infrt/pass/infrt_op_fuse_pass.cc | 1 + paddle/infrt/dialect/init_dialects.cc | 2 - paddle/infrt/dialect/mlir_loader.cc | 2 +- paddle/infrt/dialect/mlir_loader.h | 2 +- paddle/infrt/dialect/opt.cc | 1 + .../infrt/dialect/pd/pass/pd_op_fuse_pass.cc | 1 + .../infrt/dialect/phi/ir/infrt_phi_tensor.h | 2 + paddle/infrt/dialect/phi/ir/phi_base.cc | 1 + paddle/infrt/dialect/phi/ir/phi_base.h | 2 +- paddle/infrt/dialect/phi/ir/phi_kernels.cc | 2 +- paddle/infrt/dialect/phi/ir/phi_kernels.h | 2 - .../infrt/dialect/phi/pass/kernel_op_desc.cc | 2 + .../infrt/dialect/phi/pass/kernel_op_desc.h | 1 + .../dialect/phi/pass/kernel_op_desc_test.cc | 2 + .../dialect/phi/pass/phi_op_convert_pass.cc | 1 + .../dialect/phi/pass/phi_op_convert_pass.h | 1 + .../dialect/phi/pass/proto_arg_map_context.h | 2 + paddle/infrt/dialect/phi/phi_exec.cc | 4 +- paddle/infrt/dialect/print_ir.cc | 5 +- paddle/infrt/dialect/tensor_shape.cc | 1 - paddle/infrt/dialect/tensorrt/convert.h | 1 + paddle/infrt/dialect/tensorrt/trt_exec.cc | 5 + .../dialect/tensorrt/trt_graph_fuse_pass.cc | 1 + 
.../dialect/tensorrt/trt_graph_split_pass.cc | 1 + .../dialect/tensorrt/trt_op_teller_pass.cc | 1 + paddle/infrt/dialect/tensorrt/trt_ops.cc | 3 + paddle/infrt/dialect/tensorrt/trt_ops.h | 1 + .../dialect/tensorrt/trt_type_convert_pass.cc | 1 + paddle/infrt/host_context/core_runtime.cc | 3 +- paddle/infrt/host_context/core_runtime.h | 2 +- paddle/infrt/host_context/kernel_registry.cc | 8 +- paddle/infrt/host_context/mlir_exec.cc | 1 + .../host_context/mlir_program_executor.h | 2 +- .../host_context/mlir_to_runtime_translate.cc | 6 +- paddle/infrt/host_context/op_executable.cc | 1 + paddle/infrt/host_context/op_executable.h | 1 + paddle/infrt/host_context/paddle_mlir.h | 1 + .../host_context/paddle_mlir_converter.cc | 4 +- paddle/infrt/host_context/symbol_table.h | 3 +- paddle/infrt/host_context/value.h | 8 +- .../infrt/kernel/phi/dense_tensor_kernels.cc | 2 + .../infershaped_kernel_launcher.cc | 1 + .../phi/infershaped/infershaped_utils.h | 1 + .../phi/infershaped/phi_kernel_launcher.h | 1 + paddle/infrt/kernel/tensorrt/trt_kernels.cc | 2 + paddle/infrt/kernel/tensorrt/trt_kernels.h | 1 - paddle/infrt/kernel/test_kernels.cc | 10 +- paddle/infrt/paddle/scope.h | 3 +- paddle/infrt/support/type_traits.h | 3 +- paddle/infrt/tests/models/test_abs.cc | 2 + paddle/phi/api/ext/op_meta_info.h | 70 +- paddle/phi/api/lib/api_custom_impl.cc | 3 +- paddle/phi/api/lib/backend_set.h | 5 +- paddle/phi/api/lib/data_transform.cc | 2 + paddle/phi/api/lib/sparse_api_custom_impl.cc | 1 + paddle/phi/api/lib/tensor.cc | 2 + paddle/phi/api/lib/tensor_copy.cc | 1 + paddle/phi/api/lib/tensor_method.cc | 2 + paddle/phi/api/lib/utils/tensor_utils.h | 1 - paddle/phi/backends/callback_manager.cc | 5 +- paddle/phi/backends/custom/custom_context.h | 1 + .../phi/backends/custom/custom_device_test.cc | 1 + paddle/phi/backends/device_base.cc | 6 +- paddle/phi/backends/device_ext.h | 4 +- paddle/phi/backends/device_manager.h | 3 +- paddle/phi/backends/dynload/cublas.h | 1 + paddle/phi/backends/dynload/cublasLt.h | 1 + paddle/phi/backends/dynload/cuda_driver.h | 1 + paddle/phi/backends/dynload/cudnn.cc | 1 + paddle/phi/backends/dynload/cudnn.h | 1 + paddle/phi/backends/dynload/cufft.cc | 1 + paddle/phi/backends/dynload/cufft.h | 1 + paddle/phi/backends/dynload/cupti.h | 1 + paddle/phi/backends/dynload/curand.h | 1 + paddle/phi/backends/dynload/cusolver.h | 1 + paddle/phi/backends/dynload/cusparse.h | 1 + paddle/phi/backends/dynload/hiprand.h | 2 +- paddle/phi/backends/dynload/hiprtc.h | 2 + paddle/phi/backends/dynload/lapack.cc | 1 + paddle/phi/backends/dynload/lapack.h | 1 + paddle/phi/backends/dynload/miopen.cc | 1 + paddle/phi/backends/dynload/miopen.h | 3 +- paddle/phi/backends/dynload/mklml.h | 1 + paddle/phi/backends/dynload/mklrt.h | 1 + paddle/phi/backends/dynload/nccl.h | 1 + paddle/phi/backends/dynload/nvjpeg.h | 1 + paddle/phi/backends/dynload/nvrtc.h | 1 + paddle/phi/backends/dynload/nvtx.h | 1 + paddle/phi/backends/dynload/port.h | 2 + paddle/phi/backends/dynload/rccl.h | 1 + paddle/phi/backends/dynload/rocblas.h | 1 + paddle/phi/backends/dynload/rocm_driver.h | 1 + paddle/phi/backends/dynload/tensorrt.cc | 1 + paddle/phi/backends/event.cc | 1 + paddle/phi/backends/gpu/cuda/cuda_helper.h | 2 +- paddle/phi/backends/gpu/gpu_context.cc | 1 - paddle/phi/backends/gpu/gpu_context.h | 1 + paddle/phi/backends/gpu/gpu_info.h | 1 + paddle/phi/backends/gpu/gpu_launch_config.h | 8 +- paddle/phi/backends/gpu/gpu_resources.h | 1 + paddle/phi/backends/gpu/rocm/rocm_helper.h | 2 +- paddle/phi/backends/gpu/rocm/rocm_info.cc | 1 + 
paddle/phi/backends/stream.cc | 1 + paddle/phi/backends/xpu/enforce_xpu.h | 3 +- paddle/phi/backends/xpu/xpu_context.cc | 5 +- paddle/phi/backends/xpu/xpu_context.h | 6 +- paddle/phi/backends/xpu/xpu_header.h | 1 - paddle/phi/backends/xpu/xpu_info.h | 1 + paddle/phi/common/data_type.h | 3 +- paddle/phi/common/int_array.cc | 3 +- paddle/phi/common/place.cc | 1 - paddle/phi/common/scalar.cc | 5 +- paddle/phi/core/compat/op_utils.h | 1 - paddle/phi/core/ddim.h | 8 +- paddle/phi/core/dense_tensor.h | 2 +- paddle/phi/core/dense_tensor_impl.cc | 7 +- paddle/phi/core/device_context.cc | 1 + paddle/phi/core/device_context.h | 32 +- paddle/phi/core/enforce.cc | 3 +- paddle/phi/core/hostdevice.h | 1 + paddle/phi/core/kernel_factory.cc | 1 - paddle/phi/core/kernel_registry.h | 222 +++-- paddle/phi/core/kernel_utils.h | 5 +- paddle/phi/core/meta_tensor.h | 3 +- paddle/phi/core/string_tensor.cc | 1 + paddle/phi/core/tensor_base.cc | 1 + paddle/phi/core/utils/intrusive_ptr.h | 1 + paddle/phi/infermeta/binary.cc | 1 + paddle/phi/infermeta/multiary.cc | 2 + paddle/phi/infermeta/ternary.cc | 1 + paddle/phi/kernels/assign_kernel.cc | 3 +- paddle/phi/kernels/auc_kernel.h | 1 + paddle/phi/kernels/autotune/auto_tune_base.h | 1 + paddle/phi/kernels/autotune/auto_tune_test.cu | 5 +- paddle/phi/kernels/autotune/cache.cc | 2 + paddle/phi/kernels/autotune/cache.h | 1 + paddle/phi/kernels/autotune/cache_test.cc | 3 + paddle/phi/kernels/autotune/gpu_timer_test.cu | 2 + paddle/phi/kernels/autotune/switch_autotune.h | 1 + paddle/phi/kernels/batch_norm_grad_kernel.h | 1 + .../kernels/broadcast_tensors_grad_kernel.h | 1 + paddle/phi/kernels/broadcast_tensors_kernel.h | 1 + .../phi/kernels/channel_shuffle_grad_kernel.h | 1 + paddle/phi/kernels/channel_shuffle_kernel.h | 1 + paddle/phi/kernels/conv_kernel.cc | 7 +- .../phi/kernels/conv_transpose_grad_kernel.h | 1 + paddle/phi/kernels/conv_transpose_kernel.h | 1 + paddle/phi/kernels/cpu/abs_kernel.cc | 1 + paddle/phi/kernels/cpu/accuracy_kernel.cc | 1 + paddle/phi/kernels/cpu/activation_kernel.cc | 1 + paddle/phi/kernels/cpu/adagrad_kernel.cc | 1 + paddle/phi/kernels/cpu/allclose_kernel.cc | 1 + paddle/phi/kernels/cpu/arange_kernel.cc | 1 + paddle/phi/kernels/cpu/atan2_grad_kernel.cc | 3 +- paddle/phi/kernels/cpu/atan2_kernel.cc | 3 +- .../phi/kernels/cpu/batch_norm_grad_kernel.cc | 3 +- paddle/phi/kernels/cpu/batch_norm_kernel.cc | 4 +- .../phi/kernels/cpu/bce_loss_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/bce_loss_kernel.cc | 1 + paddle/phi/kernels/cpu/bernoulli_kernel.cc | 2 + .../bilinear_tensor_product_grad_kernel.cc | 2 +- .../cpu/bilinear_tensor_product_kernel.cc | 2 +- .../cpu/broadcast_tensors_grad_kernel.cc | 1 + .../kernels/cpu/broadcast_tensors_kernel.cc | 2 +- paddle/phi/kernels/cpu/cast_kernel.cc | 2 +- .../cpu/channel_shuffle_grad_kernel.cc | 2 +- .../phi/kernels/cpu/channel_shuffle_kernel.cc | 2 +- .../kernels/cpu/cholesky_solve_grad_kernel.cc | 3 +- .../phi/kernels/cpu/cholesky_solve_kernel.cc | 3 +- paddle/phi/kernels/cpu/clip_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/clip_kernel.cc | 1 + paddle/phi/kernels/cpu/compare_kernel.cc | 2 +- paddle/phi/kernels/cpu/complex_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/complex_kernel.cc | 2 +- .../phi/kernels/cpu/conv_grad_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/conv_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/conv_kernel.cc | 2 +- .../kernels/cpu/conv_transpose_grad_kernel.cc | 2 +- .../phi/kernels/cpu/conv_transpose_kernel.cc | 2 +- .../phi/kernels/cpu/cross_entropy_kernel.cc | 3 +- 
paddle/phi/kernels/cpu/cross_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/cross_kernel.cc | 2 +- paddle/phi/kernels/cpu/cumprod_kernel.cc | 1 + .../cpu/deformable_conv_grad_kernel.cc | 40 +- paddle/phi/kernels/cpu/diag_grad_kernel.cc | 1 + .../phi/kernels/cpu/diagonal_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/diagonal_kernel.cc | 1 + paddle/phi/kernels/cpu/digamma_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/digamma_kernel.cc | 1 + paddle/phi/kernels/cpu/dist_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/dist_kernel.cc | 2 +- paddle/phi/kernels/cpu/dot_grad_kernel.cc | 5 +- paddle/phi/kernels/cpu/dropout_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/dropout_kernel.cc | 1 + paddle/phi/kernels/cpu/eigh_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/eigh_kernel.cc | 2 +- paddle/phi/kernels/cpu/einsum_kernel.cc | 1 + paddle/phi/kernels/cpu/elementwise.h | 3 +- .../phi/kernels/cpu/elementwise_add_kernel.cc | 2 +- .../kernels/cpu/elementwise_divide_kernel.cc | 2 +- paddle/phi/kernels/cpu/elementwise_kernel.cc | 2 +- .../cpu/elementwise_multiply_kernel.cc | 2 +- .../cpu/elementwise_subtract_kernel.cc | 2 +- .../phi/kernels/cpu/embedding_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/embedding_kernel.cc | 2 +- paddle/phi/kernels/cpu/erf_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/erf_kernel.cc | 1 + paddle/phi/kernels/cpu/erfinv_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/erfinv_kernel.cc | 2 +- .../phi/kernels/cpu/expand_as_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/expand_as_kernel.cc | 2 +- paddle/phi/kernels/cpu/expand_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/expand_kernel.cc | 1 + paddle/phi/kernels/cpu/eye_kernel.cc | 2 +- .../kernels/cpu/frobenius_norm_grad_kernel.cc | 2 +- .../phi/kernels/cpu/frobenius_norm_kernel.cc | 2 +- paddle/phi/kernels/cpu/full_kernel.cc | 1 - .../phi/kernels/cpu/gather_nd_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/gather_nd_kernel.cc | 1 + paddle/phi/kernels/cpu/gather_tree_kernel.cc | 1 + .../phi/kernels/cpu/gaussian_random_kernel.cc | 3 +- paddle/phi/kernels/cpu/gelu_kernel.cc | 2 + .../phi/kernels/cpu/graph_reindex_kernel.cc | 4 +- .../cpu/graph_sample_neighbors_kernel.cc | 4 +- .../cpu/graph_send_recv_grad_kernel.cc | 2 +- .../phi/kernels/cpu/graph_send_recv_kernel.cc | 2 +- .../kernels/cpu/grid_sample_grad_kernel.cc | 5 +- .../kernels/cpu/gumbel_softmax_grad_kernel.cc | 2 +- .../phi/kernels/cpu/gumbel_softmax_kernel.cc | 2 +- paddle/phi/kernels/cpu/histogram_kernel.cc | 1 + .../phi/kernels/cpu/huber_loss_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/huber_loss_kernel.cc | 1 + .../kernels/cpu/index_sample_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/index_sample_kernel.cc | 2 + .../kernels/cpu/instance_norm_grad_kernel.cc | 12 +- .../phi/kernels/cpu/instance_norm_kernel.cc | 1 + .../kernels/cpu/interpolate_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/isclose_kernel.cc | 1 + .../phi/kernels/cpu/kldiv_loss_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/kldiv_loss_kernel.cc | 1 + .../kernels/cpu/label_smooth_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/label_smooth_kernel.cc | 1 + .../phi/kernels/cpu/layer_norm_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/layer_norm_kernel.cc | 1 + paddle/phi/kernels/cpu/lerp_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/lerp_kernel.cc | 1 + paddle/phi/kernels/cpu/lgamma_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/lgamma_kernel.cc | 1 + .../kernels/cpu/log_softmax_grad_kernel.cc | 7 +- paddle/phi/kernels/cpu/log_softmax_kernel.cc | 35 +- paddle/phi/kernels/cpu/logsumexp_kernel.cc | 1 - paddle/phi/kernels/cpu/matmul_grad_kernel.cc | 1 - 
paddle/phi/kernels/cpu/matmul_kernel.cc | 3 +- .../kernels/cpu/matrix_power_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/matrix_power_kernel.cc | 2 +- paddle/phi/kernels/cpu/matrix_rank_kernel.cc | 2 +- .../phi/kernels/cpu/matrix_rank_tol_kernel.cc | 1 + paddle/phi/kernels/cpu/maxout_grad_kernel.cc | 3 +- paddle/phi/kernels/cpu/maxout_kernel.cc | 3 +- .../phi/kernels/cpu/meshgrid_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/meshgrid_kernel.cc | 2 +- paddle/phi/kernels/cpu/momentum_kernel.cc | 1 + .../phi/kernels/cpu/multi_dot_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/multi_dot_kernel.cc | 2 +- .../phi/kernels/cpu/multiplex_grad_kernel.cc | 1 - paddle/phi/kernels/cpu/mv_kernel.cc | 1 - .../phi/kernels/cpu/nll_loss_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/nll_loss_kernel.cc | 1 + paddle/phi/kernels/cpu/norm_grad_kernel.cc | 8 +- paddle/phi/kernels/cpu/norm_kernel.cc | 1 + paddle/phi/kernels/cpu/one_hot_kernel.cc | 1 + paddle/phi/kernels/cpu/p_norm_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/p_norm_kernel.cc | 1 + .../kernels/cpu/pixel_shuffle_grad_kernel.cc | 2 +- .../phi/kernels/cpu/pixel_shuffle_kernel.cc | 2 +- .../cpu/pixel_unshuffle_grad_kernel.cc | 2 +- .../phi/kernels/cpu/pixel_unshuffle_kernel.cc | 2 +- paddle/phi/kernels/cpu/poisson_kernel.cc | 3 +- paddle/phi/kernels/cpu/pool_grad_kernel.cc | 3 +- paddle/phi/kernels/cpu/pool_kernel.cc | 3 +- .../phi/kernels/cpu/psroi_pool_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/psroi_pool_kernel.cc | 1 + paddle/phi/kernels/cpu/qr_kernel.cc | 4 +- paddle/phi/kernels/cpu/reduce.h | 3 +- .../phi/kernels/cpu/reduce_sum_grad_kernel.cc | 1 - paddle/phi/kernels/cpu/rmsprop_kernel.cc | 1 + paddle/phi/kernels/cpu/rnn_functor.h | 14 +- paddle/phi/kernels/cpu/rnn_grad_kernel.cc | 7 +- paddle/phi/kernels/cpu/rnn_kernel.cc | 3 +- paddle/phi/kernels/cpu/roi_align_kernel.cc | 12 +- paddle/phi/kernels/cpu/scatter_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/scatter_kernel.cc | 1 + .../kernels/cpu/scatter_nd_add_grad_kernel.cc | 1 + .../phi/kernels/cpu/scatter_nd_add_kernel.cc | 1 + .../kernels/cpu/segment_pool_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/segment_pool_kernel.cc | 2 +- paddle/phi/kernels/cpu/selu_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/sgd_kernel.cc | 1 + paddle/phi/kernels/cpu/sign_kernel.cc | 2 +- paddle/phi/kernels/cpu/size_kernel.cc | 2 +- paddle/phi/kernels/cpu/slice_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/slice_kernel.cc | 2 +- .../sparse_weight_embedding_grad_kernel.cc | 2 +- .../cpu/sparse_weight_embedding_kernel.cc | 5 +- paddle/phi/kernels/cpu/split_kernel.cc | 1 - .../kernels/cpu/temporal_shift_grad_kernel.cc | 1 + .../phi/kernels/cpu/temporal_shift_kernel.cc | 1 + .../phi/kernels/cpu/transpose_grad_kernel.cc | 1 + .../phi/kernels/cpu/tril_triu_grad_kernel.cc | 3 +- paddle/phi/kernels/cpu/tril_triu_kernel.cc | 3 +- paddle/phi/kernels/cpu/trunc_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/trunc_kernel.cc | 3 +- paddle/phi/kernels/cpu/unfold_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/unfold_kernel.cc | 1 + .../phi/kernels/cpu/uniform_random_kernel.cc | 1 + paddle/phi/kernels/cpu/unique_kernel.cc | 1 + paddle/phi/kernels/cpu/unstack_grad_kernel.cc | 1 + paddle/phi/kernels/cpu/unstack_kernel.cc | 1 + .../phi/kernels/cpu/viterbi_decode_kernel.cc | 6 +- paddle/phi/kernels/cpu/warpctc_grad_kernel.cc | 2 +- paddle/phi/kernels/cpu/warpctc_kernel.cc | 2 +- paddle/phi/kernels/cpu/yolo_box_kernel.cc | 1 + .../kernels/cpu/yolov3_loss_grad_kernel.cc | 4 +- paddle/phi/kernels/cpu/yolov3_loss_kernel.cc | 4 +- 
paddle/phi/kernels/cumprod_grad_kernel.h | 2 +- paddle/phi/kernels/cumprod_kernel.h | 2 +- paddle/phi/kernels/diagonal_kernel.h | 2 +- paddle/phi/kernels/digamma_grad_kernel.h | 2 +- paddle/phi/kernels/digamma_kernel.h | 2 +- paddle/phi/kernels/empty_kernel.cc | 3 +- paddle/phi/kernels/expand_kernel.h | 2 +- paddle/phi/kernels/flatten_grad_kernel.cc | 1 + paddle/phi/kernels/flatten_kernel.cc | 1 + .../phi/kernels/frobenius_norm_grad_kernel.h | 1 + paddle/phi/kernels/frobenius_norm_kernel.h | 1 + paddle/phi/kernels/full_kernel.h | 1 - paddle/phi/kernels/funcs/activation_functor.h | 37 +- paddle/phi/kernels/funcs/adam_functors.h | 6 +- paddle/phi/kernels/funcs/aligned_vector.h | 21 +- paddle/phi/kernels/funcs/blas/blas_impl.cu.h | 5 +- paddle/phi/kernels/funcs/broadcast_function.h | 32 +- .../kernels/funcs/concat_and_split_functor.cu | 3 +- .../kernels/funcs/deformable_conv_functor.cc | 10 +- .../kernels/funcs/deformable_conv_functor.cu | 57 +- .../funcs/detail/activation_functions.h | 2 + paddle/phi/kernels/funcs/detail/avx_mathfun.h | 10 +- .../phi/kernels/funcs/detail/gru_cpu_kernel.h | 1 + .../phi/kernels/funcs/detail/gru_gpu_kernel.h | 1 + paddle/phi/kernels/funcs/detail/gru_kernel.h | 1 + .../kernels/funcs/detail/lstm_cpu_kernel.h | 1 + .../kernels/funcs/detail/lstm_gpu_kernel.h | 76 +- paddle/phi/kernels/funcs/detail/lstm_kernel.h | 1 + paddle/phi/kernels/funcs/diagonal.h | 1 + .../phi/kernels/funcs/distribution_helper.h | 7 +- paddle/phi/kernels/funcs/eigen/extensions.h | 1 - paddle/phi/kernels/funcs/elementwise_base.h | 21 +- .../phi/kernels/funcs/elementwise_functor.h | 1 + .../phi/kernels/funcs/elementwise_grad_base.h | 227 +++-- paddle/phi/kernels/funcs/fc_functor.cc | 17 +- paddle/phi/kernels/funcs/fc_functor.cu | 13 +- paddle/phi/kernels/funcs/fc_functor.h | 1 + paddle/phi/kernels/funcs/gather.cu.h | 43 +- paddle/phi/kernels/funcs/gather.h | 1 + paddle/phi/kernels/funcs/gru_compute.cu | 73 +- paddle/phi/kernels/funcs/inclusive_scan.h | 29 +- paddle/phi/kernels/funcs/index_impl.cu.h | 12 +- .../kernels/funcs/lapack/lapack_function.cc | 1 + paddle/phi/kernels/funcs/math_function.cc | 1 + paddle/phi/kernels/funcs/math_function.cu | 17 +- paddle/phi/kernels/funcs/math_function_impl.h | 1 + paddle/phi/kernels/funcs/matrix_inverse.cu.cc | 3 +- paddle/phi/kernels/funcs/matrix_inverse.h | 2 +- paddle/phi/kernels/funcs/padding.h | 1 + paddle/phi/kernels/funcs/pooling.cc | 79 +- paddle/phi/kernels/funcs/pooling.cu | 504 +++++------ paddle/phi/kernels/funcs/pooling.h | 1 + paddle/phi/kernels/funcs/reduce_function.h | 171 ++-- paddle/phi/kernels/funcs/scatter.cu.h | 17 +- paddle/phi/kernels/funcs/scatter.h | 2 +- paddle/phi/kernels/funcs/segment_pooling.cu | 125 ++- paddle/phi/kernels/funcs/segment_pooling.h | 1 + paddle/phi/kernels/funcs/select_impl.cu.h | 67 +- paddle/phi/kernels/funcs/sequence2batch.h | 1 + paddle/phi/kernels/funcs/slice_utils.h | 1 + .../kernels/funcs/sparse/flatten_indices.h | 1 + paddle/phi/kernels/funcs/sparse/scatter.cu.h | 2 +- paddle/phi/kernels/gpu/abs_kernel.cu | 1 + paddle/phi/kernels/gpu/accuracy_kernel.cu | 21 +- .../phi/kernels/gpu/activation_grad_kernel.cu | 6 +- paddle/phi/kernels/gpu/activation_kernel.cu | 6 +- paddle/phi/kernels/gpu/adadelta_kernel.cu | 3 +- paddle/phi/kernels/gpu/adagrad_kernel.cu | 28 +- paddle/phi/kernels/gpu/adam_kernel.cu | 4 +- paddle/phi/kernels/gpu/adamax_kernel.cu | 3 +- paddle/phi/kernels/gpu/adamw_kernel.cu | 4 +- paddle/phi/kernels/gpu/add_n_kernel.cu | 8 +- paddle/phi/kernels/gpu/addmm_grad_kernel.cu | 3 +- 
paddle/phi/kernels/gpu/addmm_kernel.cu | 3 +- paddle/phi/kernels/gpu/allclose_kernel.cu | 3 +- paddle/phi/kernels/gpu/arange_kernel.cu | 3 +- paddle/phi/kernels/gpu/arg_min_max_kernel.cu | 46 +- paddle/phi/kernels/gpu/argsort_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/argsort_kernel.cu | 4 +- paddle/phi/kernels/gpu/atan2_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/atan2_kernel.cu | 3 +- paddle/phi/kernels/gpu/auc_kernel.cu | 3 +- .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 272 +++--- paddle/phi/kernels/gpu/batch_norm_kernel.cu | 127 ++- .../phi/kernels/gpu/bce_loss_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/bce_loss_kernel.cu | 3 +- .../bilinear_tensor_product_grad_kernel.cu | 3 +- .../gpu/bilinear_tensor_product_kernel.cu | 3 +- paddle/phi/kernels/gpu/bincount_kernel.cu | 27 +- .../gpu/broadcast_tensors_grad_kernel.cu | 4 +- .../kernels/gpu/broadcast_tensors_kernel.cu | 5 +- paddle/phi/kernels/gpu/cast_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/cast_kernel.cu | 3 +- .../gpu/channel_shuffle_grad_kernel.cu | 5 +- .../phi/kernels/gpu/channel_shuffle_kernel.cu | 5 +- .../phi/kernels/gpu/cholesky_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/cholesky_kernel.cu | 5 +- .../kernels/gpu/cholesky_solve_grad_kernel.cu | 3 +- .../phi/kernels/gpu/cholesky_solve_kernel.cu | 3 +- paddle/phi/kernels/gpu/clip_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/clip_kernel.cu | 3 +- paddle/phi/kernels/gpu/complex_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/complex_kernel.cu | 5 +- paddle/phi/kernels/gpu/concat_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/concat_kernel.cu | 3 +- .../phi/kernels/gpu/conv_grad_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/conv_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/conv_kernel.cu | 5 +- .../kernels/gpu/conv_transpose_grad_kernel.cu | 5 +- .../phi/kernels/gpu/conv_transpose_kernel.cu | 5 +- paddle/phi/kernels/gpu/copy_kernel.cu | 3 +- .../kernels/gpu/cross_entropy_grad_kernel.cu | 49 +- .../phi/kernels/gpu/cross_entropy_kernel.cu | 121 ++- paddle/phi/kernels/gpu/cross_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/cross_kernel.cu | 5 +- paddle/phi/kernels/gpu/cumprod_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/cumprod_kernel.cu | 3 +- paddle/phi/kernels/gpu/cumsum_kernel.cu | 18 +- .../gpu/deformable_conv_grad_kernel.cu | 145 ++- .../phi/kernels/gpu/deformable_conv_kernel.cu | 3 +- paddle/phi/kernels/gpu/depthwise_conv.h | 361 ++++---- .../phi/kernels/gpu/depthwise_conv_kernel.cu | 7 +- .../kernels/gpu/determinant_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/determinant_kernel.cu | 3 +- paddle/phi/kernels/gpu/diag_grad_kernel.cu | 23 +- paddle/phi/kernels/gpu/diag_kernel.cu | 3 +- paddle/phi/kernels/gpu/dist_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/dist_kernel.cu | 5 +- paddle/phi/kernels/gpu/dot_grad_kernel.cu | 8 +- paddle/phi/kernels/gpu/dot_kernel.cu | 3 +- paddle/phi/kernels/gpu/dropout_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/dropout_kernel.cu | 3 +- paddle/phi/kernels/gpu/eigh_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/eigh_kernel.cu | 5 +- paddle/phi/kernels/gpu/einsum_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/einsum_kernel.cu | 3 +- .../gpu/elementwise_add_grad_kernel.cu | 3 +- .../gpu/elementwise_divide_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/elementwise_grad.h | 35 +- .../kernels/gpu/elementwise_grad_kernel.cu | 3 +- .../gpu/elementwise_multiply_grad_kernel.cu | 3 +- .../gpu/elementwise_subtract_grad_kernel.cu | 3 +- .../phi/kernels/gpu/embedding_grad_kernel.cu | 10 +- paddle/phi/kernels/gpu/embedding_kernel.cu | 5 +- 
paddle/phi/kernels/gpu/erfinv_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/erfinv_kernel.cu | 5 +- .../phi/kernels/gpu/expand_as_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/expand_as_kernel.cu | 5 +- paddle/phi/kernels/gpu/eye_kernel.cu | 5 +- paddle/phi/kernels/gpu/flip_kernel.cu | 21 +- .../kernels/gpu/frobenius_norm_grad_kernel.cu | 3 +- .../phi/kernels/gpu/frobenius_norm_kernel.cu | 3 +- paddle/phi/kernels/gpu/full_kernel.cu | 3 +- paddle/phi/kernels/gpu/gather_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/gather_kernel.cu | 3 +- paddle/phi/kernels/gpu/gather_tree_kernel.cu | 4 +- .../phi/kernels/gpu/gaussian_random_kernel.cu | 7 +- paddle/phi/kernels/gpu/gelu_funcs.h | 16 +- paddle/phi/kernels/gpu/gelu_grad_kernel.cu | 2 + paddle/phi/kernels/gpu/gelu_kernel.cu | 4 + paddle/phi/kernels/gpu/graph_reindex_funcs.h | 3 +- .../phi/kernels/gpu/graph_reindex_kernel.cu | 24 +- .../gpu/graph_sample_neighbors_kernel.cu | 73 +- .../phi/kernels/gpu/graph_send_recv_funcs.h | 4 +- .../gpu/graph_send_recv_grad_kernel.cu | 32 +- .../phi/kernels/gpu/graph_send_recv_kernel.cu | 42 +- .../kernels/gpu/grid_sample_grad_kernel.cu | 42 +- paddle/phi/kernels/gpu/grid_sample_kernel.cu | 33 +- .../kernels/gpu/gumbel_softmax_grad_kernel.cu | 3 +- .../phi/kernels/gpu/gumbel_softmax_kernel.cu | 21 +- paddle/phi/kernels/gpu/histogram_kernel.cu | 11 +- paddle/phi/kernels/gpu/increment_kernel.cu | 3 +- .../kernels/gpu/index_sample_grad_kernel.cu | 36 +- paddle/phi/kernels/gpu/index_sample_kernel.cu | 4 +- .../kernels/gpu/index_select_grad_kernel.cu | 40 +- paddle/phi/kernels/gpu/index_select_kernel.cu | 3 +- .../kernels/gpu/instance_norm_grad_kernel.cu | 79 +- .../phi/kernels/gpu/instance_norm_kernel.cu | 3 +- .../kernels/gpu/interpolate_grad_kernel.cu | 67 +- paddle/phi/kernels/gpu/interpolate_kernel.cu | 74 +- paddle/phi/kernels/gpu/isclose_kernel.cu | 3 +- .../phi/kernels/gpu/kldiv_loss_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/kldiv_loss_kernel.cu | 3 +- paddle/phi/kernels/gpu/kron_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/kron_kernel.cu | 3 +- .../phi/kernels/gpu/kthvalue_grad_kernel.cu | 9 +- paddle/phi/kernels/gpu/kthvalue_kernel.cu | 9 +- paddle/phi/kernels/gpu/label_smooth_kernel.cu | 1 + .../phi/kernels/gpu/layer_norm_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/layer_norm_kernel.cu | 74 +- paddle/phi/kernels/gpu/lgamma_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/lgamma_kernel.cu | 3 +- paddle/phi/kernels/gpu/linspace_kernel.cu | 3 +- .../phi/kernels/gpu/log_loss_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/log_loss_kernel.cu | 3 +- .../kernels/gpu/log_softmax_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/log_softmax_kernel.cu | 3 +- paddle/phi/kernels/gpu/logspace_kernel.cu | 7 +- .../phi/kernels/gpu/logsumexp_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/logsumexp_kernel.cu | 4 +- .../phi/kernels/gpu/masked_select_kernel.cu | 5 +- paddle/phi/kernels/gpu/matmul_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/matmul_kernel.cu | 6 +- .../kernels/gpu/matrix_power_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/matrix_power_kernel.cu | 3 +- paddle/phi/kernels/gpu/matrix_rank_kernel.cu | 5 +- .../phi/kernels/gpu/matrix_rank_tol_kernel.cu | 4 +- paddle/phi/kernels/gpu/maxout_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/maxout_kernel.cu | 3 +- .../phi/kernels/gpu/mean_all_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/mean_all_kernel.cu | 6 +- .../kernels/gpu/meshgrid_grad_kernel.cu.cc | 2 +- paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc | 2 +- paddle/phi/kernels/gpu/mode_grad_kernel.cu | 3 +- 
paddle/phi/kernels/gpu/mode_kernel.cu | 3 +- paddle/phi/kernels/gpu/momentum_kernel.cu | 3 +- .../phi/kernels/gpu/multi_dot_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/multi_dot_kernel.cu | 5 +- paddle/phi/kernels/gpu/multinomial_kernel.cu | 30 +- .../phi/kernels/gpu/multiplex_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/multiplex_kernel.cu | 3 +- paddle/phi/kernels/gpu/mv_grad_kernel.cu | 9 +- paddle/phi/kernels/gpu/mv_kernel.cu | 4 +- .../phi/kernels/gpu/nanmedian_grad_kernel.cu | 9 +- paddle/phi/kernels/gpu/nanmedian_kernel.cu | 49 +- paddle/phi/kernels/gpu/nll_loss.h | 2 + .../phi/kernels/gpu/nll_loss_grad_kernel.cu | 83 +- paddle/phi/kernels/gpu/nll_loss_kernel.cu | 85 +- paddle/phi/kernels/gpu/norm_grad_kernel.cu | 9 +- paddle/phi/kernels/gpu/norm_kernel.cu | 9 +- paddle/phi/kernels/gpu/one_hot_kernel.cu | 3 +- paddle/phi/kernels/gpu/p_norm_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/p_norm_kernel.cu | 3 +- paddle/phi/kernels/gpu/pad3d_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/pad3d_kernel.cu | 3 +- paddle/phi/kernels/gpu/pad_grad_kernel.cu | 3 +- .../kernels/gpu/pixel_shuffle_grad_kernel.cu | 5 +- .../phi/kernels/gpu/pixel_shuffle_kernel.cu | 5 +- .../gpu/pixel_unshuffle_grad_kernel.cu | 5 +- .../phi/kernels/gpu/pixel_unshuffle_kernel.cu | 5 +- paddle/phi/kernels/gpu/pool_grad_kernel.cu | 6 +- paddle/phi/kernels/gpu/pool_kernel.cu | 6 +- paddle/phi/kernels/gpu/prelu_funcs.h | 1 + paddle/phi/kernels/gpu/prelu_grad_kernel.cu | 27 +- paddle/phi/kernels/gpu/prelu_kernel.cu | 3 +- .../phi/kernels/gpu/psroi_pool_grad_kernel.cu | 4 +- paddle/phi/kernels/gpu/psroi_pool_kernel.cu | 30 +- .../kernels/gpu/put_along_axis_grad_kernel.cu | 3 +- .../phi/kernels/gpu/put_along_axis_kernel.cu | 3 +- paddle/phi/kernels/gpu/randint_kernel.cu | 3 +- paddle/phi/kernels/gpu/randperm_kernel.cu | 2 + paddle/phi/kernels/gpu/reduce.h | 6 +- paddle/phi/kernels/gpu/reduce_any_kernel.cu | 3 +- .../phi/kernels/gpu/reduce_max_grad_kernel.cu | 3 +- .../kernels/gpu/reduce_mean_grad_kernel.cu | 3 +- .../phi/kernels/gpu/reduce_min_grad_kernel.cu | 3 +- .../kernels/gpu/reduce_prod_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/reduce_prod_kernel.cu | 3 +- .../phi/kernels/gpu/reduce_sum_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/rmsprop_kernel.cu | 3 +- paddle/phi/kernels/gpu/rnn_functor.h | 5 +- paddle/phi/kernels/gpu/rnn_grad_kernel.cu.cc | 4 +- paddle/phi/kernels/gpu/rnn_kernel.cu.cc | 6 +- .../phi/kernels/gpu/roi_align_grad_kernel.cu | 38 +- paddle/phi/kernels/gpu/roi_align_kernel.cu | 34 +- .../phi/kernels/gpu/roi_pool_grad_kernel.cu | 36 +- paddle/phi/kernels/gpu/roi_pool_kernel.cu | 56 +- paddle/phi/kernels/gpu/roll_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/roll_kernel.cu | 3 +- paddle/phi/kernels/gpu/roll_kernel_impl.h | 33 +- paddle/phi/kernels/gpu/rrelu_grad_kernel.cu | 9 +- paddle/phi/kernels/gpu/scale_kernel.cu | 3 +- paddle/phi/kernels/gpu/searchsorted_kernel.cu | 3 +- .../kernels/gpu/segment_pool_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/segment_pool_kernel.cu | 5 +- paddle/phi/kernels/gpu/selu_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/selu_kernel.cu | 3 +- .../phi/kernels/gpu/set_value_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/set_value_kernel.cu | 3 +- paddle/phi/kernels/gpu/sgd_kernel.cu | 6 +- paddle/phi/kernels/gpu/shard_index_kernel.cu | 15 +- .../gpu/sigmoid_cross_entropy_with_logits.h | 1 + ...d_cross_entropy_with_logits_grad_kernel.cu | 3 +- ...igmoid_cross_entropy_with_logits_kernel.cu | 3 +- paddle/phi/kernels/gpu/sign_kernel.cu.cc | 2 +- paddle/phi/kernels/gpu/size_kernel.cu | 
5 +- .../phi/kernels/gpu/slice_grad_kernel.cu.cc | 2 +- paddle/phi/kernels/gpu/slice_kernel.cu.cc | 2 +- paddle/phi/kernels/gpu/softmax_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/softmax_kernel.cu | 3 +- paddle/phi/kernels/gpu/split_kernel.cu | 4 +- paddle/phi/kernels/gpu/squeeze_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/squeeze_kernel.cu | 3 +- paddle/phi/kernels/gpu/stack_grad_kernel.cu | 43 +- paddle/phi/kernels/gpu/stack_kernel.cu | 39 +- .../kernels/gpu/strided_slice_grad_kernel.cu | 3 +- .../phi/kernels/gpu/strided_slice_kernel.cu | 3 +- .../gpu/take_along_axis_grad_kernel.cu | 3 +- .../phi/kernels/gpu/take_along_axis_kernel.cu | 3 +- paddle/phi/kernels/gpu/tile_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/tile_kernel.cu | 3 +- paddle/phi/kernels/gpu/top_k_grad_kernel.cu | 9 +- paddle/phi/kernels/gpu/top_k_kernel.cu | 141 ++- paddle/phi/kernels/gpu/trace_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/trace_kernel.cu | 3 +- .../phi/kernels/gpu/transpose_grad_kernel.cu | 5 +- paddle/phi/kernels/gpu/transpose_kernel.cu | 5 +- .../kernels/gpu/triangular_solve_kernel.cu | 3 +- paddle/phi/kernels/gpu/tril_indices_kernel.cu | 3 +- .../phi/kernels/gpu/tril_triu_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/tril_triu_kernel.cu | 3 +- .../gpu/truncated_gaussian_random_kernel.cu | 4 +- paddle/phi/kernels/gpu/unbind_kernel.cu | 3 +- .../phi/kernels/gpu/uniform_random_kernel.cu | 5 +- paddle/phi/kernels/gpu/unique_kernel.cu | 5 +- .../phi/kernels/gpu/unsqueeze_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/unsqueeze_kernel.cu | 3 +- paddle/phi/kernels/gpu/unstack_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/unstack_kernel.cu | 3 +- .../phi/kernels/gpu/viterbi_decode_kernel.cu | 34 +- paddle/phi/kernels/gpu/warpctc_grad_kernel.cu | 3 +- paddle/phi/kernels/gpu/warpctc_kernel.cu | 3 +- paddle/phi/kernels/gpu/where_grad_kernel.cu | 9 +- paddle/phi/kernels/gpu/where_index_kernel.cu | 7 +- paddle/phi/kernels/gpu/where_kernel.cu | 3 +- .../kernels/gpudnn/conv_grad_grad_kernel.cu | 19 +- paddle/phi/kernels/gpudnn/conv_grad_kernel.cu | 18 +- paddle/phi/kernels/gpudnn/conv_kernel.cu | 20 +- .../gpudnn/conv_transpose_grad_kernel.cu | 4 +- .../kernels/gpudnn/conv_transpose_kernel.cu | 4 +- paddle/phi/kernels/gpudnn/pool_gpudnn.h | 1 + paddle/phi/kernels/gpudnn/pool_grad_kernel.cu | 6 +- paddle/phi/kernels/gpudnn/pool_kernel.cu | 6 +- paddle/phi/kernels/gpudnn/softmax_gpudnn.h | 82 +- .../phi/kernels/gpudnn/softmax_grad_kernel.cu | 3 +- paddle/phi/kernels/gpudnn/softmax_kernel.cu | 3 +- .../phi/kernels/graph_send_recv_grad_kernel.h | 1 + paddle/phi/kernels/graph_send_recv_kernel.h | 1 + .../phi/kernels/impl/activation_grad_impl.h | 3 +- paddle/phi/kernels/impl/activation_impl.h | 3 +- paddle/phi/kernels/impl/adagrad_kernel_impl.h | 3 +- .../phi/kernels/impl/addmm_grad_kernel_impl.h | 4 +- paddle/phi/kernels/impl/addmm_kernel_impl.h | 4 +- .../phi/kernels/impl/atan2_grad_kernel_impl.h | 3 +- paddle/phi/kernels/impl/atan2_kernel_impl.h | 3 +- .../impl/broadcast_tensors_kernel_impl.h | 4 +- .../kernels/impl/cholesky_grad_kernel_impl.h | 3 +- .../impl/cholesky_solve_grad_kernel_impl.h | 1 - .../kernels/impl/cholesky_solve_kernel_impl.h | 1 - .../phi/kernels/impl/clip_grad_kernel_impl.h | 8 +- paddle/phi/kernels/impl/clip_kernel_impl.h | 8 +- paddle/phi/kernels/impl/compare_kernel_impl.h | 3 +- .../kernels/impl/concat_grad_kernel_impl.h | 3 +- paddle/phi/kernels/impl/conv_cudnn_impl.h | 11 +- .../impl/conv_transpose_grad_kernel_impl.h | 3 +- .../kernels/impl/conv_transpose_kernel_impl.h | 3 +- 
.../impl/determinant_grad_kernel_impl.h | 3 +- .../kernels/impl/determinant_kernel_impl.h | 6 +- .../kernels/impl/digamma_grad_kernel_impl.h | 1 + paddle/phi/kernels/impl/digamma_kernel_impl.h | 1 + paddle/phi/kernels/impl/dist_kernel_impl.h | 2 + .../phi/kernels/impl/dot_grad_kernel_impl.h | 6 +- paddle/phi/kernels/impl/einsum_impl.h | 1 + .../impl/frobenius_norm_grad_kernel_impl.h | 1 - .../kernels/impl/frobenius_norm_kernel_impl.h | 3 +- .../kernels/impl/gumbel_softmax_kernel_impl.h | 1 + .../impl/kldiv_loss_grad_kernel_impl.h | 1 + .../phi/kernels/impl/kldiv_loss_kernel_impl.h | 1 + .../kernels/impl/lgamma_grad_kernel_impl.h | 1 + .../kernels/impl/matmul_grad_kernel_impl.h | 10 +- paddle/phi/kernels/impl/matmul_kernel_impl.h | 3 +- .../kernels/impl/maxout_grad_kernel_impl.h | 3 +- paddle/phi/kernels/impl/maxout_kernel_impl.h | 3 +- .../kernels/impl/meshgrid_grad_kernel_impl.h | 3 +- .../phi/kernels/impl/meshgrid_kernel_impl.h | 3 +- .../phi/kernels/impl/momentum_kernel_impl.h | 3 +- .../kernels/impl/nanmedian_grad_kernel_impl.h | 3 +- .../phi/kernels/impl/nanmedian_kernel_impl.h | 3 +- paddle/phi/kernels/impl/pad_kernel_impl.h | 1 + .../impl/pixel_shuffle_grad_kernel_impl.h | 1 + .../kernels/impl/pixel_shuffle_kernel_impl.h | 1 + .../phi/kernels/impl/pool_grad_kernel_impl.h | 3 +- paddle/phi/kernels/impl/pool_kernel_impl.h | 4 +- .../impl/reduce_max_grad_kernel_impl.h | 3 +- .../impl/reduce_min_grad_kernel_impl.h | 3 +- .../impl/reduce_prod_grad_kernel_impl.h | 3 +- paddle/phi/kernels/impl/reverse_kernel_impl.h | 3 +- paddle/phi/kernels/impl/rmsprop_kernel_impl.h | 3 +- .../impl/segment_pool_grad_kernel_impl.h | 1 + .../kernels/impl/segment_pool_kernel_impl.h | 1 + .../phi/kernels/impl/selu_grad_kernel_impl.h | 3 +- paddle/phi/kernels/impl/selu_kernel_impl.h | 1 + .../kernels/impl/set_value_grad_kernel_impl.h | 1 - .../phi/kernels/impl/set_value_kernel_impl.h | 1 - .../phi/kernels/impl/slice_grad_kernel_impl.h | 3 +- .../kernels/impl/softmax_grad_kernel_impl.h | 3 +- paddle/phi/kernels/impl/softmax_kernel_impl.h | 3 +- .../impl/strided_slice_grad_kernel_impl.h | 3 +- .../kernels/impl/strided_slice_kernel_impl.h | 3 +- .../impl/triangular_solve_grad_kernel_impl.h | 3 +- .../kernels/impl/tril_triu_grad_kernel_impl.h | 3 +- .../phi/kernels/impl/tril_triu_kernel_impl.h | 3 +- .../kernels/impl/unfold_grad_kernel_impl.h | 1 + paddle/phi/kernels/impl/unfold_kernel_impl.h | 1 + paddle/phi/kernels/kldiv_loss_kernel.h | 1 + paddle/phi/kernels/kps/compare_kernel.cu | 2 + paddle/phi/kernels/kps/reduce_all_kernel.cu | 3 +- paddle/phi/kernels/kps/reduce_max_kernel.cu | 3 +- paddle/phi/kernels/kps/reduce_mean_kernel.cu | 3 +- paddle/phi/kernels/kps/reduce_min_kernel.cu | 3 +- paddle/phi/kernels/kps/reduce_sum_kernel.cu | 3 +- .../phi/kernels/masked_select_grad_kernel.h | 2 +- paddle/phi/kernels/masked_select_kernel.h | 2 +- paddle/phi/kernels/matmul_kernel.h | 1 - paddle/phi/kernels/mv_kernel.h | 2 +- .../phi/kernels/pixel_shuffle_grad_kernel.h | 1 + paddle/phi/kernels/pixel_shuffle_kernel.h | 1 + .../phi/kernels/pixel_unshuffle_grad_kernel.h | 1 + paddle/phi/kernels/pixel_unshuffle_kernel.h | 1 + paddle/phi/kernels/pool_grad_kernel.h | 1 + paddle/phi/kernels/pool_kernel.h | 1 + .../kernels/primitive/compute_primitives.h | 208 ++--- .../primitive/compute_primitives_xpu2.h | 44 +- paddle/phi/kernels/reshape_grad_kernel.cc | 1 + paddle/phi/kernels/reshape_kernel.cc | 1 + .../selected_rows/activation_kernel.cc | 6 +- .../phi/kernels/selected_rows/full_kernel.cc | 5 +- 
.../kernels/selected_rows/gpu/adam_kernel.cu | 47 +- .../kernels/selected_rows/gpu/adamw_kernel.cu | 52 +- .../kernels/selected_rows/gpu/clip_kernel.cu | 3 +- .../selected_rows/impl/clip_kernel_impl.h | 3 +- .../phi/kernels/selected_rows/shape_kernel.cc | 1 + paddle/phi/kernels/selu_kernel.h | 2 +- paddle/phi/kernels/shape_kernel.cc | 1 + .../kernels/sparse/cpu/coalesced_kernel.cc | 1 + .../sparse/cpu/convolution_grad_kernel.cc | 1 + .../kernels/sparse/cpu/convolution_kernel.cc | 4 +- .../kernels/sparse/cpu/sparse_mask_kernel.cc | 6 +- .../sparse/cpu/sparse_pool_grad_kernel.cc | 1 + .../kernels/sparse/cpu/sparse_pool_kernel.cc | 3 +- .../kernels/sparse/cpu/sparse_utils_kernel.cc | 1 + .../kernels/sparse/gpu/coalesced_kernel.cu | 20 +- .../phi/kernels/sparse/gpu/convolution.cu.h | 34 +- .../sparse/gpu/convolution_grad_kernel.cu | 18 +- .../kernels/sparse/gpu/convolution_kernel.cu | 44 +- .../kernels/sparse/gpu/sparse_mask_kernel.cu | 2 +- .../sparse/gpu/sparse_pool_grad_kernel.cu | 27 +- .../kernels/sparse/gpu/sparse_pool_kernel.cu | 25 +- .../kernels/sparse/gpu/sparse_utils_kernel.cu | 25 +- .../sparse/sparse_utils_grad_kernel.cc | 1 + paddle/phi/kernels/split_kernel.h | 3 +- paddle/phi/kernels/strings/case_utils.h | 1 + .../strings/cpu/strings_copy_kernel.cc | 2 +- .../strings/gpu/strings_copy_kernel.cu | 8 +- .../strings/gpu/strings_lower_upper_kernel.cu | 8 +- .../strings/strings_lower_upper_kernel.h | 3 +- paddle/phi/kernels/strings/unicode.cc | 2 + paddle/phi/kernels/strings/unicode.h | 1 + paddle/phi/kernels/transpose_grad_kernel.h | 1 + paddle/phi/ops/compat/matrix_rank_sig.cc | 4 +- paddle/phi/ops/compat/segment_pool_sig.cc | 6 +- paddle/phi/tests/api/scale_api.h | 1 - paddle/phi/tests/api/test_cast_api.cc | 2 +- paddle/phi/tests/api/test_concat_api.cc | 2 +- paddle/phi/tests/api/test_conj_api.cc | 2 +- paddle/phi/tests/api/test_data_transform.cc | 1 + paddle/phi/tests/api/test_dot_api.cc | 2 +- paddle/phi/tests/api/test_elementwise_api.cc | 2 +- paddle/phi/tests/api/test_embedding_api.cc | 2 +- paddle/phi/tests/api/test_empty_api.cc | 2 +- paddle/phi/tests/api/test_fill_api.cc | 2 +- paddle/phi/tests/api/test_matmul_api.cc | 2 +- paddle/phi/tests/api/test_mean_api.cc | 2 +- paddle/phi/tests/api/test_pten_exception.cc | 1 + paddle/phi/tests/api/test_reshape_api.cc | 2 +- paddle/phi/tests/api/test_scale_api.cc | 2 +- paddle/phi/tests/api/test_scale_benchmark.cc | 2 +- paddle/phi/tests/api/test_slice_api.cc | 1 + paddle/phi/tests/api/test_sparse_conv_api.cc | 3 +- paddle/phi/tests/api/test_sparse_utils_api.cc | 3 +- paddle/phi/tests/api/test_split_api.cc | 2 +- .../phi/tests/api/test_strings_empty_api.cc | 1 + .../tests/api/test_strings_lower_upper_api.cc | 1 + paddle/phi/tests/api/test_sum_api.cc | 2 +- paddle/phi/tests/api/test_to_api.cc | 2 +- paddle/phi/tests/common/test_backend.cc | 1 + paddle/phi/tests/common/test_data_layout.cc | 1 + paddle/phi/tests/common/test_data_type.cc | 1 + paddle/phi/tests/common/test_int_array.cc | 7 +- paddle/phi/tests/common/test_place.cc | 4 +- paddle/phi/tests/common/test_scalar.cu | 1 + paddle/phi/tests/core/test_dense_tensor.cc | 3 +- paddle/phi/tests/core/test_dim.cu | 1 + paddle/phi/tests/core/test_intrusive_ptr.cc | 1 - paddle/phi/tests/core/test_kernel_factory.cc | 3 +- paddle/phi/tests/core/test_rw_lock.cc | 7 +- paddle/phi/tests/core/test_selected_rows.cc | 1 + .../phi/tests/core/test_sparse_coo_tensor.cc | 3 +- .../phi/tests/core/test_sparse_csr_tensor.cc | 1 - paddle/phi/tests/core/test_string_tensor.cc | 2 +- 
paddle/phi/tests/core/test_type_info.cc | 1 - .../phi/tests/core/unroll_array_ops_test.cc | 1 + paddle/phi/tests/kernels/test_cast_dev_api.cc | 6 +- .../phi/tests/kernels/test_concat_dev_api.cc | 4 +- paddle/phi/tests/kernels/test_conj_dev_api.cc | 6 +- paddle/phi/tests/kernels/test_copy_dev_api.cc | 8 +- .../tests/kernels/test_creation_dev_api.cc | 8 +- paddle/phi/tests/kernels/test_dot_dev_api.cc | 6 +- .../tests/kernels/test_elementwise_dev_api.cc | 10 +- .../phi/tests/kernels/test_flatten_dev_api.cc | 6 +- .../phi/tests/kernels/test_matmul_dev_api.cc | 4 +- paddle/phi/tests/kernels/test_mean_dev_api.cc | 4 +- .../phi/tests/kernels/test_reshape_dev_api.cc | 4 +- .../phi/tests/kernels/test_scale_dev_api.cc | 4 +- .../kernels/test_sparse_activation_dev_api.cc | 6 +- .../kernels/test_sparse_conv3d_dev_api.cc | 8 +- .../tests/kernels/test_sparse_pool_dev_api.cc | 59 +- .../kernels/test_sparse_utils_dev_api.cc | 11 +- .../phi/tests/kernels/test_split_dev_api.cc | 4 +- .../kernels/test_strings_copy_dev_api.cc | 2 + .../kernels/test_strings_copy_dev_api.cu | 2 + .../test_strings_lower_upper_dev_api.cc | 4 +- .../test_strings_lower_upper_dev_api.cu | 3 +- paddle/phi/tests/kernels/test_sum_dev_api.cc | 4 +- paddle/phi/tests/ops/test_op_signature.cc | 1 + paddle/phi/tests/ops/test_op_signature.h | 1 + paddle/utils/flat_hash_map.h | 20 +- paddle/utils/none.h | 2 +- paddle/utils/optional.h | 2 +- paddle/utils/small_vector.h | 21 +- paddle/utils/string/piece.cc | 6 +- paddle/utils/string/pretty_log.cc | 1 + paddle/utils/string/pretty_log.h | 14 +- paddle/utils/string/string_helper.cc | 1 + paddle/utils/string/string_helper.h | 1 + paddle/utils/string/tinyformat/tinyformat.h | 12 +- paddle/utils/string/to_string_test.cc | 1 + paddle/utils/variant.h | 231 ++--- paddle/utils/variant_test.cc | 1 + tools/codestyle/clang_format.hook | 7 +- 2757 files changed, 12664 insertions(+), 10779 deletions(-) mode change 100755 => 100644 paddle/fluid/distributed/ps/service/brpc_ps_client.cc mode change 100755 => 100644 paddle/fluid/distributed/ps/service/brpc_ps_server.cc mode change 100755 => 100644 paddle/fluid/distributed/ps/wrapper/ps_wrapper.h mode change 100755 => 100644 paddle/fluid/framework/data_feed.cc mode change 100755 => 100644 paddle/fluid/framework/string_array.cc mode change 100755 => 100644 paddle/fluid/operators/expand_as_v2_op.cc mode change 100755 => 100644 paddle/fluid/operators/fused/fused_dropout_act_bias.h mode change 100755 => 100644 paddle/fluid/operators/interpolate_op_npu.cc mode change 100755 => 100644 paddle/fluid/operators/pscore/heter_listen_and_serv_op.h mode change 100755 => 100644 paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 42181c8f959..4b588cbeb91 100755 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,6 +33,10 @@ repos: entry: bash ./tools/codestyle/clang_format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps)$ + exclude: | + (?x)^( + paddle/fluid/distributed/ps/thirdparty/round_robin.h + )$ - repo: local hooks: - id: cpplint-cpp-source diff --git a/paddle/fluid/distributed/collective/HCCLTools.cc b/paddle/fluid/distributed/collective/HCCLTools.cc index 526a683e057..676a71cb30d 100644 --- a/paddle/fluid/distributed/collective/HCCLTools.cc +++ b/paddle/fluid/distributed/collective/HCCLTools.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/distributed/collective/HCCLTools.h" + #include "paddle/fluid/distributed/collective/Types.h" namespace paddle { diff --git a/paddle/fluid/distributed/collective/HCCLTools.h b/paddle/fluid/distributed/collective/HCCLTools.h index a1dcf7cd9b6..4955e24eadb 100644 --- a/paddle/fluid/distributed/collective/HCCLTools.h +++ b/paddle/fluid/distributed/collective/HCCLTools.h @@ -15,6 +15,7 @@ #pragma once #include + #include #include "boost/variant.hpp" diff --git a/paddle/fluid/distributed/collective/NCCLTools.cc b/paddle/fluid/distributed/collective/NCCLTools.cc index 7e842ebf921..2cecaf0734d 100644 --- a/paddle/fluid/distributed/collective/NCCLTools.cc +++ b/paddle/fluid/distributed/collective/NCCLTools.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/collective/NCCLTools.h" + #include "paddle/fluid/distributed/collective/Types.h" namespace paddle { diff --git a/paddle/fluid/distributed/collective/NCCLTools.h b/paddle/fluid/distributed/collective/NCCLTools.h index 0454518b183..f38ce8faa7f 100644 --- a/paddle/fluid/distributed/collective/NCCLTools.h +++ b/paddle/fluid/distributed/collective/NCCLTools.h @@ -16,9 +16,11 @@ #include #include + #include #include "boost/variant.hpp" +#include "paddle/fluid/distributed/collective/Types.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/cuda_device_guard.h" @@ -26,8 +28,6 @@ #include "paddle/fluid/platform/dynload/nccl.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/distributed/collective/Types.h" - namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/collective/ProcessGroup.h b/paddle/fluid/distributed/collective/ProcessGroup.h index 52e09792d5d..7ed6b188fd2 100644 --- a/paddle/fluid/distributed/collective/ProcessGroup.h +++ b/paddle/fluid/distributed/collective/ProcessGroup.h @@ -21,7 +21,6 @@ #include "paddle/fluid/distributed/collective/Types.h" #include "paddle/fluid/eager/api/utils/tensor_utils.h" - #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/distributed/collective/ProcessGroupGloo.cc b/paddle/fluid/distributed/collective/ProcessGroupGloo.cc index 824341c3cd9..1a390e38755 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupGloo.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupGloo.cc @@ -27,6 +27,7 @@ #include #include #include + #include "paddle/fluid/distributed/collective/Common.h" #include "paddle/fluid/distributed/collective/ProcessGroupGloo.h" #include "paddle/fluid/framework/fleet/gloo_wrapper.h" @@ -485,8 +486,9 @@ std::shared_ptr<::gloo::transport::Device> ProcessGroupGloo::createDefaultDevice() { std::array hostname{}; auto ret = ::gethostname(hostname.data(), HOST_NAME_MAX); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::Fatal( - "Get hostname error for createDefaultDevice.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::Fatal("Get hostname error for createDefaultDevice.")); ::addrinfo* result; result = tcputils::get_addr_info(hostname.data(), "", 0, AF_UNSPEC); ::addrinfo* cur; diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc index 9ed6c2198df..50249b03967 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/distributed/collective/ProcessGroupHCCL.h" + #include "paddle/fluid/distributed/collective/Common.h" #include "paddle/fluid/distributed/collective/HCCLTools.h" #include "paddle/fluid/memory/malloc.h" @@ -216,15 +217,16 @@ std::shared_ptr ProcessGroupHCCL::AllReduce( std::vector& in_tensors, // NOLINT std::vector& out_tensors, // NOLINT const AllreduceOptions& opts) { - return Collective(in_tensors, out_tensors, - [&](phi::DenseTensor& input, phi::DenseTensor& output, - HcclComm comm, const aclrtStream& stream) { - return platform::dynload::HcclAllReduce( - input.data(), output.data(), input.numel(), - platform::ToHCCLDataType(input.dtype()), - ToHCCLRedType(opts.reduce_op), comm, stream); - }, - CommType::ALLREDUCE); + return Collective( + in_tensors, out_tensors, + [&](phi::DenseTensor& input, phi::DenseTensor& output, HcclComm comm, + const aclrtStream& stream) { + return platform::dynload::HcclAllReduce( + input.data(), output.data(), input.numel(), + platform::ToHCCLDataType(input.dtype()), + ToHCCLRedType(opts.reduce_op), comm, stream); + }, + CommType::ALLREDUCE); } std::shared_ptr ProcessGroupHCCL::Broadcast( diff --git a/paddle/fluid/distributed/collective/ProcessGroupHCCL.h b/paddle/fluid/distributed/collective/ProcessGroupHCCL.h index 2f0ff6b9565..a32984798fe 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHCCL.h +++ b/paddle/fluid/distributed/collective/ProcessGroupHCCL.h @@ -21,12 +21,11 @@ #include #include +#include "paddle/fluid/distributed/collective/HCCLTools.h" #include "paddle/fluid/distributed/collective/ProcessGroup.h" +#include "paddle/fluid/distributed/store/store.h" #include "paddle/fluid/platform/device/npu/npu_stream.h" #include "paddle/fluid/platform/device_context.h" - -#include "paddle/fluid/distributed/collective/HCCLTools.h" -#include "paddle/fluid/distributed/store/store.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc index 0911a4a3e3e..0b388a6a848 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupHeter.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupHeter.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/distributed/collective/ProcessGroupHeter.h" + #include + #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/api/include/api.h" @@ -129,8 +131,9 @@ std::shared_ptr ProcessGroupHeter::AllReduce( gid_, {dense_cpu_tensor.name()}, send_size, dense_cpu_tensor.data(), dense_cpu_tensor.numel() * framework::DataTypeSize(dense_cpu_tensor.dtype())); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "Send to the switch module error.")); + PADDLE_ENFORCE_EQ(ret, 0, + platform::errors::PreconditionNotMet( + "Send to the switch module error.")); phi::DenseTensor cpu_tensor2; cpu_tensor2.AllocateFrom( std::make_unique( @@ -140,8 +143,9 @@ std::shared_ptr ProcessGroupHeter::AllReduce( ret = client_->Recv( gid_, {dense_cpu_tensor.name()}, cpu_tensor2.data(), cpu_tensor2.numel() * framework::DataTypeSize(cpu_tensor2.dtype())); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "Recv from the switch module error.")); + PADDLE_ENFORCE_EQ(ret, 0, + platform::errors::PreconditionNotMet( + "Recv from the switch module error.")); switch (dense_cpu_tensor.dtype()) { case DataType::FLOAT32: @@ -226,8 +230,9 @@ std::shared_ptr ProcessGroupHeter::Broadcast( dense_cpu_tensor.data(), dense_cpu_tensor.numel() * framework::DataTypeSize(dense_cpu_tensor.dtype())); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "Send to the switch module error.")); + PADDLE_ENFORCE_EQ(ret, 0, + platform::errors::PreconditionNotMet( + "Send to the switch module error.")); } else { int ret = client_->Recv( gid_, {dense_cpu_tensor.name()}, dense_cpu_tensor.data(), @@ -286,8 +291,9 @@ std::shared_ptr ProcessGroupHeter::Send( VLOG(2) << "tensor_name:" << tensor_name; int ret = client_->Send(gid_, {tensor_name}, send_size, cpu_tensor.data(), tensor_size); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "Send to the switch module error.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("Send to the switch module error.")); return CreateTask(rank_, CommType::SEND, in_tensors); } @@ -319,8 +325,9 @@ std::shared_ptr ProcessGroupHeter::Recv( int ret = client_->Recv( gid_, {tensor_name}, cpu_tensor.data(), cpu_tensor.numel() * framework::DataTypeSize(cpu_tensor.dtype())); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "receive to the switch module error.")); + PADDLE_ENFORCE_EQ(ret, 0, + platform::errors::PreconditionNotMet( + "receive to the switch module error.")); auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration diff = end - start; double goodput = cpu_tensor.numel() * diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc index f1b66864b29..dc67205c78f 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/distributed/collective/ProcessGroupNCCL.h" + #include "paddle/fluid/distributed/collective/Common.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" @@ -320,15 +321,16 @@ std::shared_ptr ProcessGroupNCCL::AllReduce( PADDLE_ENFORCE_EQ( CheckTensorsInCudaPlace(in_tensors), true, platform::errors::InvalidArgument("All inputs should be in CudaPlace.")); - return Collective(in_tensors, out_tensors, - [&](const phi::DenseTensor& input, phi::DenseTensor& output, - ncclComm_t comm, const gpuStream_t& stream) { - return platform::dynload::ncclAllReduce( - input.data(), output.data(), input.numel(), - platform::ToNCCLDataType(input.type()), - ToNCCLRedType(opts.reduce_op), comm, stream); - }, - CommType::ALLREDUCE); + return Collective( + in_tensors, out_tensors, + [&](const phi::DenseTensor& input, phi::DenseTensor& output, + ncclComm_t comm, const gpuStream_t& stream) { + return platform::dynload::ncclAllReduce( + input.data(), output.data(), input.numel(), + platform::ToNCCLDataType(input.type()), + ToNCCLRedType(opts.reduce_op), comm, stream); + }, + CommType::ALLREDUCE); } std::shared_ptr ProcessGroupNCCL::Broadcast( @@ -338,17 +340,17 @@ std::shared_ptr ProcessGroupNCCL::Broadcast( CheckTensorsInCudaPlace(in_tensors), true, platform::errors::InvalidArgument("All inputs should be in CudaPlace.")); - return Collective(in_tensors, out_tensors, - [&](phi::DenseTensor& input, phi::DenseTensor& output, - ncclComm_t comm, const gpuStream_t& stream) { - const auto root = opts.source_rank * in_tensors.size() + - opts.source_root; - return platform::dynload::ncclBroadcast( - input.data(), output.data(), input.numel(), - platform::ToNCCLDataType(input.type()), root, comm, - stream); - }, - CommType::BROADCAST); + return Collective( + in_tensors, out_tensors, + [&](phi::DenseTensor& input, phi::DenseTensor& output, ncclComm_t comm, + const gpuStream_t& stream) { + const auto root = + opts.source_rank * in_tensors.size() + opts.source_root; + return platform::dynload::ncclBroadcast( + input.data(), output.data(), input.numel(), + platform::ToNCCLDataType(input.type()), root, comm, stream); + }, + CommType::BROADCAST); } std::shared_ptr ProcessGroupNCCL::Barrier( @@ -400,15 +402,15 @@ std::shared_ptr ProcessGroupNCCL::Send( std::vector& tensors, int dst_rank) { CheckTensorsInDifferentDevices(tensors, static_cast(GetSize())); - auto task = PointToPoint(tensors, - [&](phi::DenseTensor& input, ncclComm_t comm, - const gpuStream_t& stream, int dst_rank) { - return platform::dynload::ncclSend( - input.data(), input.numel(), - platform::ToNCCLDataType(input.dtype()), - dst_rank, comm, stream); - }, - dst_rank, CommType::SEND); + auto task = PointToPoint( + tensors, + [&](phi::DenseTensor& input, ncclComm_t comm, const gpuStream_t& stream, + int dst_rank) { + return platform::dynload::ncclSend( + input.data(), input.numel(), + platform::ToNCCLDataType(input.dtype()), dst_rank, comm, stream); + }, + dst_rank, CommType::SEND); return task; } @@ -416,15 +418,15 @@ std::shared_ptr ProcessGroupNCCL::Recv( std::vector& tensors, int src_rank) { CheckTensorsInDifferentDevices(tensors, static_cast(GetSize())); - auto task = PointToPoint(tensors, - [&](phi::DenseTensor& output, ncclComm_t comm, - const gpuStream_t& stream, int src_rank) { - return platform::dynload::ncclRecv( - output.data(), output.numel(), - platform::ToNCCLDataType(output.dtype()), - src_rank, comm, stream); - }, - src_rank, CommType::RECV); + auto task = 
PointToPoint( + tensors, + [&](phi::DenseTensor& output, ncclComm_t comm, const gpuStream_t& stream, + int src_rank) { + return platform::dynload::ncclRecv( + output.data(), output.numel(), + platform::ToNCCLDataType(output.dtype()), src_rank, comm, stream); + }, + src_rank, CommType::RECV); return task; } @@ -440,15 +442,15 @@ std::shared_ptr ProcessGroupNCCL::Send_Partial( std::vector shared_tensors; shared_tensors.push_back(shared_input); - auto task = PointToPoint(shared_tensors, - [&](phi::DenseTensor& input, ncclComm_t comm, - const gpuStream_t& stream, int dst_rank) { - return platform::dynload::ncclSend( - input.data(), input.numel(), - platform::ToNCCLDataType(input.dtype()), - dst_rank, comm, stream); - }, - dst_rank, CommType::SEND); + auto task = PointToPoint( + shared_tensors, + [&](phi::DenseTensor& input, ncclComm_t comm, const gpuStream_t& stream, + int dst_rank) { + return platform::dynload::ncclSend( + input.data(), input.numel(), + platform::ToNCCLDataType(input.dtype()), dst_rank, comm, stream); + }, + dst_rank, CommType::SEND); return task; } @@ -463,15 +465,15 @@ std::shared_ptr ProcessGroupNCCL::Recv_Partial( std::vector shared_tensors; shared_tensors.push_back(shared_input); - auto task = PointToPoint(shared_tensors, - [&](phi::DenseTensor& output, ncclComm_t comm, - const gpuStream_t& stream, int src_rank) { - return platform::dynload::ncclRecv( - output.data(), output.numel(), - platform::ToNCCLDataType(output.dtype()), - src_rank, comm, stream); - }, - src_rank, CommType::RECV); + auto task = PointToPoint( + shared_tensors, + [&](phi::DenseTensor& output, ncclComm_t comm, const gpuStream_t& stream, + int src_rank) { + return platform::dynload::ncclRecv( + output.data(), output.numel(), + platform::ToNCCLDataType(output.dtype()), src_rank, comm, stream); + }, + src_rank, CommType::RECV); return task; } @@ -484,15 +486,15 @@ std::shared_ptr ProcessGroupNCCL::AllGather( PADDLE_ENFORCE_EQ( CheckTensorsInCudaPlace(out_tensors), true, platform::errors::InvalidArgument("All outputs should be in CudaPlace.")); - return Collective(in_tensors, out_tensors, - [&](const phi::DenseTensor& input, phi::DenseTensor& output, - ncclComm_t comm, const gpuStream_t& stream) { - return platform::dynload::ncclAllGather( - input.data(), output.data(), input.numel(), - platform::ToNCCLDataType(input.dtype()), comm, - stream); - }, - CommType::ALLGATHER); + return Collective( + in_tensors, out_tensors, + [&](const phi::DenseTensor& input, phi::DenseTensor& output, + ncclComm_t comm, const gpuStream_t& stream) { + return platform::dynload::ncclAllGather( + input.data(), output.data(), input.numel(), + platform::ToNCCLDataType(input.dtype()), comm, stream); + }, + CommType::ALLGATHER); } void* GetPointerByOffset(void* raw_pointer, size_t offset, diff --git a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h b/paddle/fluid/distributed/collective/ProcessGroupNCCL.h index 82ced6e135a..2325e645b4c 100644 --- a/paddle/fluid/distributed/collective/ProcessGroupNCCL.h +++ b/paddle/fluid/distributed/collective/ProcessGroupNCCL.h @@ -22,10 +22,9 @@ #include #include "paddle/fluid/distributed/collective/ProcessGroup.h" +#include "paddle/fluid/distributed/store/store.h" #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device_context.h" - -#include "paddle/fluid/distributed/store/store.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" #include "paddle/fluid/platform/place.h" diff --git 
a/paddle/fluid/distributed/collective/reducer.cc b/paddle/fluid/distributed/collective/reducer.cc index 96009ce7229..9c04b95a732 100644 --- a/paddle/fluid/distributed/collective/reducer.cc +++ b/paddle/fluid/distributed/collective/reducer.cc @@ -403,8 +403,9 @@ void EagerReducer::InitializeDenseGroups( "Tensor %s is not initialized.", tensor_name)); const auto size = tensor.numel(); PADDLE_ENFORCE_GT( - size, 0, platform::errors::PreconditionNotMet( - "The number of tensor %s's elements is 0.", tensor_name)); + size, 0, + platform::errors::PreconditionNotMet( + "The number of tensor %s's elements is 0.", tensor_name)); all_length += size; p_group->length_.push_back(size); diff --git a/paddle/fluid/distributed/collective/reducer.h b/paddle/fluid/distributed/collective/reducer.h index 424bae0e5ac..0527ceb9b51 100644 --- a/paddle/fluid/distributed/collective/reducer.h +++ b/paddle/fluid/distributed/collective/reducer.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/distributed/collective/ProcessGroup.h" #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/utils/hook_utils.h" diff --git a/paddle/fluid/distributed/common/afs_warpper.cc b/paddle/fluid/distributed/common/afs_warpper.cc index d539ec60804..3a37c6be7c2 100644 --- a/paddle/fluid/distributed/common/afs_warpper.cc +++ b/paddle/fluid/distributed/common/afs_warpper.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/common/afs_warpper.h" + #include "paddle/fluid/framework/io/fs.h" namespace paddle { @@ -27,9 +28,10 @@ int AfsClient::initialize(const FsClientParameter& fs_client_param) { int AfsClient::initialize(const std::string& hadoop_bin, const std::string& uri, const std::string& user, const std::string& passwd, int buffer_size_param) { - return initialize(hadoop_bin, uri, paddle::string::format_string( - "%s,%s", user.c_str(), passwd.c_str()), - buffer_size_param); + return initialize( + hadoop_bin, uri, + paddle::string::format_string("%s,%s", user.c_str(), passwd.c_str()), + buffer_size_param); } int AfsClient::initialize(const std::string& hadoop_bin, const std::string& uri, const std::string& ugi, int buffer_size_param) { diff --git a/paddle/fluid/distributed/common/afs_warpper.h b/paddle/fluid/distributed/common/afs_warpper.h index d10668046c0..cef3e5ae35c 100644 --- a/paddle/fluid/distributed/common/afs_warpper.h +++ b/paddle/fluid/distributed/common/afs_warpper.h @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/distributed/common/cost_timer.h b/paddle/fluid/distributed/common/cost_timer.h index 5073dc9cf50..1651121ee0c 100644 --- a/paddle/fluid/distributed/common/cost_timer.h +++ b/paddle/fluid/distributed/common/cost_timer.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "butil/time.h" #include "bvar/latency_recorder.h" #include "glog/logging.h" diff --git a/paddle/fluid/distributed/common/local_random.h b/paddle/fluid/distributed/common/local_random.h index 96b8d2d21a5..5a9a3b595d0 100644 --- a/paddle/fluid/distributed/common/local_random.h +++ b/paddle/fluid/distributed/common/local_random.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include #include diff --git a/paddle/fluid/distributed/common/registerer.h b/paddle/fluid/distributed/common/registerer.h index 630be930c14..f4938c0f93f 100644 --- a/paddle/fluid/distributed/common/registerer.h +++ b/paddle/fluid/distributed/common/registerer.h @@ 
-15,6 +15,7 @@ #pragma once #include + #include #include #include diff --git a/paddle/fluid/distributed/fleet_executor/carrier.cc b/paddle/fluid/distributed/fleet_executor/carrier.cc index 53bae87c002..754a3f5d2b2 100644 --- a/paddle/fluid/distributed/fleet_executor/carrier.cc +++ b/paddle/fluid/distributed/fleet_executor/carrier.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/distributed/fleet_executor/carrier.h" + #include -#include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" #include "paddle/fluid/distributed/fleet_executor/message_bus.h" @@ -148,8 +149,9 @@ void Carrier::WakeUp() { } void Carrier::Start() { - PADDLE_ENFORCE_EQ(is_init_, true, platform::errors::PreconditionNotMet( - "Using carrier before initialized.")); + PADDLE_ENFORCE_EQ(is_init_, true, + platform::errors::PreconditionNotMet( + "Using carrier before initialized.")); for (int64_t id : source_interceptor_ids_) { VLOG(3) << "Carrier Start is sending start to source interceptor " << id << "."; diff --git a/paddle/fluid/distributed/fleet_executor/carrier.h b/paddle/fluid/distributed/fleet_executor/carrier.h index d35a3260915..2846af97716 100644 --- a/paddle/fluid/distributed/fleet_executor/carrier.h +++ b/paddle/fluid/distributed/fleet_executor/carrier.h @@ -35,7 +35,7 @@ namespace paddle { namespace framework { class Scope; class ProgramDesc; -} +} // namespace framework namespace distributed { diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc index fb907e3b5c2..4ba11fa7e32 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc @@ -13,8 +13,8 @@ // limitations under the License. #include "paddle/fluid/distributed/fleet_executor/compute_interceptor.h" -#include "paddle/fluid/distributed/fleet_executor/carrier.h" +#include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/task_node.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.cc b/paddle/fluid/distributed/fleet_executor/dist_model.cc index d8f937e218b..8fe73d77494 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model.cc +++ b/paddle/fluid/distributed/fleet_executor/dist_model.cc @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/distributed/fleet_executor/dist_model.h" + #include + #include // NOLINT -#include "paddle/fluid/distributed/fleet_executor/dist_model.h" #include "paddle/fluid/distributed/fleet_executor/fleet_executor.h" #include "paddle/fluid/distributed/fleet_executor/task_node.h" #include "paddle/fluid/framework/block_desc.h" @@ -294,8 +296,9 @@ bool DistModel::PrepareProgram() { bool DistModel::LoadProgram() { VLOG(3) << "Loading program from " << config_.model_dir; - PADDLE_ENFORCE_NE(config_.model_dir, "", platform::errors::InvalidArgument( - "Model dir must be provided.")); + PADDLE_ENFORCE_NE( + config_.model_dir, "", + platform::errors::InvalidArgument("Model dir must be provided.")); std::string model_path = config_.model_dir + ".pdmodel"; framework::proto::ProgramDesc program_proto; std::string pb_content; diff --git a/paddle/fluid/distributed/fleet_executor/dist_model.h b/paddle/fluid/distributed/fleet_executor/dist_model.h index d0203c13135..f5c1d47afb1 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model.h +++ b/paddle/fluid/distributed/fleet_executor/dist_model.h @@ -31,7 +31,7 @@ namespace framework { class ProgramDesc; class Scope; class BlockDesc; -} +} // namespace framework namespace distributed { diff --git a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc index b440d39c73a..b7f590e7a8c 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc +++ b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h" + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h index dc8b2596803..459e609762d 100644 --- a/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h +++ b/paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/platform/float16.h" #include "paddle/fluid/platform/macros.h" diff --git a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc index e946d78550f..c4d7f3c7a69 100644 --- a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc +++ b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc @@ -11,9 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/distributed/fleet_executor/fleet_executor.h" + #include -#include "paddle/fluid/distributed/fleet_executor/fleet_executor.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/message_bus.h" #include "paddle/fluid/distributed/fleet_executor/runtime_graph.h" diff --git a/paddle/fluid/distributed/fleet_executor/fleet_executor.h b/paddle/fluid/distributed/fleet_executor/fleet_executor.h index ccdb3dcc459..176e5dab0da 100644 --- a/paddle/fluid/distributed/fleet_executor/fleet_executor.h +++ b/paddle/fluid/distributed/fleet_executor/fleet_executor.h @@ -25,7 +25,7 @@ namespace paddle { namespace framework { class ProgramDesc; class Scope; -} +} // namespace framework namespace distributed { class RuntimeGraph; diff --git a/paddle/fluid/distributed/fleet_executor/interceptor.cc b/paddle/fluid/distributed/fleet_executor/interceptor.cc index 710ebda4124..2ff2bc04ff8 100644 --- a/paddle/fluid/distributed/fleet_executor/interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/interceptor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/fleet_executor/interceptor.h" + #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/task_loop.h" #include "paddle/fluid/distributed/fleet_executor/task_node.h" diff --git a/paddle/fluid/distributed/fleet_executor/interceptor.h b/paddle/fluid/distributed/fleet_executor/interceptor.h index 86ca7be7f44..00fe2154d28 100644 --- a/paddle/fluid/distributed/fleet_executor/interceptor.h +++ b/paddle/fluid/distributed/fleet_executor/interceptor.h @@ -33,7 +33,7 @@ namespace paddle { namespace framework { class Scope; class GarbageCollector; -} +} // namespace framework namespace distributed { class TaskNode; diff --git a/paddle/fluid/distributed/fleet_executor/message_bus.cc b/paddle/fluid/distributed/fleet_executor/message_bus.cc index 80a6b4667aa..76762af9e7e 100644 --- a/paddle/fluid/distributed/fleet_executor/message_bus.cc +++ b/paddle/fluid/distributed/fleet_executor/message_bus.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/distributed/fleet_executor/message_bus.h" + #include #include #include @@ -19,7 +21,6 @@ #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/global.h" -#include "paddle/fluid/distributed/fleet_executor/message_bus.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" namespace paddle { @@ -28,8 +29,9 @@ namespace distributed { void MessageBus::Init( int64_t rank, const std::unordered_map& rank_to_addr, const std::string& addr) { - PADDLE_ENFORCE_EQ(is_init_, false, platform::errors::AlreadyExists( - "MessageBus is already init.")); + PADDLE_ENFORCE_EQ( + is_init_, false, + platform::errors::AlreadyExists("MessageBus is already init.")); rank_ = rank; is_init_ = true; rank_to_addr_ = rank_to_addr; diff --git a/paddle/fluid/distributed/fleet_executor/message_service.cc b/paddle/fluid/distributed/fleet_executor/message_service.cc index 1c66d83ea34..9d42b0d73db 100644 --- a/paddle/fluid/distributed/fleet_executor/message_service.cc +++ b/paddle/fluid/distributed/fleet_executor/message_service.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE) #include "paddle/fluid/distributed/fleet_executor/message_service.h" + #include "brpc/server.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/message_bus.h" diff --git a/paddle/fluid/distributed/fleet_executor/runtime_graph.cc b/paddle/fluid/distributed/fleet_executor/runtime_graph.cc index 614b4c37e82..a5f90062dcf 100644 --- a/paddle/fluid/distributed/fleet_executor/runtime_graph.cc +++ b/paddle/fluid/distributed/fleet_executor/runtime_graph.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/fleet_executor/runtime_graph.h" + #include "paddle/fluid/distributed/fleet_executor/task_node.h" namespace paddle { diff --git a/paddle/fluid/distributed/fleet_executor/runtime_graph.h b/paddle/fluid/distributed/fleet_executor/runtime_graph.h index 1ca9f0174ed..a59a43cc200 100644 --- a/paddle/fluid/distributed/fleet_executor/runtime_graph.h +++ b/paddle/fluid/distributed/fleet_executor/runtime_graph.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/distributed/fleet_executor/fleet_executor_desc.pb.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/platform/macros.h" diff --git a/paddle/fluid/distributed/fleet_executor/sink_interceptor.cc b/paddle/fluid/distributed/fleet_executor/sink_interceptor.cc index 77fbb23a6c7..9d9e6c03565 100644 --- a/paddle/fluid/distributed/fleet_executor/sink_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/sink_interceptor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/fleet_executor/sink_interceptor.h" + #include "paddle/fluid/distributed/fleet_executor/task_node.h" namespace paddle { diff --git a/paddle/fluid/distributed/fleet_executor/source_interceptor.cc b/paddle/fluid/distributed/fleet_executor/source_interceptor.cc index 78b2bed66dd..6b2fd5565ea 100644 --- a/paddle/fluid/distributed/fleet_executor/source_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/source_interceptor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/distributed/fleet_executor/source_interceptor.h" + #include "paddle/fluid/distributed/fleet_executor/task_node.h" namespace paddle { diff --git a/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc b/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc index bb313ad3789..90765dbdd2d 100644 --- a/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc +++ b/paddle/fluid/distributed/fleet_executor/task_loop_thread.cc @@ -31,8 +31,9 @@ TaskLoopThread::~TaskLoopThread() { } TaskLoop* TaskLoopThread::StartLoop() { - PADDLE_ENFORCE_EQ(start_, false, platform::errors::PreconditionNotMet( - "thread is already running.")); + PADDLE_ENFORCE_EQ( + start_, false, + platform::errors::PreconditionNotMet("thread is already running.")); start_ = true; thread_ = std::thread([this]() { Loop(); }); diff --git a/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc b/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc index ed34bbb87fc..e962a29b4a1 100644 --- a/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc +++ b/paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.cc @@ -30,8 +30,9 @@ TaskLoopThreadPool::TaskLoopThreadPool(int thread_num) TaskLoopThreadPool::~TaskLoopThreadPool() = default; void TaskLoopThreadPool::Start() { - PADDLE_ENFORCE_EQ(start_, false, platform::errors::PreconditionNotMet( - "thread pool is already start.")); + PADDLE_ENFORCE_EQ( + start_, false, + platform::errors::PreconditionNotMet("thread pool is already start.")); PADDLE_ENFORCE_GT( thread_num_, 0, platform::errors::InvalidArgument( @@ -45,10 +46,12 @@ void TaskLoopThreadPool::Start() { } TaskLoop* TaskLoopThreadPool::GetLoop(int tid) { - PADDLE_ENFORCE_EQ(start_, true, platform::errors::PreconditionNotMet( - "thread pool must start first.")); - PADDLE_ENFORCE_GE(tid, 0, platform::errors::OutOfRange( - "tid must >= 0, but now is %d", tid)); + PADDLE_ENFORCE_EQ( + start_, true, + platform::errors::PreconditionNotMet("thread pool must start first.")); + PADDLE_ENFORCE_GE( + tid, 0, + platform::errors::OutOfRange("tid must >= 0, but now is %d", tid)); PADDLE_ENFORCE_LT(tid, thread_num_, platform::errors::OutOfRange( "tid must < thread_num, but now tid=%d thread_num=%d", @@ -57,8 +60,9 @@ TaskLoop* TaskLoopThreadPool::GetLoop(int tid) { } std::vector TaskLoopThreadPool::GetAllLoops() { - PADDLE_ENFORCE_EQ(start_, true, platform::errors::PreconditionNotMet( - "thread pool must start first.")); + PADDLE_ENFORCE_EQ( + start_, true, + platform::errors::PreconditionNotMet("thread pool must start first.")); return loops_; } diff --git a/paddle/fluid/distributed/fleet_executor/task_node.cc b/paddle/fluid/distributed/fleet_executor/task_node.cc index 232317333ea..00ae30d281e 100644 --- a/paddle/fluid/distributed/fleet_executor/task_node.cc +++ b/paddle/fluid/distributed/fleet_executor/task_node.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/distributed/fleet_executor/task_node.h" + #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -153,15 +154,17 @@ void TaskNode::SetRunAtOffset(int64_t value) { void TaskNode::SetReplyUpPerSteps(int64_t value) { PADDLE_ENFORCE_GE( - value, 1, platform::errors::InvalidArgument( - "reply_up_per_steps must >= 1, but received %ld", value)); + value, 1, + platform::errors::InvalidArgument( + "reply_up_per_steps must >= 1, but received %ld", value)); reply_up_per_steps_ = value; } void TaskNode::SetSendDownPerSteps(int64_t value) { PADDLE_ENFORCE_GE( - value, 1, platform::errors::InvalidArgument( - "send_down_per_steps must >= 1, but received %ld", value)); + value, 1, + platform::errors::InvalidArgument( + "send_down_per_steps must >= 1, but received %ld", value)); send_down_per_steps_ = value; } diff --git a/paddle/fluid/distributed/fleet_executor/task_node.h b/paddle/fluid/distributed/fleet_executor/task_node.h index 7dd4b545456..16e686a4401 100644 --- a/paddle/fluid/distributed/fleet_executor/task_node.h +++ b/paddle/fluid/distributed/fleet_executor/task_node.h @@ -26,7 +26,7 @@ namespace paddle { namespace framework { class OperatorBase; class OpDesc; -} +} // namespace framework namespace distributed { class TaskNode final { diff --git a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc index 35857fc86b5..bd81d3644f4 100644 --- a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_run_op_test.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include #include "gtest/gtest.h" - #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" diff --git a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc index 954b52693f4..4992a8b34c9 100644 --- a/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/compute_interceptor_test.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include #include "gtest/gtest.h" - #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" diff --git a/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc b/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc index 19c1d0a0d7a..54adf06fb67 100644 --- a/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_test.cc @@ -16,7 +16,6 @@ limitations under the License. 
*/ #include #include "gtest/gtest.h" - #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" diff --git a/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc b/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc index 78cff2606f6..3828c4478cb 100644 --- a/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/interceptor_ping_pong_with_brpc_test.cc @@ -14,11 +14,11 @@ limitations under the License. */ #include #include + #include #include #include "gtest/gtest.h" - #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" diff --git a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc index e909744a4b5..a78cd6955f2 100644 --- a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_long_path_test.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include #include "gtest/gtest.h" - #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" diff --git a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc index 0e57596bacb..53755bf1a40 100644 --- a/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc +++ b/paddle/fluid/distributed/fleet_executor/test/interceptor_pipeline_short_path_test.cc @@ -16,7 +16,6 @@ limitations under the License. 
 */
 #include
 #include "gtest/gtest.h"
-
 #include "paddle/fluid/distributed/fleet_executor/carrier.h"
 #include "paddle/fluid/distributed/fleet_executor/global.h"
 #include "paddle/fluid/distributed/fleet_executor/interceptor.h"
diff --git a/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc b/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc
index 8ff908f90ec..879d7e9b029 100644
--- a/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc
+++ b/paddle/fluid/distributed/fleet_executor/test/sink_interceptor_test.cc
@@ -16,7 +16,6 @@
 #include
 #include "gtest/gtest.h"
-
 #include "paddle/fluid/distributed/fleet_executor/carrier.h"
 #include "paddle/fluid/distributed/fleet_executor/global.h"
 #include "paddle/fluid/distributed/fleet_executor/interceptor.h"
diff --git a/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc b/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc
index e9c0437c829..21a1b4accc9 100644
--- a/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc
+++ b/paddle/fluid/distributed/fleet_executor/test/source_interceptor_test.cc
@@ -16,7 +16,6 @@
 #include
 #include "gtest/gtest.h"
-
 #include "paddle/fluid/distributed/fleet_executor/carrier.h"
 #include "paddle/fluid/distributed/fleet_executor/global.h"
 #include "paddle/fluid/distributed/fleet_executor/interceptor.h"
diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.cc b/paddle/fluid/distributed/index_dataset/index_sampler.cc
index 306d11d333d..b8219322051 100644
--- a/paddle/fluid/distributed/index_dataset/index_sampler.cc
+++ b/paddle/fluid/distributed/index_dataset/index_sampler.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include "paddle/fluid/distributed/index_dataset/index_sampler.h"
+
 #include "paddle/fluid/framework/data_feed.h"
 namespace paddle {
diff --git a/paddle/fluid/distributed/index_dataset/index_sampler.h b/paddle/fluid/distributed/index_dataset/index_sampler.h
index 02806b814c2..a82348c9ec5 100644
--- a/paddle/fluid/distributed/index_dataset/index_sampler.h
+++ b/paddle/fluid/distributed/index_dataset/index_sampler.h
@@ -14,6 +14,7 @@
 #pragma once
 #include
+
 #include "paddle/fluid/distributed/index_dataset/index_wrapper.h"
 #include "paddle/fluid/framework/data_feed.h"
 #include "paddle/fluid/framework/program_desc.h"
diff --git a/paddle/fluid/distributed/index_dataset/index_wrapper.cc b/paddle/fluid/distributed/index_dataset/index_wrapper.cc
index 27aa890f760..61941ef5133 100644
--- a/paddle/fluid/distributed/index_dataset/index_wrapper.cc
+++ b/paddle/fluid/distributed/index_dataset/index_wrapper.cc
@@ -9,15 +9,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#include "paddle/fluid/distributed/index_dataset/index_wrapper.h"
+
 #include
 #include
 #include
 #include
 #include
 #include
-#include "paddle/fluid/framework/io/fs.h"
-#include "paddle/fluid/distributed/index_dataset/index_wrapper.h"
+#include "paddle/fluid/framework/io/fs.h"
 namespace paddle {
 namespace distributed {
diff --git a/paddle/fluid/distributed/index_dataset/index_wrapper.h b/paddle/fluid/distributed/index_dataset/index_wrapper.h
index 8fb8faf6c84..1c652e60bbb 100644
--- a/paddle/fluid/distributed/index_dataset/index_wrapper.h
+++ b/paddle/fluid/distributed/index_dataset/index_wrapper.h
@@ -17,6 +17,7 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include "paddle/fluid/distributed/index_dataset/index_dataset.pb.h"
 #include "paddle/fluid/platform/enforce.h"
@@ -90,10 +91,11 @@ class IndexWrapper {
     }
     TreePtr tree = std::make_shared();
     int ret = tree->Load(tree_path);
-    PADDLE_ENFORCE_EQ(ret, 0, paddle::platform::errors::InvalidArgument(
-                                  "Load tree[%s] from path[%s] failed. Please "
-                                  "check whether the file exists.",
-                                  name, tree_path));
+    PADDLE_ENFORCE_EQ(ret, 0,
+                      paddle::platform::errors::InvalidArgument(
+                          "Load tree[%s] from path[%s] failed. Please "
+                          "check whether the file exists.",
+                          name, tree_path));
     tree_map.insert(std::pair{name, tree});
   }
diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
old mode 100755
new mode 100644
index 0959b651bb5..89466076b23
--- a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc
@@ -12,11 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include "paddle/fluid/distributed/ps/service/brpc_ps_client.h"
+
 #include
 #include
 #include
-#include "paddle/fluid/distributed/ps/service/brpc_ps_client.h"
 #include "paddle/fluid/framework/archive.h"
 static const int max_port = 65535;
@@ -245,8 +246,9 @@ int32_t BrpcPsClient::Initialize() {
 int DownpourBrpcClosure::check_response(size_t request_idx, int cmd_id) {
   if (_cntls[request_idx]->Failed()) {
-    LOG(ERROR) << "resquest cmd_id:" << cmd_id << " failed, " "err:"
+    LOG(ERROR) << "resquest cmd_id:" << cmd_id
+               << " failed, "
+                  "err:"
                << _cntls[request_idx]->ErrorText();
     return -1;
   }
@@ -263,8 +265,9 @@ int DownpourBrpcClosure::check_response(size_t request_idx, int cmd_id) {
 int DownpourBrpcClosure::check_save_response(size_t request_idx, int cmd_id) {
   int32_t feasign_size = 0;
   if (_cntls[request_idx]->Failed()) {
-    LOG(ERROR) << "resquest cmd_id:" << cmd_id << " failed, " "err:"
+    LOG(ERROR) << "resquest cmd_id:" << cmd_id
+               << " failed, "
+                  "err:"
                << _cntls[request_idx]->ErrorText();
     return -1;
   }
diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.h b/paddle/fluid/distributed/ps/service/brpc_ps_client.h
index e2c16d496c4..17b6bbe22ce 100644
--- a/paddle/fluid/distributed/ps/service/brpc_ps_client.h
+++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.h
@@ -15,6 +15,7 @@
 #pragma once
 #include
+
 #include
 #include
 #include
diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_server.cc b/paddle/fluid/distributed/ps/service/brpc_ps_server.cc
old mode 100755
new mode 100644
index 8167c37b599..d859acbb42e
--- a/paddle/fluid/distributed/ps/service/brpc_ps_server.cc
+++ b/paddle/fluid/distributed/ps/service/brpc_ps_server.cc
@@ -13,7 +13,9 @@
 // limitations under the License.
 #include "paddle/fluid/distributed/ps/service/brpc_ps_server.h"
+
 #include // NOLINT
+
 #include "butil/object_pool.h"
 #include "paddle/fluid/distributed/common/cost_timer.h"
 #include "paddle/fluid/distributed/ps/table/depends/sparse_utils.h"
diff --git a/paddle/fluid/distributed/ps/service/brpc_utils.h b/paddle/fluid/distributed/ps/service/brpc_utils.h
index e68e15058f7..d4332744ceb 100644
--- a/paddle/fluid/distributed/ps/service/brpc_utils.h
+++ b/paddle/fluid/distributed/ps/service/brpc_utils.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once
 #include
+
 #include
 #include
 #include
diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.cc b/paddle/fluid/distributed/ps/service/communicator/communicator.cc
index c4b833f294e..c50f1d909cd 100644
--- a/paddle/fluid/distributed/ps/service/communicator/communicator.cc
+++ b/paddle/fluid/distributed/ps/service/communicator/communicator.cc
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include "paddle/fluid/distributed/ps/service/communicator/communicator.h"
+
 #include
+
 #include "gflags/gflags.h"
 #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h"
 #include "paddle/fluid/distributed/ps/wrapper/fleet.h"
diff --git a/paddle/fluid/distributed/ps/service/communicator/communicator.h b/paddle/fluid/distributed/ps/service/communicator/communicator.h
index 75676c39243..5f2a0cbb909 100644
--- a/paddle/fluid/distributed/ps/service/communicator/communicator.h
+++ b/paddle/fluid/distributed/ps/service/communicator/communicator.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include
 #include
+
 #include
 #include
 #include
@@ -30,6 +31,7 @@ limitations under the License. */
 #include "gflags/gflags.h"
 #include "paddle/fluid/distributed/ps/service/communicator/communicator_common.h"
+#include "paddle/fluid/distributed/ps/service/ps_client.h"
 #include "paddle/fluid/framework/channel.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/variable.h"
@@ -42,8 +44,6 @@ limitations under the License. */
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
-#include "paddle/fluid/distributed/ps/service/ps_client.h"
-
 namespace paddle {
 namespace distributed {
 class PSClient;
@@ -157,8 +157,9 @@ template
 inline void MergeVars(const std::string &var_name,
                       const std::vector> &vars,
                       Scope *scope, bool merge_add = true) {
-  PADDLE_ENFORCE_NE(vars.empty(), true, platform::errors::InvalidArgument(
-                                            "vector vars are empty."));
+  PADDLE_ENFORCE_NE(
+      vars.empty(), true,
+      platform::errors::InvalidArgument("vector vars are empty."));
   auto cpu_place = platform::CPUPlace();
   auto &var0 = vars[0];
   auto *out_var = scope->Var(var_name);
diff --git a/paddle/fluid/distributed/ps/service/env.h b/paddle/fluid/distributed/ps/service/env.h
index 162ee6f0984..0fddb17da7c 100644
--- a/paddle/fluid/distributed/ps/service/env.h
+++ b/paddle/fluid/distributed/ps/service/env.h
@@ -18,11 +18,13 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
+
 #include "gflags/gflags.h"
 namespace paddle {
diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_client.cc b/paddle/fluid/distributed/ps/service/graph_brpc_client.cc
index c1df490669d..ff9680044dd 100644
--- a/paddle/fluid/distributed/ps/service/graph_brpc_client.cc
+++ b/paddle/fluid/distributed/ps/service/graph_brpc_client.cc
@@ -13,12 +13,14 @@
 // limitations under the License.
#include "paddle/fluid/distributed/ps/service/graph_brpc_client.h" + #include #include #include #include #include #include + #include "Eigen/Dense" #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h" #include "paddle/fluid/distributed/ps/table/table.h" @@ -149,7 +151,7 @@ std::future GraphBrpcClient::get_node_feat( std::future GraphBrpcClient::clear_nodes(uint32_t table_id, int type_id, int idx_) { DownpourBrpcClosure *closure = new DownpourBrpcClosure( - server_size, [&, server_size = this->server_size ](void *done) { + server_size, [&, server_size = this->server_size](void *done) { int ret = 0; auto *closure = (DownpourBrpcClosure *)done; size_t fail_num = 0; @@ -665,5 +667,5 @@ int32_t GraphBrpcClient::Initialize() { local_channel = NULL; return 0; } -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_client.h b/paddle/fluid/distributed/ps/service/graph_brpc_client.h index 51f14bc57cd..c038c840df9 100644 --- a/paddle/fluid/distributed/ps/service/graph_brpc_client.h +++ b/paddle/fluid/distributed/ps/service/graph_brpc_client.h @@ -15,11 +15,12 @@ #pragma once #include + #include #include +#include #include -#include #include "ThreadPool.h" #include "brpc/channel.h" #include "brpc/controller.h" diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc index 8ff12265269..5ce26b45250 100644 --- a/paddle/fluid/distributed/ps/service/graph_brpc_server.cc +++ b/paddle/fluid/distributed/ps/service/graph_brpc_server.cc @@ -13,13 +13,14 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/service/graph_brpc_server.h" -#include "paddle/fluid/distributed/ps/service/brpc_ps_server.h" #include // NOLINT #include + #include "butil/endpoint.h" #include "iomanip" #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h" +#include "paddle/fluid/distributed/ps/service/brpc_ps_server.h" #include "paddle/fluid/framework/archive.h" #include "paddle/fluid/platform/profiler.h" namespace paddle { diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_server.h b/paddle/fluid/distributed/ps/service/graph_brpc_server.h index caf728701b2..726876bef16 100644 --- a/paddle/fluid/distributed/ps/service/graph_brpc_server.h +++ b/paddle/fluid/distributed/ps/service/graph_brpc_server.h @@ -14,12 +14,12 @@ #pragma once +#include +#include + #include "brpc/channel.h" #include "brpc/controller.h" #include "brpc/server.h" - -#include -#include #include "paddle/fluid/distributed/ps/service/brpc_ps_server.h" #include "paddle/fluid/distributed/ps/service/server.h" #include "paddle/fluid/distributed/ps/table/common_graph_table.h" diff --git a/paddle/fluid/distributed/ps/service/ps_client.cc b/paddle/fluid/distributed/ps/service/ps_client.cc index f7df99ec13c..a0216f2a795 100644 --- a/paddle/fluid/distributed/ps/service/ps_client.cc +++ b/paddle/fluid/distributed/ps/service/ps_client.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/service/ps_client.h" + #include "glog/logging.h" #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h" #include "paddle/fluid/distributed/ps/service/graph_brpc_client.h" diff --git a/paddle/fluid/distributed/ps/service/ps_client.h b/paddle/fluid/distributed/ps/service/ps_client.h index 926bb7e7c9f..adf096c8469 100644 --- a/paddle/fluid/distributed/ps/service/ps_client.h +++ b/paddle/fluid/distributed/ps/service/ps_client.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/distributed/common/cost_timer.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/service/env.h" diff --git a/paddle/fluid/distributed/ps/service/ps_local_client.cc b/paddle/fluid/distributed/ps/service/ps_local_client.cc index bc024ed3175..b6407ccebe5 100644 --- a/paddle/fluid/distributed/ps/service/ps_local_client.cc +++ b/paddle/fluid/distributed/ps/service/ps_local_client.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/service/ps_local_client.h" + #include "paddle/fluid/distributed/ps/table/table.h" //#define pslib_debug_dense_compress @@ -316,5 +317,5 @@ int32_t PsLocalClient::Initialize() { table_ptr->Push(table_context); return done(); } -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/service/ps_local_client.h b/paddle/fluid/distributed/ps/service/ps_local_client.h index 439ecf79f2f..89c2f7446ac 100644 --- a/paddle/fluid/distributed/ps/service/ps_local_client.h +++ b/paddle/fluid/distributed/ps/service/ps_local_client.h @@ -223,5 +223,5 @@ class PsLocalClient : public PSClient { float _mse = 0; uint16_t _push_times = 0; }; -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/service/ps_local_server.h b/paddle/fluid/distributed/ps/service/ps_local_server.h index c09f8585b65..2075e9dd2be 100644 --- a/paddle/fluid/distributed/ps/service/ps_local_server.h +++ b/paddle/fluid/distributed/ps/service/ps_local_server.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/distributed/ps/service/server.h" namespace paddle { @@ -37,5 +38,5 @@ class PsLocalServer : public PSServer { private: virtual int32_t Initialize() { return 0; } }; -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc index ced51b8cbe3..255c0d3d655 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc +++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h" + #include // NOLINT + #include "butil/endpoint.h" #include "iomanip" #include "paddle/fluid/distributed/ps/table/table.h" @@ -501,5 +503,5 @@ void GraphPyClient::StopServer() { if (status.get() == 0) stoped_ = true; } void GraphPyClient::FinalizeWorker() { this->worker_ptr->FinalizeWorker(); } -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h index 55beb9b3932..7dd03401256 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h +++ b/paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h @@ -14,6 +14,7 @@ #pragma once #include + #include // NOLINT #include #include @@ -23,21 +24,20 @@ #include // NOLINT #include #include -#include "google/protobuf/text_format.h" +#include "google/protobuf/text_format.h" #include "gtest/gtest.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/framework/variable.h" - #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/service/env.h" #include "paddle/fluid/distributed/ps/service/graph_brpc_client.h" #include "paddle/fluid/distributed/ps/service/graph_brpc_server.h" #include "paddle/fluid/distributed/ps/service/ps_service/service.h" #include "paddle/fluid/distributed/ps/service/sendrecv.pb.h" +#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -198,5 +198,5 @@ class GraphPyClient : public GraphPyService { std::thread* client_thread; bool stoped_ = false; }; -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/service/ps_service/service.cc b/paddle/fluid/distributed/ps/service/ps_service/service.cc index 9c3a06c2212..9eb5d49a405 100644 --- a/paddle/fluid/distributed/ps/service/ps_service/service.cc +++ b/paddle/fluid/distributed/ps/service/ps_service/service.cc @@ -17,7 +17,9 @@ #include #include #include + #include + #include "paddle/fluid/distributed/ps/service/communicator/communicator.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/distributed/ps/service/server.h b/paddle/fluid/distributed/ps/service/server.h index c044e828846..55bbbc06d87 100644 --- a/paddle/fluid/distributed/ps/service/server.h +++ b/paddle/fluid/distributed/ps/service/server.h @@ -20,6 +20,7 @@ #include #include #include + #include "butil/endpoint.h" #include "google/protobuf/service.h" #include "paddle/fluid/distributed/common/registerer.h" diff --git a/paddle/fluid/distributed/ps/table/accessor.h b/paddle/fluid/distributed/ps/table/accessor.h index 7713c2bda29..4db8ad0a55a 100644 --- a/paddle/fluid/distributed/ps/table/accessor.h +++ b/paddle/fluid/distributed/ps/table/accessor.h @@ -15,8 +15,10 @@ #pragma once #include #include + #include #include + #include "paddle/fluid/distributed/common/afs_warpper.h" #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps.pb.h" diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.cc 
b/paddle/fluid/distributed/ps/table/common_graph_table.cc index 43dee275a3d..55a9c794e8e 100644 --- a/paddle/fluid/distributed/ps/table/common_graph_table.cc +++ b/paddle/fluid/distributed/ps/table/common_graph_table.cc @@ -13,11 +13,14 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/table/common_graph_table.h" + #include + #include #include #include #include + #include "paddle/fluid/distributed/common/utils.h" #include "paddle/fluid/distributed/ps/table/graph/graph_node.h" #include "paddle/fluid/framework/generator.h" @@ -212,7 +215,6 @@ int64_t GraphTable::load_graph_to_memory_from_ssd(int idx, for (size_t i = 0; i < bags.size(); i++) { if (bags[i].size() > 0) { tasks.push_back(_shards_task_pool[i]->enqueue([&, i, idx, this]() -> int { - char ch[sizeof(int) * 2 + sizeof(int64_t)]; memset(ch, 0, sizeof(int)); memcpy(ch + sizeof(int), &idx, sizeof(int)); @@ -353,7 +355,6 @@ void GraphTable::export_partition_files(int idx, std::string file_path) { for (int i = 0; i < part_len; i++) { tasks.push_back(_shards_task_pool[i % task_pool_size_]->enqueue( [&, i, idx, this]() -> int { - std::string output_path = file_path + "partition_" + std::to_string(i); diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.h b/paddle/fluid/distributed/ps/table/common_graph_table.h index 25bec5276e7..6dd24df921d 100644 --- a/paddle/fluid/distributed/ps/table/common_graph_table.h +++ b/paddle/fluid/distributed/ps/table/common_graph_table.h @@ -17,6 +17,7 @@ #include #include #include + #include #include #include @@ -36,6 +37,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps/table/accessor.h" #include "paddle/fluid/distributed/ps/table/common_table.h" #include "paddle/fluid/distributed/ps/table/graph/class_macro.h" @@ -670,4 +672,4 @@ struct hash { return s.idx ^ s.node_key ^ s.sample_size; } }; -} +} // namespace std diff --git a/paddle/fluid/distributed/ps/table/common_table.h b/paddle/fluid/distributed/ps/table/common_table.h index f69d9ccbf14..280573f7194 100644 --- a/paddle/fluid/distributed/ps/table/common_table.h +++ b/paddle/fluid/distributed/ps/table/common_table.h @@ -19,9 +19,8 @@ #include // NOLINT #include -#include "paddle/fluid/distributed/ps/table/table.h" - #include "paddle/fluid/distributed/common/utils.h" +#include "paddle/fluid/distributed/ps/table/table.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/ctr_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_accessor.cc index ef7311824fa..254bbb96cad 100644 --- a/paddle/fluid/distributed/ps/table/ctr_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_accessor.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/table/ctr_accessor.h" + #include + #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/distributed/ps/table/ctr_accessor.h b/paddle/fluid/distributed/ps/table/ctr_accessor.h index 327c4cea760..96ec5b8398d 100644 --- a/paddle/fluid/distributed/ps/table/ctr_accessor.h +++ b/paddle/fluid/distributed/ps/table/ctr_accessor.h @@ -15,7 +15,9 @@ #pragma once #include #include + #include + #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/table/accessor.h" diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc index 4b84b7e8c36..2bde5271a0c 100644 --- a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h" + #include + #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.h b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h index 5b781b2621c..3134b469604 100644 --- a/paddle/fluid/distributed/ps/table/ctr_double_accessor.h +++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h @@ -15,7 +15,9 @@ #pragma once #include #include + #include + #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/table/accessor.h" diff --git a/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc index 68f28640fc6..6fb6675edde 100644 --- a/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h" + #include + #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h b/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h index 6a9f5d28f5e..c4bcd2bb3c9 100644 --- a/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h +++ b/paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h @@ -15,7 +15,9 @@ #pragma once #include #include + #include + #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/table/accessor.h" diff --git a/paddle/fluid/distributed/ps/table/depends/dense.h b/paddle/fluid/distributed/ps/table/depends/dense.h index aea757e8d59..5e7c1cd438d 100644 --- a/paddle/fluid/distributed/ps/table/depends/dense.h +++ b/paddle/fluid/distributed/ps/table/depends/dense.h @@ -15,13 +15,14 @@ #pragma once #include // for sqrt in CPU and CUDA + #include #include #include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/distributed/common/utils.h" namespace paddle { diff --git a/paddle/fluid/distributed/ps/table/depends/feature_value.h b/paddle/fluid/distributed/ps/table/depends/feature_value.h index 36dc34808bd..e6ab278787d 100644 --- a/paddle/fluid/distributed/ps/table/depends/feature_value.h +++ b/paddle/fluid/distributed/ps/table/depends/feature_value.h @@ -14,10 +14,10 @@ #pragma once +#include #include -#include "gflags/gflags.h" -#include +#include "gflags/gflags.h" #include "paddle/fluid/distributed/common/chunk_allocator.h" namespace paddle { diff --git a/paddle/fluid/distributed/ps/table/depends/geo_recorder.h b/paddle/fluid/distributed/ps/table/depends/geo_recorder.h index adab0ee344b..99530f72b1f 100644 --- a/paddle/fluid/distributed/ps/table/depends/geo_recorder.h +++ b/paddle/fluid/distributed/ps/table/depends/geo_recorder.h @@ -15,6 +15,7 @@ #pragma once #include + #include // NOLINT #include #include diff --git a/paddle/fluid/distributed/ps/table/depends/initializers.h b/paddle/fluid/distributed/ps/table/depends/initializers.h index f46e659a88b..7c707feacec 100644 --- a/paddle/fluid/distributed/ps/table/depends/initializers.h +++ b/paddle/fluid/distributed/ps/table/depends/initializers.h @@ -20,10 +20,9 @@ #include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/framework/generator.h" - #include "paddle/fluid/operators/truncated_gaussian_random_op.h" namespace paddle { diff --git a/paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h b/paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h index 223c8fafd26..4ae3aa7459a 100644 --- a/paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h +++ b/paddle/fluid/distributed/ps/table/depends/rocksdb_warpper.h @@ -20,6 +20,7 @@ #include #include #include + #include #include @@ -153,5 +154,5 @@ class RocksDBHandler { std::vector _handles; rocksdb::DB* _db; }; -} // distributed -} // paddle +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/table/graph/graph_edge.cc b/paddle/fluid/distributed/ps/table/graph/graph_edge.cc index 004a536e8e5..f2f346232d3 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_edge.cc +++ b/paddle/fluid/distributed/ps/table/graph/graph_edge.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/table/graph/graph_edge.h" + #include namespace paddle { namespace distributed { @@ -25,5 +26,5 @@ void WeightedGraphEdgeBlob::add_edge(int64_t id, float weight = 1) { id_arr.push_back(id); weight_arr.push_back(weight); } -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/table/graph/graph_edge.h b/paddle/fluid/distributed/ps/table/graph/graph_edge.h index 5fc785fe256..6b929af679e 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_edge.h +++ b/paddle/fluid/distributed/ps/table/graph/graph_edge.h @@ -43,5 +43,5 @@ class WeightedGraphEdgeBlob : public GraphEdgeBlob { protected: std::vector weight_arr; }; -} -} +} // namespace distributed +} // namespace paddle diff --git a/paddle/fluid/distributed/ps/table/graph/graph_node.cc b/paddle/fluid/distributed/ps/table/graph/graph_node.cc index 366e607261f..d966bd69653 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_node.cc +++ b/paddle/fluid/distributed/ps/table/graph/graph_node.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/table/graph/graph_node.h" + #include namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/graph/graph_node.h b/paddle/fluid/distributed/ps/table/graph/graph_node.h index c6c594036d4..13fdcf4c64e 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_node.h +++ b/paddle/fluid/distributed/ps/table/graph/graph_node.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc b/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc index 8186acec1be..4f5c86db314 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc +++ b/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h" + #include #include #include + #include "paddle/fluid/framework/generator.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h b/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h index c10617022de..cf83d27d7a2 100644 --- a/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h +++ b/paddle/fluid/distributed/ps/table/graph/graph_weighted_sampler.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps/table/graph/graph_edge.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/table/memory_dense_table.h b/paddle/fluid/distributed/ps/table/memory_dense_table.h index 73653fbc2eb..87a3f8661ae 100644 --- a/paddle/fluid/distributed/ps/table/memory_dense_table.h +++ b/paddle/fluid/distributed/ps/table/memory_dense_table.h @@ -17,7 +17,9 @@ #include #include #include + #include + #include "Eigen/Dense" #include "paddle/fluid/distributed/ps/table/accessor.h" #include "paddle/fluid/distributed/ps/table/common_table.h" diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h b/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h index 60ba5d9602e..bce9c774f12 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h +++ b/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h @@ -17,6 +17,7 @@ #include // #include #include + #include #include // NOLINT #include diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_table.cc b/paddle/fluid/distributed/ps/table/memory_sparse_table.cc index ee6a801fa91..464f788b454 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_table.cc +++ b/paddle/fluid/distributed/ps/table/memory_sparse_table.cc @@ -12,15 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" + #include -#include -#include "paddle/fluid/distributed/common/cost_timer.h" -#include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" -#include "paddle/fluid/framework/io/fs.h" +#include #include "boost/lexical_cast.hpp" #include "glog/logging.h" +#include "paddle/fluid/distributed/common/cost_timer.h" +#include "paddle/fluid/framework/io/fs.h" #include "paddle/fluid/platform/enforce.h" DEFINE_bool(pserver_print_missed_key_num_every_push, false, @@ -272,9 +273,8 @@ int32_t MemorySparseTable::Save(const std::string& dirname, if (_value_accesor->Save(it.value().data(), save_param)) { std::string format_value = _value_accesor->ParseToString( it.value().data(), it.value().size()); - if (0 != - write_channel->write_line(paddle::string::format_string( - "%lu %s", it.key(), format_value.c_str()))) { + if (0 != write_channel->write_line(paddle::string::format_string( + "%lu %s", it.key(), format_value.c_str()))) { ++retry_num; is_write_failed = true; LOG(ERROR) diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_table.h b/paddle/fluid/distributed/ps/table/memory_sparse_table.h index 6516c75a5d6..7b7a47ff998 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_table.h +++ b/paddle/fluid/distributed/ps/table/memory_sparse_table.h @@ -17,12 +17,14 @@ #include #include #include + #include #include // NOLINT #include #include #include #include + #include "Eigen/Dense" #include "paddle/fluid/distributed/ps/table/accessor.h" #include "paddle/fluid/distributed/ps/table/common_table.h" diff --git a/paddle/fluid/distributed/ps/table/sparse_accessor.cc b/paddle/fluid/distributed/ps/table/sparse_accessor.cc index bc537880f1c..772ff5d1fc5 100644 --- a/paddle/fluid/distributed/ps/table/sparse_accessor.cc +++ b/paddle/fluid/distributed/ps/table/sparse_accessor.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/table/sparse_accessor.h" + #include + #include "glog/logging.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/distributed/ps/table/sparse_accessor.h b/paddle/fluid/distributed/ps/table/sparse_accessor.h index 875904847b2..5e76365901c 100644 --- a/paddle/fluid/distributed/ps/table/sparse_accessor.h +++ b/paddle/fluid/distributed/ps/table/sparse_accessor.h @@ -15,7 +15,9 @@ #pragma once #include #include + #include + #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/table/accessor.h" diff --git a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc index 8471b936128..a9a4c9beae2 100644 --- a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc +++ b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h" + #include + #include "glog/logging.h" DEFINE_bool(enable_show_scale_gradient, true, "enable show scale gradient"); diff --git a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.h b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.h index 55a37b59419..0f7766e20a3 100644 --- a/paddle/fluid/distributed/ps/table/sparse_sgd_rule.h +++ b/paddle/fluid/distributed/ps/table/sparse_sgd_rule.h @@ -14,8 +14,10 @@ #pragma once #include + #include #include + #include "glog/logging.h" // for CHECK #include "paddle/fluid/distributed/common/local_random.h" // for local_uniform_real_distribution #include "paddle/fluid/distributed/common/registerer.h" diff --git a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc index b1359d1323d..7e1128baa0c 100644 --- a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc +++ b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/table/ssd_sparse_table.h" + #include "paddle/fluid/distributed/common/cost_timer.h" #include "paddle/fluid/distributed/common/local_random.h" #include "paddle/fluid/distributed/common/topk_calculator.h" @@ -362,9 +363,8 @@ int32_t SSDSparseTable::Save(const std::string& path, if (_value_accesor->Save(it.value().data(), save_param)) { std::string format_value = _value_accesor->ParseToString( it.value().data(), it.value().size()); - if (0 != - write_channel->write_line(paddle::string::format_string( - "%lu %s", it.key(), format_value.c_str()))) { + if (0 != write_channel->write_line(paddle::string::format_string( + "%lu %s", it.key(), format_value.c_str()))) { ++retry_num; is_write_failed = true; LOG(ERROR) << "SSDSparseTable save failed, retry it! 
path:" @@ -597,9 +597,8 @@ int32_t SSDSparseTable::SaveCache( while (shuffled_channel->Read(data)) { for (auto& t : data) { ++feasign_size; - if (0 != - write_channel->write_line(paddle::string::format_string( - "%lu %s", t.first, t.second.c_str()))) { + if (0 != write_channel->write_line(paddle::string::format_string( + "%lu %s", t.first, t.second.c_str()))) { LOG(ERROR) << "Cache Table save failed, " "path:" << channel_config.path << ", retry it!"; diff --git a/paddle/fluid/distributed/ps/table/table.cc b/paddle/fluid/distributed/ps/table/table.cc index ef2eb3a746f..cfa286f1c3f 100644 --- a/paddle/fluid/distributed/ps/table/table.cc +++ b/paddle/fluid/distributed/ps/table/table.cc @@ -16,13 +16,11 @@ #include "glog/logging.h" #include "paddle/fluid/distributed/common/registerer.h" - #include "paddle/fluid/distributed/ps/table/common_graph_table.h" -#include "paddle/fluid/distributed/ps/table/memory_dense_table.h" - #include "paddle/fluid/distributed/ps/table/ctr_accessor.h" #include "paddle/fluid/distributed/ps/table/ctr_double_accessor.h" #include "paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h" +#include "paddle/fluid/distributed/ps/table/memory_dense_table.h" #include "paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h" #include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" #include "paddle/fluid/distributed/ps/table/sparse_accessor.h" diff --git a/paddle/fluid/distributed/ps/table/table.h b/paddle/fluid/distributed/ps/table/table.h index 48fda782d48..0c56b48a246 100644 --- a/paddle/fluid/distributed/ps/table/table.h +++ b/paddle/fluid/distributed/ps/table/table.h @@ -15,11 +15,13 @@ #pragma once #include + #include #include // NOLINT #include #include #include + #include "paddle/fluid/distributed/common/afs_warpper.h" #include "paddle/fluid/distributed/ps/table/accessor.h" #include "paddle/fluid/distributed/ps/table/depends/sparse_utils.h" diff --git a/paddle/fluid/distributed/ps/table/tensor_accessor.cc b/paddle/fluid/distributed/ps/table/tensor_accessor.cc index 5d1f69b7463..880583f3684 100644 --- a/paddle/fluid/distributed/ps/table/tensor_accessor.cc +++ b/paddle/fluid/distributed/ps/table/tensor_accessor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/distributed/ps/table/tensor_accessor.h" + #include "Eigen/Dense" namespace paddle { diff --git a/paddle/fluid/distributed/ps/table/tensor_accessor.h b/paddle/fluid/distributed/ps/table/tensor_accessor.h index fad31d5df7f..a5225127534 100644 --- a/paddle/fluid/distributed/ps/table/tensor_accessor.h +++ b/paddle/fluid/distributed/ps/table/tensor_accessor.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include #include diff --git a/paddle/fluid/distributed/ps/wrapper/fleet.cc b/paddle/fluid/distributed/ps/wrapper/fleet.cc index 955ba75e672..b9754d7b9de 100644 --- a/paddle/fluid/distributed/ps/wrapper/fleet.cc +++ b/paddle/fluid/distributed/ps/wrapper/fleet.cc @@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/distributed/ps/wrapper/fleet.h" + #include #include "paddle/fluid/distributed/ps/service/communicator/communicator.h" #include "paddle/fluid/distributed/ps/table/table.h" -#include "paddle/fluid/distributed/ps/wrapper/fleet.h" namespace paddle { namespace distributed { diff --git a/paddle/fluid/distributed/ps/wrapper/fleet.h b/paddle/fluid/distributed/ps/wrapper/fleet.h index ce109b63cce..f88c478724b 100644 --- a/paddle/fluid/distributed/ps/wrapper/fleet.h +++ b/paddle/fluid/distributed/ps/wrapper/fleet.h @@ -49,8 +49,8 @@ class PSCore; using framework::LoDTensor; using framework::Scope; -using phi::SelectedRows; using framework::Variable; +using phi::SelectedRows; using RpcCtxMap = std::unordered_map; diff --git a/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h b/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h old mode 100755 new mode 100644 index ca02ad31195..0156c0b42db --- a/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h +++ b/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h @@ -49,8 +49,8 @@ class PSCore; using framework::LoDTensor; using framework::Scope; -using phi::SelectedRows; using framework::Variable; +using phi::SelectedRows; using RpcCtxMap = std::unordered_map; diff --git a/paddle/fluid/distributed/store/tcp_store.cc b/paddle/fluid/distributed/store/tcp_store.cc index ec6f0e26a08..a46b4b32c9f 100644 --- a/paddle/fluid/distributed/store/tcp_store.cc +++ b/paddle/fluid/distributed/store/tcp_store.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/distributed/store/tcp_store.h" + #include #include #include -#include "paddle/fluid/distributed/store/tcp_store.h" #include "paddle/fluid/distributed/store/tcp_utils.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/flags.h" diff --git a/paddle/fluid/distributed/store/tcp_utils.cc b/paddle/fluid/distributed/store/tcp_utils.cc index a28cba28833..466cd11fa5d 100644 --- a/paddle/fluid/distributed/store/tcp_utils.cc +++ b/paddle/fluid/distributed/store/tcp_utils.cc @@ -13,9 +13,11 @@ // limitations under the License. #include "paddle/fluid/distributed/store/tcp_utils.h" + #include #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -51,12 +53,13 @@ void close_socket(SocketType socket) { int n; n = ::getaddrinfo(node, port_cstr, &hints, &res); const char* gai_err = ::gai_strerror(n); - const char* proto = - (family == AF_INET ? "IPv4" : family == AF_INET6 ? "IPv6" : ""); - PADDLE_ENFORCE_EQ( - n, 0, platform::errors::InvalidArgument( - "%s network %s:%s cannot be obtained. Details: %s.", proto, - host, port, gai_err)); + const char* proto = (family == AF_INET ? "IPv4" + : family == AF_INET6 ? "IPv6" + : ""); + PADDLE_ENFORCE_EQ(n, 0, + platform::errors::InvalidArgument( + "%s network %s:%s cannot be obtained. Details: %s.", + proto, host, port, gai_err)); return res; } @@ -79,10 +82,11 @@ SocketType tcp_connect(const std::string host, const std::string port, do { for (::addrinfo* cur = res; cur != nullptr; cur = cur->ai_next) { sockfd = ::socket(cur->ai_family, cur->ai_socktype, cur->ai_protocol); - PADDLE_ENFORCE_GT(sockfd, 0, platform::errors::InvalidArgument( - "Create socket to connect %s:%s failed. " - "Details: %s. ", - host, port, socket_error().message())); + PADDLE_ENFORCE_GT(sockfd, 0, + platform::errors::InvalidArgument( + "Create socket to connect %s:%s failed. " + "Details: %s. 
", + host, port, socket_error().message())); if (::connect(sockfd, cur->ai_addr, cur->ai_addrlen) == 0) { retry = false; diff --git a/paddle/fluid/distributed/store/tcp_utils.h b/paddle/fluid/distributed/store/tcp_utils.h index 60cb3de124d..ec9f610a18c 100644 --- a/paddle/fluid/distributed/store/tcp_utils.h +++ b/paddle/fluid/distributed/store/tcp_utils.h @@ -29,6 +29,7 @@ #include #include #include + #include "paddle/fluid/platform/enforce.h" // Utility functions for TCP socket. @@ -73,9 +74,10 @@ void send_bytes(SocketType socket, const T* buffer, size_t len) { while (to_send > 0) { auto byte_sent = ::send(socket, ptr, to_send, 0); - PADDLE_ENFORCE_GT(byte_sent, 0, platform::errors::InvalidArgument( - "TCP send error. Details: %s.", - socket_error().message())); + PADDLE_ENFORCE_GT( + byte_sent, 0, + platform::errors::InvalidArgument("TCP send error. Details: %s.", + socket_error().message())); to_send -= byte_sent; ptr += byte_sent; } @@ -91,9 +93,10 @@ void receive_bytes(SocketType socket, T* buffer, size_t len) { while (to_recv > 0) { auto byte_received = ::recv(socket, ptr, to_recv, 0); - PADDLE_ENFORCE_GT(byte_received, 0, platform::errors::InvalidArgument( - "TCP receive error. Details: %s.", - socket_error().message())); + PADDLE_ENFORCE_GT( + byte_received, 0, + platform::errors::InvalidArgument("TCP receive error. Details: %s.", + socket_error().message())); to_recv -= byte_received; ptr += byte_received; diff --git a/paddle/fluid/distributed/test/barrier_table_test.cc b/paddle/fluid/distributed/test/barrier_table_test.cc index c4c5b229928..f540939c6fd 100644 --- a/paddle/fluid/distributed/test/barrier_table_test.cc +++ b/paddle/fluid/distributed/test/barrier_table_test.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include + #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/table/common_table.h" diff --git a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc index f9d57be95af..c1467dae9a7 100644 --- a/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc +++ b/paddle/fluid/distributed/test/brpc_service_dense_sgd_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include // NOLINT diff --git a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc index 29195d99857..bade56f239f 100644 --- a/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc +++ b/paddle/fluid/distributed/test/brpc_service_sparse_sgd_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include // NOLINT diff --git a/paddle/fluid/distributed/test/brpc_utils_test.cc b/paddle/fluid/distributed/test/brpc_utils_test.cc index 16ff9bd7584..33367bf16b7 100644 --- a/paddle/fluid/distributed/test/brpc_utils_test.cc +++ b/paddle/fluid/distributed/test/brpc_utils_test.cc @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/distributed/ps/service/brpc_utils.h" + #include #include "gtest/gtest.h" - -#include "paddle/fluid/distributed/ps/service/brpc_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/distributed/test/ctr_accessor_test.cc b/paddle/fluid/distributed/test/ctr_accessor_test.cc index 27b6ddf722b..51254391a42 100644 --- a/paddle/fluid/distributed/test/ctr_accessor_test.cc +++ b/paddle/fluid/distributed/test/ctr_accessor_test.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/distributed/ps/table/ctr_accessor.h" + #include #include + #include "gtest/gtest.h" #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps.pb.h" diff --git a/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc b/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc index f6e773a414c..fbf179dbeee 100644 --- a/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc +++ b/paddle/fluid/distributed/test/ctr_dymf_accessor_test.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/distributed/ps/table/ctr_dymf_accessor.h" + #include #include + #include "gtest/gtest.h" #include "paddle/fluid/distributed/common/registerer.h" #include "paddle/fluid/distributed/ps.pb.h" diff --git a/paddle/fluid/distributed/test/dense_table_test.cc b/paddle/fluid/distributed/test/dense_table_test.cc index 9529c776c12..185d9d3aed1 100644 --- a/paddle/fluid/distributed/test/dense_table_test.cc +++ b/paddle/fluid/distributed/test/dense_table_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include + #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/table/memory_dense_table.h" diff --git a/paddle/fluid/distributed/test/feature_value_test.cc b/paddle/fluid/distributed/test/feature_value_test.cc index 32e3944d35a..6e848c3e2f4 100644 --- a/paddle/fluid/distributed/test/feature_value_test.cc +++ b/paddle/fluid/distributed/test/feature_value_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/distributed/ps/table/depends/feature_value.h" + #include + #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/distributed/test/graph_node_split_test.cc b/paddle/fluid/distributed/test/graph_node_split_test.cc index 395d7c1eace..fa9b89d75c8 100644 --- a/paddle/fluid/distributed/test/graph_node_split_test.cc +++ b/paddle/fluid/distributed/test/graph_node_split_test.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include // NOLINT #include #include @@ -17,8 +18,8 @@ limitations under the License. 
*/ #include // NOLINT #include #include -#include "google/protobuf/text_format.h" +#include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h" diff --git a/paddle/fluid/distributed/test/graph_node_test.cc b/paddle/fluid/distributed/test/graph_node_test.cc index 3b43c2779ee..9cb244a9ec4 100644 --- a/paddle/fluid/distributed/test/graph_node_test.cc +++ b/paddle/fluid/distributed/test/graph_node_test.cc @@ -9,7 +9,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/distributed/ps/table/graph/graph_node.h" + #include + #include // NOLINT #include #include @@ -17,8 +20,8 @@ limitations under the License. */ #include // NOLINT #include #include -#include "google/protobuf/text_format.h" +#include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/service/brpc_ps_client.h" @@ -30,7 +33,6 @@ limitations under the License. */ #include "paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h" #include "paddle/fluid/distributed/ps/service/ps_service/service.h" #include "paddle/fluid/distributed/ps/service/sendrecv.pb.h" -#include "paddle/fluid/distributed/ps/table/graph/graph_node.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" diff --git a/paddle/fluid/distributed/test/graph_table_sample_test.cc b/paddle/fluid/distributed/test/graph_table_sample_test.cc index d7f6f2f34d7..a3463162d27 100644 --- a/paddle/fluid/distributed/test/graph_table_sample_test.cc +++ b/paddle/fluid/distributed/test/graph_table_sample_test.cc @@ -13,6 +13,8 @@ // limitations under the License. #include + +#include #include // NOLINT #include #include @@ -20,9 +22,8 @@ #include // NOLINT #include #include -#include "google/protobuf/text_format.h" -#include +#include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/table/common_graph_table.h" diff --git a/paddle/fluid/distributed/test/memory_geo_table_test.cc b/paddle/fluid/distributed/test/memory_geo_table_test.cc index ca3b51fade1..507211e69fa 100644 --- a/paddle/fluid/distributed/test/memory_geo_table_test.cc +++ b/paddle/fluid/distributed/test/memory_geo_table_test.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include - #include + #include #include // NOLINT diff --git a/paddle/fluid/distributed/test/memory_sparse_table_test.cc b/paddle/fluid/distributed/test/memory_sparse_table_test.cc index 68bc50373ff..1689b7716bb 100644 --- a/paddle/fluid/distributed/test/memory_sparse_table_test.cc +++ b/paddle/fluid/distributed/test/memory_sparse_table_test.cc @@ -12,16 +12,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include +#include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" +#include #include + #include #include // NOLINT #include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" -#include "paddle/fluid/distributed/ps/table/memory_sparse_table.h" #include "paddle/fluid/distributed/ps/table/table.h" namespace paddle { diff --git a/paddle/fluid/distributed/test/sparse_sgd_rule_test.cc b/paddle/fluid/distributed/test/sparse_sgd_rule_test.cc index 1a4e16b9266..3a9a8d0b39c 100644 --- a/paddle/fluid/distributed/test/sparse_sgd_rule_test.cc +++ b/paddle/fluid/distributed/test/sparse_sgd_rule_test.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/distributed/ps/table/sparse_sgd_rule.h" + #include #include + #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" diff --git a/paddle/fluid/distributed/test/table_test.cc b/paddle/fluid/distributed/test/table_test.cc index 4f73519ef5e..56809abad0c 100644 --- a/paddle/fluid/distributed/test/table_test.cc +++ b/paddle/fluid/distributed/test/table_test.cc @@ -30,4 +30,4 @@ TEST(Table, Initialize) { ASSERT_EQ(ret, -1); } } // namespace distributed -} // // namespace paddle +} // namespace paddle diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index 544e7c8fe85..09db68399f3 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -13,17 +13,15 @@ // limitations under the License. #include "paddle/fluid/eager/accumulation/accumulation_node.h" + +#include "glog/logging.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/imperative/gradient_accumulator.h" - -#include "paddle/phi/api/all.h" -#include "paddle/phi/core/dense_tensor.h" - #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" - -#include "glog/logging.h" +#include "paddle/phi/api/all.h" +#include "paddle/phi/core/dense_tensor.h" namespace egr { @@ -72,8 +70,7 @@ paddle::small_vector, GradNodeAccumulation::operator()( paddle::small_vector, kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph, - bool is_new_grad) { + bool create_graph, bool is_new_grad) { VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation"; PADDLE_ENFORCE(grads.size() == 1, paddle::platform::errors::Fatal( diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h index 6374534578c..7694e290bab 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.h +++ b/paddle/fluid/eager/accumulation/accumulation_node.h @@ -41,8 +41,7 @@ class GradNodeAccumulation : public GradNodeBase { kSlotSmallVectorSize> operator()(paddle::small_vector, kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph = false, - bool is_new_grad = false) override; + bool create_graph = false, bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/amp_utils.h b/paddle/fluid/eager/amp_utils.h index 2145f4a1196..2834f7d5dc0 100644 --- a/paddle/fluid/eager/amp_utils.h +++ b/paddle/fluid/eager/amp_utils.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/imperative/amp_auto_cast.h" diff --git 
a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index 38f67cb5bdf..5adceb7e79a 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -13,16 +13,14 @@ // limitations under the License. #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" + +#include "glog/logging.h" #include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/eager/eager_tensor.h" - -#include "paddle/phi/kernels/scale_kernel.h" - #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" - -#include "glog/logging.h" +#include "paddle/phi/kernels/scale_kernel.h" namespace egr { @@ -147,8 +145,7 @@ paddle::small_vector, GradNodeScale::operator()( paddle::small_vector, kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph, - bool is_new_grad) { + bool create_graph, bool is_new_grad) { // 1. Check Output Size VLOG(6) << "grad size is: " << grads.size(); PADDLE_ENFORCE( diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h index 04ff510944d..45872c97002 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h @@ -42,8 +42,7 @@ class GradNodeScale : public GradNodeBase { kSlotSmallVectorSize> operator()(paddle::small_vector, kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph = false, - bool is_new_grad = false) override; + bool create_graph = false, bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc index 7a374d567d5..836216d64b0 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc @@ -23,11 +23,11 @@ * **/ #include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h" + #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/utils.h" - #include "paddle/phi/api/all.h" namespace egr { diff --git a/paddle/fluid/eager/api/utils/global_utils.h b/paddle/fluid/eager/api/utils/global_utils.h index 3c18efea203..6a6a443f693 100644 --- a/paddle/fluid/eager/api/utils/global_utils.h +++ b/paddle/fluid/eager/api/utils/global_utils.h @@ -17,6 +17,7 @@ #include #include + #include "paddle/fluid/eager/type_defs.h" #include "paddle/fluid/imperative/tracer.h" #include "paddle/phi/api/ext/op_meta_info.h" @@ -73,8 +74,9 @@ class Controller { return op_meta_info_map_; } - void MergeOpMetaInfoMap(const std::unordered_map< - std::string, std::vector>& map) { + void MergeOpMetaInfoMap( + const std::unordered_map>& + map) { op_meta_info_map_.insert(map.begin(), map.end()); } diff --git a/paddle/fluid/eager/api/utils/hook_utils.cc b/paddle/fluid/eager/api/utils/hook_utils.cc index 8ee646b718c..6493135141f 100644 --- a/paddle/fluid/eager/api/utils/hook_utils.cc +++ b/paddle/fluid/eager/api/utils/hook_utils.cc @@ -13,6 +13,7 @@ // limitations 
under the License. #include "paddle/fluid/eager/api/utils/hook_utils.h" + #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/autograd_meta.h" diff --git a/paddle/fluid/eager/api/utils/tensor_utils.cc b/paddle/fluid/eager/api/utils/tensor_utils.cc index 81ea92d1c3c..84a9eb6dea6 100644 --- a/paddle/fluid/eager/api/utils/tensor_utils.cc +++ b/paddle/fluid/eager/api/utils/tensor_utils.cc @@ -13,17 +13,16 @@ // limitations under the License. #include "paddle/fluid/eager/api/utils/tensor_utils.h" + #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/utils.h" - -#include "paddle/phi/api/all.h" - #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/phi/api/all.h" namespace egr { namespace egr_utils_api { diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index 9de647a21ad..36cfb4db113 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -13,28 +13,28 @@ // limitations under the License. #include "paddle/fluid/eager/backward.h" + #include +#include "glog/logging.h" +#include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/grad_tensor_holder.h" #include "paddle/fluid/eager/utils.h" -#include "paddle/fluid/platform/profiler.h" -#include "paddle/fluid/platform/profiler/event_tracing.h" - -#include "glog/logging.h" -#include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" +#include "paddle/fluid/platform/profiler.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/phi/kernels/autotune/switch_autotune.h" namespace egr { /* -* GeneralGrad is Helpper class to implement custom grad operation between -* outputs and inputs. -* -* **/ + * GeneralGrad is Helpper class to implement custom grad operation between + * outputs and inputs. + * + * **/ class GeneralGrad { public: static GeneralGrad& Instance() { return *general_grad_; } diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index abdd8cadeed..3efcf3b21a4 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/eager/custom_operator/custom_operator_node.h" + #include "paddle/fluid/framework/custom_operator.h" #include "paddle/fluid/framework/op_meta_info_helper.h" #include "paddle/fluid/platform/profiler/event_tracing.h" diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc index af387bb3238..71ccb072ce9 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -13,27 +13,24 @@ // limitations under the License. 
#include "paddle/fluid/eager/grad_node_info.h" + +#include "glog/logging.h" #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/utils.h" - -#include "paddle/phi/common/data_type.h" -#include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/sparse_coo_tensor.h" - #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/var_type.h" - #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" - -#include "glog/logging.h" +#include "paddle/phi/common/data_type.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/sparse_coo_tensor.h" /** * Implementation of GradNodeBase, Edge and GradTensorHolder. -**/ + **/ namespace egr { static void CheckTensor(const paddle::experimental::Tensor& pre, diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 747e98b8466..9070ac9e5b6 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -179,14 +179,13 @@ class GradNodeBase { kSlotSmallVectorSize> operator()(paddle::small_vector, kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph = false, - bool is_new_grad = false) = 0; + bool create_graph = false, bool is_new_grad = false) = 0; virtual void ClearTensorWrappers() = 0; /** - * Self-Copy interface designed for use in DoubleGrad - * **/ + * Self-Copy interface designed for use in DoubleGrad + * **/ virtual std::shared_ptr Copy() const = 0; // adj_edges were moved inside OutputMeta(), so no available direct access @@ -230,8 +229,8 @@ class GradNodeBase { std::shared_ptr&& hook); /** - * Remove GradientHook - * **/ + * Remove GradientHook + * **/ bool RemoveGradientHook(const int64_t& hook_id) { auto remove_cnt = gradient_hooks_.erase(hook_id); if (remove_cnt == 0) { @@ -252,8 +251,8 @@ class GradNodeBase { kSlotSmallVectorSize>& tensors); /** - * Handle Complex - Real Type Promotion - * **/ + * Handle Complex - Real Type Promotion + * **/ void HandleComplexGradToRealGrad( paddle::small_vector, kSlotSmallVectorSize>* out_grads); @@ -262,8 +261,8 @@ class GradNodeBase { virtual std::string name() { return "GradNodeBase"; } /** - * The following interfaces are designed for no_need_buffer - * **/ + * The following interfaces are designed for no_need_buffer + * **/ bool IsTensorWrappersCleared() { return is_tensor_wrappers_cleared_; } void SetIsTensorWrappersCleared(bool is_tensor_wrappers_cleared) { diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 64fb8b53b47..6abf759cdba 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -13,11 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/eager/grad_tensor_holder.h" -#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/var_type.h" +#include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace egr { diff --git a/paddle/fluid/eager/hooks.h b/paddle/fluid/eager/hooks.h index 097150cf5ed..a98b3d9f8e4 100644 --- a/paddle/fluid/eager/hooks.h +++ b/paddle/fluid/eager/hooks.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/phi/api/include/tensor.h" namespace egr { diff --git a/paddle/fluid/eager/pylayer/py_layer_node.cc b/paddle/fluid/eager/pylayer/py_layer_node.cc index a00b292fe09..ec17a324b1e 100644 --- a/paddle/fluid/eager/pylayer/py_layer_node.cc +++ b/paddle/fluid/eager/pylayer/py_layer_node.cc @@ -13,18 +13,16 @@ // limitations under the License. #include "paddle/fluid/eager/pylayer/py_layer_node.h" -#include "paddle/fluid/eager/eager_tensor.h" - -#include "paddle/phi/api/all.h" -#include "paddle/phi/core/dense_tensor.h" +#include "glog/logging.h" +#include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_utils.h" - -#include "glog/logging.h" +#include "paddle/phi/api/all.h" +#include "paddle/phi/core/dense_tensor.h" #pragma GCC diagnostic ignored "-Wattributes" #include "pybind11/pytypes.h" @@ -34,8 +32,7 @@ paddle::small_vector, GradNodePyLayer::operator()( paddle::small_vector, kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph, - bool is_new_grad) { + bool create_graph, bool is_new_grad) { VLOG(3) << "Running Eager Backward Node: " << name(); paddle::small_vector, diff --git a/paddle/fluid/eager/pylayer/py_layer_node.h b/paddle/fluid/eager/pylayer/py_layer_node.h index c1a8c6e626b..998480bbfeb 100644 --- a/paddle/fluid/eager/pylayer/py_layer_node.h +++ b/paddle/fluid/eager/pylayer/py_layer_node.h @@ -38,8 +38,7 @@ class GradNodePyLayer : public GradNodeBase { kSlotSmallVectorSize> operator()(paddle::small_vector, kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph = false, - bool is_new_grad = false) override; + bool create_graph = false, bool is_new_grad = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc index c159084d683..c53ffe823ab 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/eager/accumulation/accumulation_node.h" + #include #include "gtest/gtest.h" - -#include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/utils/hook_utils.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" diff --git a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc index 48b4b9c5748..f7415dd1f71 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/autograd_meta_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/eager/autograd_meta.h" + #include "glog/logging.h" #include "gtest/gtest.h" - -#include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h" diff --git a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc index edbb441f27a..a82965303af 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/eager_tensor_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/eager/eager_tensor.h" + #include "glog/logging.h" #include "gtest/gtest.h" - -#include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/imperative/var_helper.h" #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/common/layout.h" @@ -35,7 +35,7 @@ class AutogradMetaTest : public AbstractAutogradMeta { explicit AutogradMetaTest(int val) : val_(val) {} int val_ = 0; }; -} +} // namespace eager_test TEST(Tensor, Constructor) { paddle::experimental::Tensor et1 = paddle::experimental::Tensor(); paddle::experimental::Tensor et2 = paddle::experimental::Tensor("et2"); diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc index 6687b6621ad..63a4a72b631 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/eager/grad_node_info.h" + #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/eager_tensor.h" -#include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/hooks.h" #include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h" #include "paddle/phi/api/lib/utils/allocator.h" @@ -85,8 +85,8 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { CHECK_EQ(grad_test_node2->OutputMeta()[0].size(), size_t(1)); VLOG(6) << "Test Gradient Hook"; - auto gradient_hook = []( - const paddle::experimental::Tensor& et) -> paddle::experimental::Tensor { + auto gradient_hook = [](const paddle::experimental::Tensor& et) + -> paddle::experimental::Tensor { paddle::experimental::Tensor res; phi::DenseTensorMeta meta = phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index a00e629d102..eb9bd6007bf 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -14,7 +14,6 @@ #pragma once #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" @@ -35,8 +34,7 @@ class GradTestNode : public egr::GradNodeBase { egr::kSlotSmallVectorSize> operator()(paddle::small_vector, egr::kSlotSmallVectorSize>& grads, // NOLINT - bool create_graph = false, - bool is_new_grad = false) override { + bool create_graph = false, bool is_new_grad = false) override { val_ = std::dynamic_pointer_cast(grads[0][0].impl()) ->data()[0]; phi::DenseTensorMeta meta = diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc index 0fe349294b4..17f593e2490 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_tensor_holder_test.cc @@ -12,17 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/eager/grad_tensor_holder.h" + #include #include "gtest/gtest.h" - #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" -#include "paddle/fluid/eager/grad_tensor_holder.h" #include "paddle/phi/api/lib/utils/allocator.h" -#include "paddle/phi/core/selected_rows.h" - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/selected_rows.h" PD_DECLARE_KERNEL(full_like, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc index 28c3472f90d..8813f364840 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/tensor_wrapper_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/eager/tensor_wrapper.h" + #include "glog/logging.h" #include "gtest/gtest.h" - -#include "paddle/fluid/eager/tensor_wrapper.h" #include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h" #include "paddle/fluid/eager/utils.h" diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc index 056c7102f66..3b0e6a3fdb6 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cpu.cc @@ -15,19 +15,17 @@ // Eager Dygraph #include + #include #include "gtest/gtest.h" -#include "paddle/fluid/platform/flags.h" - #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" - -#include "paddle/fluid/imperative/tracer.h" - #include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h" #include "paddle/fluid/eager/tests/test_utils.h" +#include "paddle/fluid/imperative/tracer.h" +#include "paddle/fluid/platform/flags.h" #ifdef WITH_GPERFTOOLS #include "gperftools/profiler.h" diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc index 287d6e770de..5dd5cde548f 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_eager_cuda.cc @@ -14,19 +14,17 @@ // Eager Dygraph #include + #include #include "gtest/gtest.h" -#include "paddle/fluid/platform/flags.h" - #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" - -#include "paddle/fluid/imperative/tracer.h" - #include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h" #include "paddle/fluid/eager/tests/test_utils.h" +#include "paddle/fluid/imperative/tracer.h" +#include "paddle/fluid/platform/flags.h" #ifdef WITH_GPERFTOOLS #include "gperftools/profiler.h" diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc index b4b47a85f66..bf1d955b900 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cpu.cc @@ -23,7 +23,6 @@ #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h" #include "paddle/fluid/eager/tests/test_utils.h" #include "paddle/fluid/imperative/basic_engine.h" diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc index d9afd7cc965..0cd33a72e1a 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_fluid_cuda.cc @@ -23,7 +23,6 @@ #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/tests/performance_tests/benchmark_utils.h" #include "paddle/fluid/eager/tests/test_utils.h" #include "paddle/fluid/imperative/basic_engine.h" diff --git a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h index 86bf13707ed..5b37e973f1d 100644 --- a/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h +++ b/paddle/fluid/eager/tests/performance_tests/benchmark_utils.h @@ -15,6 +15,7 @@ #pragma once #include + 
#include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/imperative/layer.h" #include "paddle/phi/api/all.h" diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc index 7552ad83fa2..c6d4514fa8e 100644 --- a/paddle/fluid/eager/tests/task_tests/backward_test.cc +++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc @@ -12,25 +12,22 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/eager/backward.h" + #include #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" #include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/autograd_meta.h" -#include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tests/test_utils.h" - -#include "paddle/fluid/eager/api/all.h" - #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/tensor_meta.h" - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_meta.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(copy, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc index 4337c0d092c..847c082a301 100644 --- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc +++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc @@ -16,22 +16,17 @@ #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" #include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/grad_node_info.h" - -#include "paddle/fluid/eager/api/all.h" - -#include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/tensor_meta.h" - #include "paddle/fluid/eager/tests/test_utils.h" - +#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_meta.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index 551262d259e..e4ca8dd164b 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -15,14 +15,12 @@ #include #include "gtest/gtest.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h" #include "paddle/fluid/eager/tests/test_utils.h" #include "paddle/fluid/eager/utils.h" - #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc index 4cb316380aa..ebf396bebfa 100644 --- a/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc +++ 
b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc @@ -16,18 +16,15 @@ #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" #include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tests/test_utils.h" - #include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/tensor_meta.h" - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_meta.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc index 1f8fdb7de0c..a4da315f44a 100644 --- a/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc +++ b/paddle/fluid/eager/tests/task_tests/fwd_bwd_joint_test.cc @@ -16,21 +16,17 @@ #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/grad_node_info.h" - -#include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/tensor_meta.h" - #include "paddle/fluid/eager/hooks.h" #include "paddle/fluid/eager/tests/test_utils.h" - +#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_meta.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); PD_DECLARE_KERNEL(add, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/generated_test.cc b/paddle/fluid/eager/tests/task_tests/generated_test.cc index 3c237b76e64..b53cdf55d43 100644 --- a/paddle/fluid/eager/tests/task_tests/generated_test.cc +++ b/paddle/fluid/eager/tests/task_tests/generated_test.cc @@ -17,17 +17,14 @@ #include #include "gtest/gtest.h" - #include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" #include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" -#include "paddle/fluid/eager/utils.h" - #include "paddle/fluid/eager/tests/test_utils.h" +#include "paddle/fluid/eager/utils.h" #include "paddle/fluid/imperative/tracer.h" - -#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/grad_test.cc b/paddle/fluid/eager/tests/task_tests/grad_test.cc index 72a94b40ed7..8d6c4d7843f 100644 --- a/paddle/fluid/eager/tests/task_tests/grad_test.cc +++ b/paddle/fluid/eager/tests/task_tests/grad_test.cc @@ -16,17 +16,14 @@ #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" #include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tests/test_utils.h" - -#include "paddle/fluid/eager/api/all.h" - #include "paddle/phi/core/dense_tensor.h" 
#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc index d7b887b28bd..badbe871597 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc @@ -16,22 +16,17 @@ #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" +#include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/grad_node_info.h" - -#include "paddle/fluid/eager/api/all.h" - -#include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/tensor_meta.h" - #include "paddle/fluid/eager/hooks.h" #include "paddle/fluid/eager/tests/test_utils.h" - +#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/tensor_meta.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc index c4d4ff91106..dbe2c138945 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test_intermidiate.cc @@ -15,16 +15,14 @@ #include #include "gtest/gtest.h" - #include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" #include "paddle/fluid/eager/backward.h" #include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/hooks.h" #include "paddle/fluid/eager/tests/test_utils.h" #include "paddle/fluid/imperative/tracer.h" #include "paddle/phi/core/dense_tensor.h" - -#include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" -#include "paddle/fluid/eager/hooks.h" #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/task_tests/nan_inf_utils_test.cc b/paddle/fluid/eager/tests/task_tests/nan_inf_utils_test.cc index be0563fbeed..73d213f7114 100644 --- a/paddle/fluid/eager/tests/task_tests/nan_inf_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/nan_inf_utils_test.cc @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/eager/nan_inf_utils.h" + #include #include #include #include "gtest/gtest.h" - -#include "paddle/fluid/eager/nan_inf_utils.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/phi/api/include/api.h" diff --git a/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc b/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc index 24e5da06011..aeddeb6fae7 100644 --- a/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/tensor_utils_test.cc @@ -12,17 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/eager/api/utils/tensor_utils.h" + #include #include "gtest/gtest.h" - -#include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/grad_tensor_holder.h" #include "paddle/fluid/eager/tests/test_utils.h" #include "paddle/phi/api/lib/utils/allocator.h" - #include "paddle/phi/core/kernel_registry.h" PD_DECLARE_KERNEL(full, CPU, ALL_LAYOUT); diff --git a/paddle/fluid/eager/tests/test_utils.h b/paddle/fluid/eager/tests/test_utils.h index 47bfe9a7cab..cb1e531d82d 100644 --- a/paddle/fluid/eager/tests/test_utils.h +++ b/paddle/fluid/eager/tests/test_utils.h @@ -18,14 +18,12 @@ #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/utils.h" - -#include "paddle/phi/api/all.h" -#include "paddle/phi/core/dense_tensor.h" -#include "paddle/phi/core/tensor_meta.h" - #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" +#include "paddle/phi/api/all.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/tensor_meta.h" namespace eager_test { diff --git a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index 5a730e4dbf1..3254b3bf892 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -17,7 +17,6 @@ #include "paddle/fluid/eager/api/utils/global_utils.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/tensor_wrapper.h" - #include "paddle/fluid/operators/run_program_op.h" #include "paddle/fluid/platform/enforce.h" @@ -273,7 +272,7 @@ inline void RunProgramGradAPI( const paddle::framework::AttributeMap &attrs, std::vector &x_grad, // NOLINT std::vector ¶ms_grad // NOLINT - ) { +) { // if all output vars are set to stop_gradient, grad op no need to executed if (x_grad.empty() && params_grad.empty()) return; @@ -368,8 +367,7 @@ class GradNodeRunProgram : public egr::GradNodeBase { egr::kSlotSmallVectorSize> operator()(paddle::small_vector, egr::kSlotSmallVectorSize> &grads, // NOLINT - bool create_graph, - bool is_new_grad) override { + bool create_graph, bool is_new_grad) override { VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram"; paddle::small_vector, egr::kSlotSmallVectorSize> diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index c6389e99831..783afcc1e2c 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -18,7 +18,6 @@ #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" - #include "paddle/phi/api/all.h" namespace egr { @@ -161,10 +160,11 @@ class EagerUtils { if (require_any_grad && autograd_meta) { PADDLE_ENFORCE_EQ(!autograd_meta->StopGradient() && egr::egr_utils_api::IsLeafTensor(target), - false, paddle::platform::errors::InvalidArgument( - "Leaf Var (%s) that doesn't stop gradient " - "can't use inplace strategy.", - target.name())); + false, + paddle::platform::errors::InvalidArgument( + "Leaf Var (%s) that doesn't stop gradient " + "can't use inplace strategy.", + target.name())); } } @@ -234,8 +234,8 @@ class EagerUtils { const paddle::experimental::Tensor& tensor); /** - * Fill Zero - * **/ + * Fill Zero + * **/ static void FillZeroForEmptyOptionalGradInput( std::vector* in_grads, const std::vector& 
grad_in_metas); diff --git a/paddle/fluid/framework/archive.h b/paddle/fluid/framework/archive.h index d0589383863..6a8f4ff47f3 100644 --- a/paddle/fluid/framework/archive.h +++ b/paddle/fluid/framework/archive.h @@ -20,6 +20,7 @@ #endif #include + #include #include #include @@ -31,6 +32,7 @@ #include #include #include + #include "paddle/fluid/framework/expect.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/async_executor.cc b/paddle/fluid/framework/async_executor.cc index ae3d8379bdb..d6cc5dc639f 100644 --- a/paddle/fluid/framework/async_executor.cc +++ b/paddle/fluid/framework/async_executor.cc @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/async_executor.h" + +#include "gflags/gflags.h" #include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/message.h" #include "google/protobuf/text_format.h" - -#include "gflags/gflags.h" #include "paddle/fluid/framework/data_feed_factory.h" #include "paddle/fluid/framework/executor_thread_worker.h" #include "paddle/fluid/framework/feed_fetch_method.h" diff --git a/paddle/fluid/framework/async_executor.h b/paddle/fluid/framework/async_executor.h index b0c6c8a0164..01daf3c1118 100644 --- a/paddle/fluid/framework/async_executor.h +++ b/paddle/fluid/framework/async_executor.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include // NOLINT @@ -24,6 +25,7 @@ limitations under the License. */ #include // NOLINT #include #include + #include "paddle/fluid/framework/data_feed.pb.h" #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/executor.h" diff --git a/paddle/fluid/framework/attribute.h b/paddle/fluid/framework/attribute.h index 2164a21f3f8..b2c5bfde3aa 100644 --- a/paddle/fluid/framework/attribute.h +++ b/paddle/fluid/framework/attribute.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/framework/attribute_test.cc b/paddle/fluid/framework/attribute_test.cc index 27a6afb49f5..8a47e41d383 100644 --- a/paddle/fluid/framework/attribute_test.cc +++ b/paddle/fluid/framework/attribute_test.cc @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/attribute.h" + #include #include -#include "paddle/fluid/framework/attribute.h" -#include "paddle/fluid/framework/program_desc.h" - #include "gtest/gtest.h" +#include "paddle/fluid/framework/program_desc.h" #include "paddle/utils/any.h" TEST(Attribute, GetAttrValueToAny) { diff --git a/paddle/fluid/framework/channel.h b/paddle/fluid/framework/channel.h index 80fee94f1c8..1eb3585fa33 100644 --- a/paddle/fluid/framework/channel.h +++ b/paddle/fluid/framework/channel.h @@ -20,6 +20,7 @@ #endif #include + #include #include // NOLINT #include @@ -28,6 +29,7 @@ #include // NOLINT #include #include + #include "paddle/fluid/framework/expect.h" namespace paddle { diff --git a/paddle/fluid/framework/convert_utils_test.cc b/paddle/fluid/framework/convert_utils_test.cc index 140806dfd7c..e3f5a4a8dcd 100644 --- a/paddle/fluid/framework/convert_utils_test.cc +++ b/paddle/fluid/framework/convert_utils_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" + #include "gtest/gtest.h" namespace phi { diff --git a/paddle/fluid/framework/copy_same_tensor_test.cc b/paddle/fluid/framework/copy_same_tensor_test.cc index d8c27ad280d..d4f36be5e87 100644 --- a/paddle/fluid/framework/copy_same_tensor_test.cc +++ b/paddle/fluid/framework/copy_same_tensor_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "gflags/gflags.h" diff --git a/paddle/fluid/framework/custom_operator.cc b/paddle/fluid/framework/custom_operator.cc index 65c41e19ac4..0130fd4b57f 100644 --- a/paddle/fluid/framework/custom_operator.cc +++ b/paddle/fluid/framework/custom_operator.cc @@ -867,43 +867,43 @@ void RegisterOperatorWithMetaInfo(const std::vector& op_meta_infos, bool is_double_grad = (i == 2); // GradOpDescMaker - info.grad_op_maker_ = [grad_op_name, grad_op_inputs, grad_op_outputs, - is_double_grad]( - const OpDesc& fwd_op, - const std::unordered_set& no_grad_set, - std::unordered_map* grad_to_var, - const std::vector& grad_block) { - CustomGradOpMaker maker( - fwd_op, no_grad_set, grad_to_var, grad_block, grad_op_name, - grad_op_inputs, grad_op_outputs, is_double_grad); - return maker(); - }; + info.grad_op_maker_ = + [grad_op_name, grad_op_inputs, grad_op_outputs, is_double_grad]( + const OpDesc& fwd_op, + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var, + const std::vector& grad_block) { + CustomGradOpMaker maker( + fwd_op, no_grad_set, grad_to_var, grad_block, grad_op_name, + grad_op_inputs, grad_op_outputs, is_double_grad); + return maker(); + }; // GradOpBaseMaker - info.dygraph_grad_op_maker_ = [grad_op_name, grad_op_inputs, - grad_op_outputs, is_double_grad]( - const std::string& type, - const imperative::NameVarBaseMap& var_base_map_in, - const imperative::NameVarBaseMap& var_base_map_out, - const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs, - const std::map& inplace_map) { - CustomGradOpMaker maker( - type, var_base_map_in, var_base_map_out, attrs, inplace_map, - grad_op_name, grad_op_inputs, grad_op_outputs, is_double_grad); - maker.SetDygraphDefaultAttrsMap(default_attrs); - return maker(); - }; + info.dygraph_grad_op_maker_ = + [grad_op_name, grad_op_inputs, grad_op_outputs, is_double_grad]( + const std::string& type, + const imperative::NameVarBaseMap& var_base_map_in, + const imperative::NameVarBaseMap& var_base_map_out, + const framework::AttributeMap& attrs, + const framework::AttributeMap& default_attrs, + const std::map& inplace_map) { + CustomGradOpMaker maker( + type, var_base_map_in, var_base_map_out, attrs, inplace_map, + grad_op_name, grad_op_inputs, grad_op_outputs, is_double_grad); + maker.SetDygraphDefaultAttrsMap(default_attrs); + return maker(); + }; /* Grad op register */ OpInfo grad_info; // Grad Op - grad_info.creator_ = []( - const std::string& type, const VariableNameMap& inputs, - const VariableNameMap& outputs, const AttributeMap& attrs) { - return new CustomOperator(type, inputs, outputs, attrs); - }; + grad_info.creator_ = + [](const std::string& type, const VariableNameMap& inputs, + const VariableNameMap& outputs, const AttributeMap& attrs) { + return new CustomOperator(type, inputs, outputs, attrs); + }; // Grad InferShape if (grad_infer_shape_fn == nullptr) { diff --git a/paddle/fluid/framework/data_device_transform_test.cu b/paddle/fluid/framework/data_device_transform_test.cu index 4757eb60f43..d51707970ff 100644 --- a/paddle/fluid/framework/data_device_transform_test.cu +++ 
b/paddle/fluid/framework/data_device_transform_test.cu @@ -13,18 +13,16 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "gtest/gtest.h" - #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/init.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/fluid/framework/phi_utils.h" - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc old mode 100755 new mode 100644 index 0801aa0e56a..1808caddabc --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -18,6 +18,7 @@ limitations under the License. */ #endif #include "paddle/fluid/framework/data_feed.h" + #include "paddle/fluid/framework/fleet/ps_gpu_wrapper.h" #ifdef _LINUX #include @@ -231,8 +232,9 @@ bool DataFeed::PickOneFile(std::string* filename) { } void DataFeed::CheckInit() { - PADDLE_ENFORCE_EQ(finish_init_, true, platform::errors::PreconditionNotMet( - "DataFeed initialization failed.")); + PADDLE_ENFORCE_EQ( + finish_init_, true, + platform::errors::PreconditionNotMet("DataFeed initialization failed.")); } void DataFeed::CheckSetFileList() { @@ -1619,9 +1621,10 @@ template class PrivateInstantDataFeed>; bool MultiSlotFileInstantDataFeed::Preprocess(const std::string& filename) { fd_ = open(filename.c_str(), O_RDONLY); PADDLE_ENFORCE_NE( - fd_, -1, platform::errors::Unavailable( - "Fail to open file: %s in MultiSlotFileInstantDataFeed.", - filename.c_str())); + fd_, -1, + platform::errors::Unavailable( + "Fail to open file: %s in MultiSlotFileInstantDataFeed.", + filename.c_str())); struct stat sb; fstat(fd_, &sb); @@ -2182,7 +2185,7 @@ void SlotRecordInMemoryDataFeed::LoadIntoMemoryByLine(void) { SlotRecordPool().get(&record_vec, OBJPOOL_BLOCK_SIZE); // get slotrecord object function auto record_func = [this, &offset, &record_vec, &old_offset]( - std::vector& vec, int num) { + std::vector& vec, int num) { vec.resize(num); if (offset + num > OBJPOOL_BLOCK_SIZE) { input_channel_->WriteMove(offset, &record_vec[0]); @@ -2675,8 +2678,8 @@ void SlotRecordInMemoryDataFeed::BuildSlotBatchGPU(const int ins_num) { size_t* off_start_ptr = &offsets[j * offset_cols_size]; int total_instance = static_cast(off_start_ptr[offset_cols_size - 1]); - CHECK(total_instance >= 0) << "slot idx:" << j - << ", total instance:" << total_instance; + CHECK(total_instance >= 0) + << "slot idx:" << j << ", total instance:" << total_instance; auto& info = used_slots_info_[j]; // fill slot value with default value 0 diff --git a/paddle/fluid/framework/data_feed_factory.cc b/paddle/fluid/framework/data_feed_factory.cc index e46e4aeb012..e058b194690 100644 --- a/paddle/fluid/framework/data_feed_factory.cc +++ b/paddle/fluid/framework/data_feed_factory.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_feed_factory.h" #include + #include #include diff --git a/paddle/fluid/framework/data_feed_test.cc b/paddle/fluid/framework/data_feed_test.cc index 2cc441bbd34..8375ed80e83 100644 --- a/paddle/fluid/framework/data_feed_test.cc +++ b/paddle/fluid/framework/data_feed_test.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/data_feed.h" + #include + #include // NOLINT #include #include @@ -23,6 +25,7 @@ #include // NOLINT #include #include + #include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/text_format.h" #include "gtest/gtest.h" diff --git a/paddle/fluid/framework/data_set.cc b/paddle/fluid/framework/data_set.cc index 0c762ab2e77..f89d0f969ab 100644 --- a/paddle/fluid/framework/data_set.cc +++ b/paddle/fluid/framework/data_set.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/framework/data_set.h" + #include "google/protobuf/text_format.h" #if (defined PADDLE_WITH_DISTRIBUTE) && (defined PADDLE_WITH_PSCORE) #include "paddle/fluid/distributed/index_dataset/index_sampler.h" diff --git a/paddle/fluid/framework/data_set.h b/paddle/fluid/framework/data_set.h index 3d096eaebe3..5d961841a25 100644 --- a/paddle/fluid/framework/data_set.h +++ b/paddle/fluid/framework/data_set.h @@ -15,6 +15,7 @@ #pragma once #include + #include #include #include // NOLINT @@ -26,6 +27,7 @@ #include #ifdef PADDLE_WITH_GLOO #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc index 15cf30c1cf3..01802c11d52 100644 --- a/paddle/fluid/framework/data_type_test.cc +++ b/paddle/fluid/framework/data_type_test.cc @@ -44,8 +44,8 @@ TEST(DataType, float16) { TEST(DataType, bfloat16) { using paddle::framework::Tensor; - using paddle::platform::CPUPlace; using paddle::platform::bfloat16; + using paddle::platform::CPUPlace; namespace f = paddle::framework; f::proto::VarType::Type dtype = f::proto::VarType::BF16; diff --git a/paddle/fluid/framework/data_type_transform_test.cu b/paddle/fluid/framework/data_type_transform_test.cu index 4fab3a78454..3420298297b 100644 --- a/paddle/fluid/framework/data_type_transform_test.cu +++ b/paddle/fluid/framework/data_type_transform_test.cu @@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "gtest/gtest.h" #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/tensor_util.h" -#include "gtest/gtest.h" - TEST(DataTypeTransform, GPUTransform) { auto cpu_place = paddle::platform::CPUPlace(); auto gpu_place = paddle::platform::CUDAPlace(0); diff --git a/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.cc index 75baf15dc5e..ebdf66cdde1 100644 --- a/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/fetch_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" diff --git a/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h index 5e973f13cc6..c907a4b4afc 100644 --- a/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/bind_threaded_ssa_graph_executor.h @@ -14,12 +14,14 @@ #pragma once #include + #include // NOLINT #include #include // NOLINT #include #include #include + #include "paddle/fluid/framework/blocking_queue.h" #include "paddle/fluid/framework/details/exception_holder.h" #include "paddle/fluid/framework/details/execution_strategy.h" diff --git a/paddle/fluid/framework/details/bkcl_op_handle.h b/paddle/fluid/framework/details/bkcl_op_handle.h index 1a098f06f08..b0c2275b3a5 100644 --- a/paddle/fluid/framework/details/bkcl_op_handle.h +++ b/paddle/fluid/framework/details/bkcl_op_handle.h @@ -14,8 +14,6 @@ #pragma once -#include "xpu/bkcl.h" - #include #include #include @@ -24,6 +22,7 @@ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/device/xpu/bkcl_helper.h" +#include "xpu/bkcl.h" DECLARE_bool(sync_bkcl_allreduce); diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index fdf74d2f769..9ed76c87d84 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include "paddle/fluid/framework/details/build_strategy.h" #include + #include "paddle/fluid/framework/details/reduce_op_handle.h" #include "paddle/fluid/framework/ir/graph_printer.h" #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h" diff --git a/paddle/fluid/framework/details/build_strategy_test.cc b/paddle/fluid/framework/details/build_strategy_test.cc index 69af77d23fb..1914c1d33de 100644 --- a/paddle/fluid/framework/details/build_strategy_test.cc +++ b/paddle/fluid/framework/details/build_strategy_test.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/details/build_strategy.h" + #include #include #include @@ -23,8 +25,6 @@ #include "gtest/gtest-test-part.h" #include "gtest/gtest.h" #include "gtest/gtest_pred_impl.h" - -#include "paddle/fluid/framework/details/build_strategy.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/var_type_inference.h" diff --git a/paddle/fluid/framework/details/cow_ptr_test.cc b/paddle/fluid/framework/details/cow_ptr_test.cc index 5b055d7cb4d..b440da9f1df 100644 --- a/paddle/fluid/framework/details/cow_ptr_test.cc +++ b/paddle/fluid/framework/details/cow_ptr_test.cc @@ -13,6 +13,7 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/details/cow_ptr.h" + #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/framework/details/execution_strategy.h b/paddle/fluid/framework/details/execution_strategy.h index 7f51de435ba..57440ed9aa2 100644 --- a/paddle/fluid/framework/details/execution_strategy.h +++ b/paddle/fluid/framework/details/execution_strategy.h @@ -14,6 +14,7 @@ #pragma once #include // for size_t + #include "paddle/fluid/platform/device_context.h" namespace paddle { diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h index 4477702900a..19b00615715 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h @@ -14,10 +14,12 @@ #pragma once #include + #include #include #include #include + #include "paddle/fluid/framework/blocking_queue.h" #include "paddle/fluid/framework/details/exception_holder.h" #include "paddle/fluid/framework/details/execution_strategy.h" diff --git a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc index f4ca4907d48..7f44e68af6b 100644 --- a/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/fused_all_reduce_op_handle.cc @@ -325,9 +325,10 @@ void FusedAllReduceOpHandle::GetGradLoDTensor( PADDLE_ENFORCE_EQ( platform::is_same_place(lod_tensor.place(), places_.at(scope_idx)), - true, platform::errors::InvalidArgument( - "The variable '%s' at scope %d is not in the right place.", - var_name, scope_idx)); + true, + platform::errors::InvalidArgument( + "The variable '%s' at scope %d is not in the right place.", + var_name, scope_idx)); grad_tensor->emplace_back(std::make_pair(var_name, &lod_tensor)); } } @@ -356,10 +357,11 @@ void FusedAllReduceOpHandle::GetDTypeAndNumel( // Get element number int64_t len = grad_tensor.at(i).second->numel(); PADDLE_ENFORCE_GT( - len, 0, platform::errors::InvalidArgument( - "The size of grad tensors of fused_all_reduce_op_handle " - "must be > 0, but got %d.", - len)); + len, 0, + platform::errors::InvalidArgument( + "The size of grad tensors of fused_all_reduce_op_handle " + "must be > 0, but got %d.", + len)); *numel += platform::Alignment(len * size_of_dtype, places_[0]) / size_of_dtype; } diff --git a/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc b/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc index 44b9ca90fc5..18de9f443a7 100644 --- a/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/framework/details/grad_merge_all_reduce_op_handle.h" + #include "paddle/fluid/platform/profiler/event_tracing.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) diff --git a/paddle/fluid/framework/details/graph_test_base.h b/paddle/fluid/framework/details/graph_test_base.h index d139f848830..08d9c999a8a 100644 --- a/paddle/fluid/framework/details/graph_test_base.h +++ b/paddle/fluid/framework/details/graph_test_base.h @@ -18,6 +18,7 @@ #include #include #include + #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/ir/graph.h" diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index e6790de92d0..7b93baddb4a 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/framework/details/nan_inf_utils_detail.h" + +#include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/scope.h" @@ -261,7 +262,7 @@ void CheckNanInf>( } template <> - void CheckNanInf>> + void CheckNanInf < paddle::platform::complex < double >>> (const paddle::platform::complex* value, const size_t numel, int print_num, const std::string& op_type, const std::string& var_name) { double real_sum = 0.0; @@ -563,8 +564,9 @@ static void NPUCheckOpHasNanOrInf(const framework::OperatorBase& op, if (sum >= 1.0) PrintNPUOpValueInfo(op, scope, place); - PADDLE_ENFORCE_LT(sum, 1.0, platform::errors::PreconditionNotMet( - "Operator %s contains Nan/Inf.", op.Type())); + PADDLE_ENFORCE_LT(sum, 1.0, + platform::errors::PreconditionNotMet( + "Operator %s contains Nan/Inf.", op.Type())); } #endif diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cu b/paddle/fluid/framework/details/nan_inf_utils_detail.cu index 7cf11f7829d..b8b5537c93c 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cu +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cu @@ -12,15 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/details/nan_inf_utils.h" -#include "paddle/fluid/framework/details/nan_inf_utils_detail.h" - #include #include #include #include #include "paddle/fluid/framework/convert_utils.h" +#include "paddle/fluid/framework/details/nan_inf_utils.h" +#include "paddle/fluid/framework/details/nan_inf_utils_detail.h" #include "paddle/fluid/framework/scope.h" namespace paddle { diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 427b981e7cd..213d7033764 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -213,14 +213,14 @@ struct OpInfoFiller { platform::errors::AlreadyExists( "GradOpDescMaker of %s has been registered", op_type)); - info->grad_op_maker_ = []( - const OpDesc& fwd_op, - const std::unordered_set& no_grad_set, - std::unordered_map* grad_to_var, - const std::vector& grad_block) { - T maker(fwd_op, no_grad_set, grad_to_var, grad_block); - return maker(); - }; + info->grad_op_maker_ = + [](const OpDesc& fwd_op, + const std::unordered_set& no_grad_set, + std::unordered_map* grad_to_var, + const std::vector& grad_block) { + T maker(fwd_op, no_grad_set, grad_to_var, grad_block); + return maker(); + }; info->use_default_grad_op_desc_maker_ = std::is_base_of, T>::value || @@ -244,17 +244,17 @@ struct OpInfoFiller { platform::errors::AlreadyExists( "GradOpBaseMaker of %s has been registered", op_type)); - info->dygraph_grad_op_maker_ = []( - const std::string& type, - const imperative::NameVarBaseMap& var_base_map_in, - const imperative::NameVarBaseMap& var_base_map_out, - const framework::AttributeMap& attrs, - const framework::AttributeMap& default_attrs, - const std::map& inplace_map) { - T maker(type, var_base_map_in, var_base_map_out, attrs, inplace_map); - maker.SetDygraphDefaultAttrsMap(default_attrs); - return maker(); - }; + info->dygraph_grad_op_maker_ = + [](const std::string& type, + const imperative::NameVarBaseMap& var_base_map_in, + const imperative::NameVarBaseMap& var_base_map_out, + const framework::AttributeMap& attrs, + const framework::AttributeMap& default_attrs, + const std::map& inplace_map) { + T maker(type, var_base_map_in, var_base_map_out, attrs, inplace_map); + maker.SetDygraphDefaultAttrsMap(default_attrs); + return maker(); + }; } }; diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc index 936e84a6c82..22c27fe86f1 100644 --- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.cc @@ -90,10 +90,9 @@ ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( const std::vector &places, ir::Graph *graph) // TODO(Yancey1989): Copying graphs is not safely since it deleted the // attrs. 
- : ParallelSSAGraphExecutor(strategy, local_scopes, local_exec_scopes, - places, - SeparateMultiDevicesGraph(graph, - places.size())) {} + : ParallelSSAGraphExecutor( + strategy, local_scopes, local_exec_scopes, places, + SeparateMultiDevicesGraph(graph, places.size())) {} ParallelSSAGraphExecutor::ParallelSSAGraphExecutor( const ExecutionStrategy &strategy, const std::vector &local_scopes, diff --git a/paddle/fluid/framework/details/parallel_ssa_graph_executor.h b/paddle/fluid/framework/details/parallel_ssa_graph_executor.h index d9d83efcb8e..88c8b1cbfb2 100644 --- a/paddle/fluid/framework/details/parallel_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/parallel_ssa_graph_executor.h @@ -17,6 +17,7 @@ #include #include #include + #include "ThreadPool.h" #include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" diff --git a/paddle/fluid/framework/details/reduce_op_handle.cc b/paddle/fluid/framework/details/reduce_op_handle.cc index 2ae3880ab3c..799005e4b09 100644 --- a/paddle/fluid/framework/details/reduce_op_handle.cc +++ b/paddle/fluid/framework/details/reduce_op_handle.cc @@ -245,14 +245,15 @@ void ReduceOpHandle::RunImpl() { int type = platform::ToBKCLDataType( framework::TransToProtoVarType(lod_tensor.dtype())); size_t numel = static_cast(lod_tensor.numel()); - all_reduce_calls.emplace_back([buffer, recvbuffer, type, numel, root_id, - &bkcl_ctx] { - PADDLE_ENFORCE_EQ(bkcl_reduce(bkcl_ctx.comm(), buffer, recvbuffer, - numel, static_cast(type), - BKCL_ADD, root_id, nullptr), - BKCL_SUCCESS, platform::errors::Unavailable( - "bkcl_all_reduce failed")); - }); + all_reduce_calls.emplace_back( + [buffer, recvbuffer, type, numel, root_id, &bkcl_ctx] { + PADDLE_ENFORCE_EQ( + bkcl_reduce(bkcl_ctx.comm(), buffer, recvbuffer, numel, + static_cast(type), BKCL_ADD, + root_id, nullptr), + BKCL_SUCCESS, + platform::errors::Unavailable("bkcl_all_reduce failed")); + }); } WaitInputVarGenerated(); diff --git a/paddle/fluid/framework/details/rpc_op_handle.cc b/paddle/fluid/framework/details/rpc_op_handle.cc index 39bcf1d0f38..35373e1a709 100644 --- a/paddle/fluid/framework/details/rpc_op_handle.cc +++ b/paddle/fluid/framework/details/rpc_op_handle.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/rpc_op_handle.h" + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/platform/profiler/event_tracing.h" diff --git a/paddle/fluid/framework/details/scope_buffered_monitor.cc b/paddle/fluid/framework/details/scope_buffered_monitor.cc index 57faf0e75ba..bd1a4378f07 100644 --- a/paddle/fluid/framework/details/scope_buffered_monitor.cc +++ b/paddle/fluid/framework/details/scope_buffered_monitor.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/details/scope_buffered_monitor.h" + #include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { diff --git a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h index ea5a3c07957..091224f1e59 100644 --- a/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h +++ b/paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h @@ -14,6 +14,7 @@ #pragma once #include + #include #include #include @@ -21,6 +22,7 @@ #include #include #include + #include "paddle/fluid/framework/details/execution_strategy.h" #include "paddle/fluid/framework/details/op_handle_base.h" #include "paddle/fluid/framework/details/scope_buffered_monitor.h" diff --git a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc index 7e63c5ffb9a..28a5c31f644 100644 --- a/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc +++ b/paddle/fluid/framework/details/sparse_all_reduce_op_handle.cc @@ -41,8 +41,9 @@ SparseAllReduceOpHandle::SparseAllReduceOpHandle( is_encoded_(is_encoded), nranks_(nranks) { // TODO(gongwb) :polish them! - PADDLE_ENFORCE_EQ(is_encoded, true, platform::errors::InvalidArgument( - "The argument is_encoded is false.")); + PADDLE_ENFORCE_EQ( + is_encoded, true, + platform::errors::InvalidArgument("The argument is_encoded is false.")); VLOG(1) << "Use dgc allreduce mode" << ", nranks:" << nranks_; @@ -193,11 +194,12 @@ void SparseAllReduceOpHandle::RunImplEncoded() { sparse_reduce_calls.emplace_back([=] { platform::CUDADeviceGuard guard(dev_id); - PADDLE_ENFORCE_EQ(paddle::communication::dgc::sparseReduce( - gather_buff, k, out_tensor_buf, - static_cast(out_numel), nranks_, stream), - true, platform::errors::Unavailable( - "Calling sparseReduce() failed.")); + PADDLE_ENFORCE_EQ( + paddle::communication::dgc::sparseReduce( + gather_buff, k, out_tensor_buf, static_cast(out_numel), + nranks_, stream), + true, + platform::errors::Unavailable("Calling sparseReduce() failed.")); }); } diff --git a/paddle/fluid/framework/device_worker.cc b/paddle/fluid/framework/device_worker.cc index 88026143683..56cd12f5001 100644 --- a/paddle/fluid/framework/device_worker.cc +++ b/paddle/fluid/framework/device_worker.cc @@ -190,9 +190,10 @@ void DeviceWorker::DumpField(const Scope& scope, int dump_mode, tensor = &cpu_tensor; } if (!CheckValidOutput(tensor, batch_size)) { - VLOG(0) << "Note: field[" << field << "] cannot pass check, so it was " - "skipped. Maybe the dimension is " - "wrong "; + VLOG(0) << "Note: field[" << field + << "] cannot pass check, so it was " + "skipped. Maybe the dimension is " + "wrong "; continue; } for (size_t i = 0; i < batch_size; ++i) { diff --git a/paddle/fluid/framework/device_worker_factory.cc b/paddle/fluid/framework/device_worker_factory.cc index e6635a2f941..c973afd1560 100644 --- a/paddle/fluid/framework/device_worker_factory.cc +++ b/paddle/fluid/framework/device_worker_factory.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/device_worker_factory.h" #include + #include #include diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc index 20d08ef18ae..7e1f740bcc2 100644 --- a/paddle/fluid/framework/dlpack_tensor.cc +++ b/paddle/fluid/framework/dlpack_tensor.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
#include "paddle/fluid/framework/dlpack_tensor.h" + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" diff --git a/paddle/fluid/framework/dlpack_tensor_test.cc b/paddle/fluid/framework/dlpack_tensor_test.cc index 829908bd982..6c19cf3450d 100644 --- a/paddle/fluid/framework/dlpack_tensor_test.cc +++ b/paddle/fluid/framework/dlpack_tensor_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/dlpack_tensor.h" + #include #include @@ -39,7 +40,7 @@ constexpr uint8_t GetDLDataTypeCode() { : (std::is_integral::value ? static_cast(kDLInt) : static_cast(-1))); } -} // NOLINT +} // namespace template void TestMain(const platform::Place &place, uint16_t lanes) { diff --git a/paddle/fluid/framework/downpour_lite_worker.cc b/paddle/fluid/framework/downpour_lite_worker.cc index 7344c93ef06..8ceffe58dcf 100644 --- a/paddle/fluid/framework/downpour_lite_worker.cc +++ b/paddle/fluid/framework/downpour_lite_worker.cc @@ -202,15 +202,15 @@ void DownpourLiteWorker::CopyDenseVars() { Variable* src_var = thread_scope_->FindVar(src_var_name); CHECK(src_var != nullptr) << src_var_name << " not found"; // NOLINT LoDTensor* src_tensor = src_var->GetMutable(); - CHECK(src_tensor != nullptr) << src_var_name - << " tensor is null"; // NOLINT + CHECK(src_tensor != nullptr) + << src_var_name << " tensor is null"; // NOLINT float* src_data = src_tensor->data(); Variable* dest_var = thread_scope_->FindVar(dest_var_name); CHECK(dest_var != nullptr) << dest_var_name << " not found"; // NOLINT LoDTensor* dest_tensor = dest_var->GetMutable(); - CHECK(dest_tensor != nullptr) << dest_var_name - << " tensor is null"; // NOLINT + CHECK(dest_tensor != nullptr) + << dest_var_name << " tensor is null"; // NOLINT float* dest_data = dest_tensor->data(); CHECK(src_tensor->numel() == dest_tensor->numel()) diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc index 06c3d18af84..c14b48ef8a7 100644 --- a/paddle/fluid/framework/downpour_worker.cc +++ b/paddle/fluid/framework/downpour_worker.cc @@ -155,8 +155,8 @@ void DownpourWorker::CollectLabelInfo(size_t table_idx) { continue; } LoDTensor* tensor = fea_var->GetMutable(); - CHECK(tensor != nullptr) << "tensor of var " - << sparse_key_names_[table_id][i] << " is null"; + CHECK(tensor != nullptr) + << "tensor of var " << sparse_key_names_[table_id][i] << " is null"; // skip slots which do not have embedding Variable* emb_var = @@ -309,9 +309,9 @@ void DownpourWorker::AdjustInsWeight() { float* ins_weights = ins_weight_tensor->data(); size_t len = ins_weight_tensor->numel(); // len = batch size // here we assume nid_show slot only has one feasign in each instance - CHECK(len == nid_show_.size()) << "ins_weight size should be equal to " - << "nid_show size, " << len << " vs " - << nid_show_.size(); + CHECK(len == nid_show_.size()) + << "ins_weight size should be equal to " + << "nid_show size, " << len << " vs " << nid_show_.size(); float nid_adjw_threshold = adjust_ins_weight_config_.nid_adjw_threshold(); float nid_adjw_ratio = adjust_ins_weight_config_.nid_adjw_ratio(); int64_t nid_adjw_num = 0; @@ -326,9 +326,8 @@ void DownpourWorker::AdjustInsWeight() { } float ins_weight = 1.0; if (nid_show >= 0 && nid_show < nid_adjw_threshold) { - ins_weight = log(M_E + - (nid_adjw_threshold - nid_show) / nid_adjw_threshold * - nid_adjw_ratio); + ins_weight = log(M_E + (nid_adjw_threshold - nid_show) / + nid_adjw_threshold * nid_adjw_ratio); // count nid adjw insnum and 
weight ++nid_adjw_num; nid_adjw_weight += ins_weight; @@ -423,15 +422,15 @@ void DownpourWorker::CopyDenseVars() { Variable* src_var = thread_scope_->FindVar(src_var_name); CHECK(src_var != nullptr) << src_var_name << " not found"; // NOLINT LoDTensor* src_tensor = src_var->GetMutable(); - CHECK(src_tensor != nullptr) << src_var_name - << " tensor is null"; // NOLINT + CHECK(src_tensor != nullptr) + << src_var_name << " tensor is null"; // NOLINT float* src_data = src_tensor->data(); Variable* dest_var = thread_scope_->FindVar(dest_var_name); CHECK(dest_var != nullptr) << dest_var_name << " not found"; // NOLINT LoDTensor* dest_tensor = dest_var->GetMutable(); - CHECK(dest_tensor != nullptr) << dest_var_name - << " tensor is null"; // NOLINT + CHECK(dest_tensor != nullptr) + << dest_var_name << " tensor is null"; // NOLINT float* dest_data = dest_tensor->data(); CHECK(src_tensor->numel() == dest_tensor->numel()) diff --git a/paddle/fluid/framework/eigen_test.cc b/paddle/fluid/framework/eigen_test.cc index 43d5f9ea0e8..4e214bd36f3 100644 --- a/paddle/fluid/framework/eigen_test.cc +++ b/paddle/fluid/framework/eigen_test.cc @@ -13,10 +13,11 @@ // limitations under the License. #include "paddle/fluid/framework/eigen.h" -#include "paddle/phi/core/ddim.h" #include +#include "paddle/phi/core/ddim.h" + namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc index 06ce9712f5c..830bbacb639 100644 --- a/paddle/fluid/framework/executor.cc +++ b/paddle/fluid/framework/executor.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/executor.h" + #include + #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/trainer_desc.pb.h" #include "paddle/fluid/framework/trainer_factory.h" @@ -585,8 +587,9 @@ void Executor::RunPreparedContext( "Program in ExecutorPrepareContext should has feed_ops.")); PADDLE_ENFORCE_EQ( has_fetch_operators(global_block, *fetch_targets, fetch_holder_name), - true, platform::errors::PreconditionNotMet( - "Program in the prepared context should has fetch_ops.")); + true, + platform::errors::PreconditionNotMet( + "Program in the prepared context should has fetch_ops.")); // map the data of feed_targets to feed_holder for (auto* op : global_block.AllOps()) { diff --git a/paddle/fluid/framework/executor_cache.cc b/paddle/fluid/framework/executor_cache.cc index 50a41cb5611..468b3bc680a 100644 --- a/paddle/fluid/framework/executor_cache.cc +++ b/paddle/fluid/framework/executor_cache.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/executor_cache.h" + #include "paddle/fluid/framework/op_info.h" namespace paddle { diff --git a/paddle/fluid/framework/executor_thread_worker.cc b/paddle/fluid/framework/executor_thread_worker.cc index 06019372a73..c6ccc2adc65 100644 --- a/paddle/fluid/framework/executor_thread_worker.cc +++ b/paddle/fluid/framework/executor_thread_worker.cc @@ -13,13 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. 
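The AdjustInsWeight hunk above reflows a real expression, so its meaning is worth keeping in view: instances whose nid_show falls below a threshold get weight log(e + (threshold - nid_show) / threshold * ratio), which decays smoothly from log(e + ratio) toward 1 as nid_show approaches the threshold. A standalone sketch (config values hypothetical; the source writes the constant as M_E, spelled exp(1) here for portability):

    #include <cmath>
    #include <cstdio>

    int main() {
      const double threshold = 1000.0;  // hypothetical nid_adjw_threshold
      const double ratio = 20.0;        // hypothetical nid_adjw_ratio
      const double e = std::exp(1.0);   // the source uses M_E
      for (double nid_show : {0.0, 500.0, 999.0}) {
        double ins_weight =
            std::log(e + (threshold - nid_show) / threshold * ratio);
        std::printf("nid_show=%6.0f -> ins_weight=%.3f\n", nid_show,
                    ins_weight);
      }
      return 0;
    }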
*/ #include "paddle/fluid/framework/executor_thread_worker.h" + #include #include + +#include "gflags/gflags.h" #include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/message.h" #include "google/protobuf/text_format.h" - -#include "gflags/gflags.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/feed_fetch_method.h" #include "paddle/fluid/framework/feed_fetch_type.h" @@ -616,8 +617,8 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { int len = tensor->numel(); CHECK(slot_dim * len == g_tensor->numel()) << "len:" << len << " g_numel:" << g_tensor->numel(); - CHECK(len == tensor->numel()) << "len:" << len - << "t_numel:" << tensor->numel(); + CHECK(len == tensor->numel()) + << "len:" << len << "t_numel:" << tensor->numel(); int64_t* ids = tensor->data(); for (auto id_idx = 0u; id_idx < len; ++id_idx) { if (ids[id_idx] == 0) { @@ -626,15 +627,15 @@ void AsyncExecutorThreadWorker::PushSparse(int table_id) { } memcpy(push_g[fea_idx].data() + offset, g, sizeof(float) * slot_dim); push_g[fea_idx][0] = 1.0f; - CHECK(fea_idx < fea_info.size()) << "fea_idx:" << fea_idx - << " size:" << fea_info.size(); + CHECK(fea_idx < fea_info.size()) + << "fea_idx:" << fea_idx << " size:" << fea_info.size(); push_g[fea_idx][1] = static_cast(fea_info[fea_idx].label); g += slot_dim; fea_idx++; } } - CHECK(fea_idx == features.size()) << "fea_idx:" << fea_idx - << " features size:" << features.size(); + CHECK(fea_idx == features.size()) + << "fea_idx:" << fea_idx << " features size:" << features.size(); CHECK_GT(features.size(), 0); std::vector push_g_vec; @@ -701,5 +702,5 @@ void AsyncExecutorThreadWorker::check_pull_push_memory( } #endif -} // einit_modelnd namespace framework +} // namespace framework } // end namespace paddle diff --git a/paddle/fluid/framework/executor_thread_worker.h b/paddle/fluid/framework/executor_thread_worker.h index 524922b0322..f4fa54d2c3a 100644 --- a/paddle/fluid/framework/executor_thread_worker.h +++ b/paddle/fluid/framework/executor_thread_worker.h @@ -21,6 +21,7 @@ limitations under the License. */ #include #include // NOLINT #include + #include "paddle/fluid/framework/data_feed.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/framework/feed_fetch_method.cc b/paddle/fluid/framework/feed_fetch_method.cc index 096134e8528..ec3fdc49fdf 100644 --- a/paddle/fluid/framework/feed_fetch_method.cc +++ b/paddle/fluid/framework/feed_fetch_method.cc @@ -14,9 +14,9 @@ limitations under the License. */ #include "paddle/fluid/framework/feed_fetch_method.h" +#include #include -#include #include "glog/logging.h" namespace phi { diff --git a/paddle/fluid/framework/fleet/ascend_wrapper.h b/paddle/fluid/framework/fleet/ascend_wrapper.h index d5586212011..a4bd208959e 100644 --- a/paddle/fluid/framework/fleet/ascend_wrapper.h +++ b/paddle/fluid/framework/fleet/ascend_wrapper.h @@ -22,6 +22,10 @@ limitations under the License. */ #include #include +#include "ge/ge_api.h" +#include "graph/attr_value.h" +#include "graph/tensor.h" +#include "graph/types.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -29,11 +33,6 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/timer.h" -#include "ge/ge_api.h" -#include "graph/attr_value.h" -#include "graph/tensor.h" -#include "graph/types.h" - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/fleet/box_wrapper.cc b/paddle/fluid/framework/fleet/box_wrapper.cc index 8564a421659..1bb432a791e 100644 --- a/paddle/fluid/framework/fleet/box_wrapper.cc +++ b/paddle/fluid/framework/fleet/box_wrapper.cc @@ -14,10 +14,12 @@ #ifdef PADDLE_WITH_BOX_PS #include "paddle/fluid/framework/fleet/box_wrapper.h" + #include #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" @@ -186,26 +188,30 @@ void BasicAucCalculator::calculate_bucket_error() { void BoxWrapper::FeedPass(int date, const std::vector& feasgin_to_box) const { int ret = boxps_ptr_->FeedPass(date, feasgin_to_box); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "FeedPass failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("FeedPass failed in BoxPS.")); } void BoxWrapper::BeginFeedPass(int date, boxps::PSAgentBase** agent) const { int ret = boxps_ptr_->BeginFeedPass(date, *agent); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "BeginFeedPass failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("BeginFeedPass failed in BoxPS.")); } void BoxWrapper::EndFeedPass(boxps::PSAgentBase* agent) const { int ret = boxps_ptr_->EndFeedPass(agent); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "EndFeedPass failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("EndFeedPass failed in BoxPS.")); } void BoxWrapper::BeginPass() const { int ret = boxps_ptr_->BeginPass(); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "BeginPass failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("BeginPass failed in BoxPS.")); } void BoxWrapper::SetTestMode(bool is_test) const { diff --git a/paddle/fluid/framework/fleet/box_wrapper.cu b/paddle/fluid/framework/fleet/box_wrapper.cu index aea479ed0b2..17e59ac9104 100644 --- a/paddle/fluid/framework/fleet/box_wrapper.cu +++ b/paddle/fluid/framework/fleet/box_wrapper.cu @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/fleet/box_wrapper.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" @@ -175,13 +176,13 @@ void BoxWrapper::CopyForPull(const paddle::platform::Place& place, #define EXPAND_EMBED_PULL_CASE(i, ...) \ case i: { \ constexpr size_t ExpandDim = i; \ - PullCopy<<<(total_length + 512 - 1) / 512, 512, 0, stream>>>( \ - gpu_values, \ - reinterpret_cast*>( \ - total_values_gpu), \ - gpu_len, hidden_size, expand_embed_dim, slot_num, total_length, \ - gpu_keys); \ + PullCopy \ + <<<(total_length + 512 - 1) / 512, 512, 0, stream>>>( \ + gpu_values, \ + reinterpret_cast*>( \ + total_values_gpu), \ + gpu_len, hidden_size, expand_embed_dim, slot_num, total_length, \ + gpu_keys); \ } break #endif diff --git a/paddle/fluid/framework/fleet/box_wrapper.h b/paddle/fluid/framework/fleet/box_wrapper.h index b043edca138..dc01df221e9 100644 --- a/paddle/fluid/framework/fleet/box_wrapper.h +++ b/paddle/fluid/framework/fleet/box_wrapper.h @@ -24,6 +24,7 @@ limitations under the License. 
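The box_wrapper.cu hunk above also shows how newer clang-format splits a CUDA kernel launch: the templated kernel name keeps its own line and the <<<grid, block, shared-mem, stream>>> configuration drops to the continuation line. The grid expression there, (total_length + 512 - 1) / 512, is ordinary ceiling division; a plain-C++ sketch of that rounding (the helper name is hypothetical):

    #include <cstdio>

    // ceil(n / block) without floating point, as in (total_length + 511) / 512.
    unsigned long blocks_for(unsigned long n, unsigned long block = 512) {
      return (n + block - 1) / block;
    }

    int main() {
      std::printf("%lu\n", blocks_for(1000));  // prints 2
      return 0;
    }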
*/ #include #endif #include + #include #include #include @@ -36,6 +37,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/data_feed.h" #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/lod_tensor.h" @@ -65,10 +67,12 @@ class BasicAucCalculator { _local_pred = 0; } void add_data(double pred, int label) { - PADDLE_ENFORCE_GE(pred, 0.0, platform::errors::PreconditionNotMet( - "pred should be greater than 0")); - PADDLE_ENFORCE_LE(pred, 1.0, platform::errors::PreconditionNotMet( - "pred should be lower than 1")); + PADDLE_ENFORCE_GE( + pred, 0.0, + platform::errors::PreconditionNotMet("pred should be greater than 0")); + PADDLE_ENFORCE_LE( + pred, 1.0, + platform::errors::PreconditionNotMet("pred should be lower than 1")); PADDLE_ENFORCE_EQ( label * label, label, platform::errors::PreconditionNotMet( @@ -172,13 +176,15 @@ class AfsManager { pwd.c_str(), conf_path.c_str()); VLOG(0) << "AFSAPI Init: user: " << user << ", pwd: " << pwd; int ret = _afshandler->Init(true, (com_logstatus() == 0)); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "Called AFSAPI Init Interface Failed.")); + PADDLE_ENFORCE_EQ(ret, 0, + platform::errors::PreconditionNotMet( + "Called AFSAPI Init Interface Failed.")); // Too high level will hurt the performance comlog_set_log_level(4); ret = _afshandler->Connect(); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "Called AFSAPI Connect Interface Failed")); + PADDLE_ENFORCE_EQ(ret, 0, + platform::errors::PreconditionNotMet( + "Called AFSAPI Connect Interface Failed")); } virtual ~AfsManager() { if (_afshandler != NULL) { @@ -294,8 +300,9 @@ class AfsManager { int ret = PopenBidirectionalInternal(cmd.c_str(), rfp, wfp, pid, true, true); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "Called PopenBidirectionalInternal Failed")); + PADDLE_ENFORCE_EQ(ret, 0, + platform::errors::PreconditionNotMet( + "Called PopenBidirectionalInternal Failed")); std::string filename(path); if (strncmp(filename.c_str(), "afs:", 4) == 0) { filename = filename.substr(4); @@ -451,8 +458,9 @@ class BoxWrapper { std::string ret_str; int ret = boxps_ptr_->SaveBase(batch_model_path, xbox_model_path, ret_str, seconds_from_1970 / 86400); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "SaveBase failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("SaveBase failed in BoxPS.")); return ret_str; } @@ -460,8 +468,9 @@ class BoxWrapper { VLOG(3) << "Begin SaveDelta"; std::string ret_str; int ret = boxps_ptr_->SaveDelta(xbox_model_path, ret_str); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "SaveDelta failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("SaveDelta failed in BoxPS.")); return ret_str; } diff --git a/paddle/fluid/framework/fleet/box_wrapper_impl.h b/paddle/fluid/framework/fleet/box_wrapper_impl.h index 6f7009f4d51..f6f1cbfc2a0 100644 --- a/paddle/fluid/framework/fleet/box_wrapper_impl.h +++ b/paddle/fluid/framework/fleet/box_wrapper_impl.h @@ -79,8 +79,9 @@ void BoxWrapper::PullSparseCase(const paddle::platform::Place& place, int ret = boxps_ptr_->PullSparseGPU( total_keys, reinterpret_cast(total_values_gpu), static_cast(total_length), device_id); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "PullSparseGPU failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("PullSparseGPU failed in 
BoxPS.")); pull_boxps_timer.Pause(); VLOG(3) << "Begin Copy result to tensor, total_length[" << total_length @@ -144,8 +145,9 @@ void BoxWrapper::PushSparseGradCase( int ret = boxps_ptr_->PushSparseGPU( total_keys, reinterpret_cast(total_grad_values_gpu), static_cast(total_length), place.GetDeviceId()); - PADDLE_ENFORCE_EQ(ret, 0, platform::errors::PreconditionNotMet( - "PushSparseGPU failed in BoxPS.")); + PADDLE_ENFORCE_EQ( + ret, 0, + platform::errors::PreconditionNotMet("PushSparseGPU failed in BoxPS.")); push_boxps_timer.Pause(); #else PADDLE_THROW(platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/framework/fleet/fleet_wrapper.h b/paddle/fluid/framework/fleet/fleet_wrapper.h index deb2b90c933..5c2be1e55f9 100644 --- a/paddle/fluid/framework/fleet/fleet_wrapper.h +++ b/paddle/fluid/framework/fleet/fleet_wrapper.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #endif #include + #include #include #include diff --git a/paddle/fluid/framework/fleet/gloo_wrapper.cc b/paddle/fluid/framework/fleet/gloo_wrapper.cc index d850d05d87f..56d0e1ec47e 100644 --- a/paddle/fluid/framework/fleet/gloo_wrapper.cc +++ b/paddle/fluid/framework/fleet/gloo_wrapper.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/fleet/gloo_wrapper.h" + #include "paddle/fluid/framework/io/fs.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/framework/fleet/gloo_wrapper.h b/paddle/fluid/framework/fleet/gloo_wrapper.h index 42ae73f9b13..1ecaf1318b0 100644 --- a/paddle/fluid/framework/fleet/gloo_wrapper.h +++ b/paddle/fluid/framework/fleet/gloo_wrapper.h @@ -214,8 +214,9 @@ class GlooWrapper { static_cast( &gloo::min)); } else { - PADDLE_ENFORCE_EQ(0, 1, paddle::platform::errors::InvalidArgument( - "AllReduce mode not known: " + mode)); + PADDLE_ENFORCE_EQ(0, 1, + paddle::platform::errors::InvalidArgument( + "AllReduce mode not known: " + mode)); } gloo::allreduce(opts); #else diff --git a/paddle/fluid/framework/fleet/heter_context.h b/paddle/fluid/framework/fleet/heter_context.h index 823b60c5ef1..560607bd160 100644 --- a/paddle/fluid/framework/fleet/heter_context.h +++ b/paddle/fluid/framework/fleet/heter_context.h @@ -17,6 +17,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_HETERPS #include + #include #include #include diff --git a/paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h b/paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h index 4ad32d1714f..da65cccb435 100644 --- a/paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h +++ b/paddle/fluid/framework/fleet/heter_ps/cudf/concurrent_unordered_map.cuh.h @@ -22,6 +22,7 @@ #define CONCURRENT_UNORDERED_MAP_CUH #include + #include #include #include @@ -258,7 +259,7 @@ class cycle_iterator_adapter { return old; } - __host__ __device__ const cycle_iterator_adapter& operator++(int)const { + __host__ __device__ const cycle_iterator_adapter& operator++(int) const { cycle_iterator_adapter old(m_begin, m_end, m_current); if (m_end == (m_current + 1)) m_current = m_begin; diff --git a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h index 19c355c671a..2e7588d0ac4 100644 --- a/paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h +++ b/paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/cuda_device_guard.h" @@ -284,6 +285,6 @@ struct NodeQueryResult { }; ~NodeQueryResult() {} }; -} -}; +} // namespace framework +}; // namespace paddle #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h index ae57c2ebe93..5831863f7f5 100644 --- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h +++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h @@ -14,7 +14,9 @@ #pragma once #include + #include + #include "heter_comm.h" #include "paddle/fluid/distributed/ps/table/common_graph_table.h" #include "paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h" @@ -123,7 +125,7 @@ class GpuPsGraphTable : public HeterComm { std::condition_variable cv_; int cpu_table_status; }; -} -}; +} // namespace framework +}; // namespace paddle //#include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table_inl.h" #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table_inl.cu b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table_inl.cu index 72b9cae41c0..ab33d2a9c05 100644 --- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table_inl.cu +++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table_inl.cu @@ -15,6 +15,7 @@ #include #include #include + #include #pragma once #ifdef PADDLE_WITH_HETERPS @@ -859,11 +860,10 @@ NeighborSampleResult GpuPsGraphTable::graph_neighbor_sample_v2( constexpr int TILE_SIZE = BLOCK_WARPS * 16; const dim3 block(WARP_SIZE, BLOCK_WARPS); const dim3 grid((shard_len + TILE_SIZE - 1) / TILE_SIZE); - neighbor_sample_example_v2< - WARP_SIZE, BLOCK_WARPS, - TILE_SIZE><<remote_stream(i, gpu_id)>>>( - graph, id_array, actual_size_array, sample_array, sample_size, - shard_len, default_value); + neighbor_sample_example_v2 + <<remote_stream(i, gpu_id)>>>( + graph, id_array, actual_size_array, sample_array, sample_size, + shard_len, default_value); } for (int i = 0; i < total_gpu; ++i) { @@ -946,12 +946,12 @@ NeighborSampleResult GpuPsGraphTable::graph_neighbor_sample_v2( constexpr int TILE_SIZE_ = BLOCK_WARPS_ * 16; const dim3 block2(WARP_SIZE_, BLOCK_WARPS_); const dim3 grid2((number_on_cpu + TILE_SIZE_ - 1) / TILE_SIZE_); - copy_buffer_ac_to_final_place<<>>( - 
gpu_buffers_ptr, gpu_ac_ptr, val, actual_sample_size, - thrust::raw_pointer_cast(t_index.data()) + 1, - thrust::raw_pointer_cast(cumsum_gpu_ac.data()), number_on_cpu, - sample_size); + copy_buffer_ac_to_final_place + <<>>( + gpu_buffers_ptr, gpu_ac_ptr, val, actual_sample_size, + thrust::raw_pointer_cast(t_index.data()) + 1, + thrust::raw_pointer_cast(cumsum_gpu_ac.data()), number_on_cpu, + sample_size); delete[] merge_buffers; delete[] cpu_keys; @@ -1027,13 +1027,13 @@ NodeQueryResult GpuPsGraphTable::query_node_list(int gpu_id, int start, local_begin_pos = [0,3] sample_size = [2,3] */ - std::function range_check = []( - int x, int y, int x1, int y1, int& x2, int& y2) { - if (y <= x1 || x >= y1) return 0; - y2 = min(y, y1); - x2 = max(x1, x); - return y2 - x2; - }; + std::function range_check = + [](int x, int y, int x1, int y1, int& x2, int& y2) { + if (y <= x1 || x >= y1) return 0; + y2 = min(y, y1); + x2 = max(x1, x); + return y2 - x2; + }; auto graph = gpu_graph_list[gpu_id]; if (graph.node_size == 0) { return result; @@ -1106,6 +1106,6 @@ NodeQueryResult GpuPsGraphTable::query_node_list(int gpu_id, int start, return result; */ } -} -}; +} // namespace framework +}; // namespace paddle #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu index c976bb67cb2..43f0101009d 100644 --- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu +++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.cu @@ -271,5 +271,5 @@ void GraphGpuWrapper::export_partition_files(int idx, std::string file_path) { ->cpu_graph_table->export_partition_files(idx, file_path); } #endif -} -}; +} // namespace framework +}; // namespace paddle diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h index a34e752fc7e..d3c4dea5890 100644 --- a/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h +++ b/paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps/table/common_graph_table.h" #include "paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h" namespace paddle { @@ -73,5 +74,5 @@ class GraphGpuWrapper { void* graph_table; }; #endif -} -}; +} // namespace framework +}; // namespace paddle diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_sampler.h b/paddle/fluid/framework/fleet/heter_ps/graph_sampler.h index a7c043f1edf..7cec4fcfb83 100644 --- a/paddle/fluid/framework/fleet/heter_ps/graph_sampler.h +++ b/paddle/fluid/framework/fleet/heter_ps/graph_sampler.h @@ -14,6 +14,7 @@ #pragma once #include + #include #include #include @@ -23,6 +24,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps/table/common_graph_table.h" #include "paddle/fluid/framework/fleet/heter_ps/gpu_graph_node.h" #include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h" @@ -106,7 +108,7 @@ class AllInGpuGraphSampler : public GraphSampler { // std::shared_ptr random; int gpu_num; }; -} -}; +} // namespace framework +}; // namespace paddle #include "paddle/fluid/framework/fleet/heter_ps/graph_sampler_inl.h" #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/graph_sampler_inl.h b/paddle/fluid/framework/fleet/heter_ps/graph_sampler_inl.h index ad4b00b11aa..e68612d57e2 100644 --- a/paddle/fluid/framework/fleet/heter_ps/graph_sampler_inl.h +++ b/paddle/fluid/framework/fleet/heter_ps/graph_sampler_inl.h @@ -156,6 +156,6 @@ void 
AllInGpuGraphSampler::init(GpuPsGraphTable *g, this->gpu_num = g->gpu_num; graph_table = g->cpu_graph_table.get(); } -} -}; +} // namespace framework +}; // namespace paddle #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/hashtable.h b/paddle/fluid/framework/fleet/heter_ps/hashtable.h index 234aa15ebf7..112a59c8fec 100644 --- a/paddle/fluid/framework/fleet/heter_ps/hashtable.h +++ b/paddle/fluid/framework/fleet/heter_ps/hashtable.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #ifdef PADDLE_WITH_HETERPS #include + #include #include #include @@ -36,6 +37,7 @@ limitations under the License. */ #include "thrust/pair.h" #elif defined(__xpu__) #include + #include "xpu/kernel/cluster_header.h" #include "xpu/kernel/math.h" #include "xpu/kernel/simd.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/hashtable_kernel.cu b/paddle/fluid/framework/fleet/heter_ps/hashtable_kernel.cu index 57741c2c19b..c2e6cdc5c69 100644 --- a/paddle/fluid/framework/fleet/heter_ps/hashtable_kernel.cu +++ b/paddle/fluid/framework/fleet/heter_ps/hashtable_kernel.cu @@ -14,6 +14,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_HETERPS #include + #include "paddle/fluid/framework/fleet/heter_ps/hashtable.h" #include "paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h" @@ -366,10 +367,10 @@ template class HashTable; template class HashTable; template class HashTable; -template void HashTable::get< - cudaStream_t>(const unsigned long* d_keys, - paddle::framework::FeatureValue* d_vals, size_t len, - cudaStream_t stream); +template void +HashTable::get( + const unsigned long* d_keys, paddle::framework::FeatureValue* d_vals, + size_t len, cudaStream_t stream); template void HashTable::get( @@ -395,10 +396,10 @@ template void HashTable::get( // const unsigned long* d_keys, char* d_vals, size_t len, cudaStream_t // stream); -template void HashTable::insert< - cudaStream_t>(const unsigned long* d_keys, - const paddle::framework::FeatureValue* d_vals, size_t len, - cudaStream_t stream); +template void +HashTable::insert( + const unsigned long* d_keys, const paddle::framework::FeatureValue* d_vals, + size_t len, cudaStream_t stream); template void HashTable:: insert(const unsigned long* d_keys, size_t len, char* pool, @@ -438,21 +439,22 @@ template void HashTable::update< paddle::framework::FeaturePushValue>, cudaStream_t>(const unsigned long* d_keys, const paddle::framework::FeaturePushValue* d_grads, - size_t len, Optimizer - sgd, - cudaStream_t stream); - -template void -HashTable::update< - Optimizer, - cudaStream_t>(const unsigned long* d_keys, const char* d_grads, size_t len, + size_t len, Optimizer sgd, cudaStream_t stream); +template void HashTable:: + update, + cudaStream_t>(const unsigned long* d_keys, const char* d_grads, + size_t len, + Optimizer + sgd, + cudaStream_t stream); + // template void HashTable::update< // Optimizer #include + #include "cub/cub.cuh" #include "cub/util_allocator.cuh" #if defined(PADDLE_WITH_CUDA) @@ -26,6 +27,7 @@ limitations under the License. */ #elif defined(PADDLE_WITH_XPU_KP) // #include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h" #include + #include "paddle/fluid/platform/device/xpu/enforce_xpu.h" #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h index 64b177abb86..38a4e7b7bb1 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h +++ b/paddle/fluid/framework/fleet/heter_ps/heter_comm_inl.h @@ -14,6 +14,7 @@ limitations under the License. 
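The range_check lambda reflowed in the query_node_list hunk above is genuine logic, not just layout: it clamps the interval [x, y) against [x1, y1), reports the clamped bounds through x2 and y2, and returns the overlap length, leaving the outputs untouched when the intervals are disjoint. The same behavior as a free function:

    #include <algorithm>
    #include <cstdio>

    int range_check(int x, int y, int x1, int y1, int& x2, int& y2) {
      if (y <= x1 || x >= y1) return 0;  // disjoint: outputs left untouched
      y2 = std::min(y, y1);
      x2 = std::max(x1, x);
      return y2 - x2;
    }

    int main() {
      int x2 = 0, y2 = 0;
      int len = range_check(0, 5, 3, 9, x2, y2);  // overlap is [3, 5)
      std::printf("[%d, %d) len=%d\n", x2, y2, len);
      return 0;
    }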
*/ #pragma once #ifdef PADDLE_WITH_HETERPS #include + #include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_comm_kernel.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_comm_kernel.cu b/paddle/fluid/framework/fleet/heter_ps/heter_comm_kernel.cu index 94d7929b294..a5ee8e2ff83 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_comm_kernel.cu +++ b/paddle/fluid/framework/fleet/heter_ps/heter_comm_kernel.cu @@ -294,10 +294,10 @@ template void HeterCommKernel::fill_idx( template void HeterCommKernel::calc_shard_offset( int* idx, int* left, int* right, long long len, int total_devs, const cudaStream_t& stream); -template void HeterCommKernel::calc_shard_index< - unsigned long, int, cudaStream_t>(unsigned long* d_keys, long long len, - int* shard_index, int total_devs, - const cudaStream_t& stream); +template void +HeterCommKernel::calc_shard_index( + unsigned long* d_keys, long long len, int* shard_index, int total_devs, + const cudaStream_t& stream); template void HeterCommKernel::calc_shard_index( long* d_keys, long long len, int* shard_index, int total_devs, diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_ps.cc b/paddle/fluid/framework/fleet/heter_ps/heter_ps.cc index 700b43f18fb..fe8e8c86505 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_ps.cc +++ b/paddle/fluid/framework/fleet/heter_ps/heter_ps.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/fleet/heter_ps/heter_ps.h" + #include #ifdef PADDLE_WITH_HETERPS diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_ps.cu b/paddle/fluid/framework/fleet/heter_ps/heter_ps.cu index 43b84ee5d26..cfe46626294 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_ps.cu +++ b/paddle/fluid/framework/fleet/heter_ps/heter_ps.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/fleet/heter_ps/heter_ps.h" #ifdef PADDLE_WITH_HETERPS diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_ps.h b/paddle/fluid/framework/fleet/heter_ps/heter_ps.h index 8449a4048b7..83dc232bc6a 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_ps.h +++ b/paddle/fluid/framework/fleet/heter_ps/heter_ps.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/fleet/heter_ps/heter_comm.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_ps_base.h" #if defined(PADDLE_WITH_CUDA) diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_ps_base.h b/paddle/fluid/framework/fleet/heter_ps/heter_ps_base.h index 2c312e9d4d6..fe44c81fe44 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_ps_base.h +++ b/paddle/fluid/framework/fleet/heter_ps/heter_ps_base.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_resource.h" #include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/heter_resource.h b/paddle/fluid/framework/fleet/heter_ps/heter_resource.h index 5717f44d400..087877818f5 100644 --- a/paddle/fluid/framework/fleet/heter_ps/heter_resource.h +++ b/paddle/fluid/framework/fleet/heter_ps/heter_resource.h @@ -24,6 +24,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU_KP #include // NOLINT + #include "paddle/fluid/platform/device/xpu/xpu_info.h" #endif diff --git a/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h b/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h index 4684b4a0bc1..82090ef4817 100644 --- a/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h +++ b/paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #endif #include + #include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" #include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/test_comm.cu b/paddle/fluid/framework/fleet/heter_ps/test_comm.cu index 3a6ed50ad8e..72fa0282066 100644 --- a/paddle/fluid/framework/fleet/heter_ps/test_comm.cu +++ b/paddle/fluid/framework/fleet/heter_ps/test_comm.cu @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include + #include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_comm.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_resource.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/test_cpu_graph_sample.cu b/paddle/fluid/framework/fleet/heter_ps/test_cpu_graph_sample.cu index 62a0df94300..621c7f5bab4 100644 --- a/paddle/fluid/framework/fleet/heter_ps/test_cpu_graph_sample.cu +++ b/paddle/fluid/framework/fleet/heter_ps/test_cpu_graph_sample.cu @@ -13,8 +13,10 @@ // limitations under the License. #include + #include #include + #include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" #include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_comm.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/test_cpu_query.cu b/paddle/fluid/framework/fleet/heter_ps/test_cpu_query.cu index ff3cd9d2d04..49e9a051ec0 100644 --- a/paddle/fluid/framework/fleet/heter_ps/test_cpu_query.cu +++ b/paddle/fluid/framework/fleet/heter_ps/test_cpu_query.cu @@ -13,8 +13,10 @@ // limitations under the License. #include + #include #include + #include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" #include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h" #include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/test_graph.cu b/paddle/fluid/framework/fleet/heter_ps/test_graph.cu index 06c7026eb51..28098181b6c 100644 --- a/paddle/fluid/framework/fleet/heter_ps/test_graph.cu +++ b/paddle/fluid/framework/fleet/heter_ps/test_graph.cu @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include + #include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" #include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h" #include "paddle/fluid/framework/fleet/heter_ps/heter_comm.h" diff --git a/paddle/fluid/framework/fleet/heter_ps/test_sample_rate.cu b/paddle/fluid/framework/fleet/heter_ps/test_sample_rate.cu index affa60d022e..a1e8f06368b 100644 --- a/paddle/fluid/framework/fleet/heter_ps/test_sample_rate.cu +++ b/paddle/fluid/framework/fleet/heter_ps/test_sample_rate.cu @@ -13,6 +13,8 @@ // limitations under the License. 
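The hashtable_kernel.cu and heter_comm_kernel.cu hunks above rewrap explicit instantiations of member function templates: rather than breaking inside the template argument list, clang-format now keeps "template void" with the qualified name and wraps at the parameter list. A compilable toy of the same shape (all names are stand-ins, not Paddle's types):

    #include <cstddef>

    template <typename KeyType, typename ValType>
    struct ToyTable {
      template <typename StreamType>
      void get(const KeyType* keys, ValType* vals, std::size_t len,
               StreamType stream);
    };

    template <typename KeyType, typename ValType>
    template <typename StreamType>
    void ToyTable<KeyType, ValType>::get(const KeyType* keys, ValType* vals,
                                         std::size_t len, StreamType stream) {
      for (std::size_t i = 0; i < len; ++i)
        vals[i] = static_cast<ValType>(keys[i]);
      (void)stream;
    }

    // Wrapped as in the hunks above: name group first, parameter list next.
    template void ToyTable<unsigned long, float>::get<int>(
        const unsigned long* keys, float* vals, std::size_t len, int stream);

    int main() { return 0; }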
#include + +#include #include // NOLINT #include #include @@ -20,32 +22,30 @@ #include // NOLINT #include #include -#include "google/protobuf/text_format.h" -#include +#include "google/protobuf/text_format.h" #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps.pb.h" #include "paddle/fluid/distributed/ps/service/env.h" #include "paddle/fluid/distributed/ps/service/sendrecv.pb.h" #include "paddle/fluid/distributed/ps/table/common_graph_table.h" #include "paddle/fluid/distributed/ps/table/graph/graph_node.h" +#include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" +#include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h" +#include "paddle/fluid/framework/fleet/heter_ps/graph_sampler.h" +#include "paddle/fluid/framework/fleet/heter_ps/heter_comm.h" +#include "paddle/fluid/framework/fleet/heter_ps/heter_resource.h" +#include "paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/printf.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/fluid/framework/fleet/heter_ps/feature_value.h" -#include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_ps_table.h" -#include "paddle/fluid/framework/fleet/heter_ps/graph_sampler.h" -#include "paddle/fluid/framework/fleet/heter_ps/heter_comm.h" -#include "paddle/fluid/framework/fleet/heter_ps/heter_resource.h" -#include "paddle/fluid/framework/fleet/heter_ps/optimizer.cuh.h" -#include "paddle/fluid/platform/cuda_device_guard.h" - using namespace paddle::framework; namespace platform = paddle::platform; namespace operators = paddle::operators; diff --git a/paddle/fluid/framework/fleet/metrics.cc b/paddle/fluid/framework/fleet/metrics.cc index 56bc568460b..42252816405 100644 --- a/paddle/fluid/framework/fleet/metrics.cc +++ b/paddle/fluid/framework/fleet/metrics.cc @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #if defined(PADDLE_WITH_PSLIB) || defined(PADDLE_WITH_PSCORE) @@ -63,10 +64,12 @@ void BasicAucCalculator::add_data(const float* d_pred, const int64_t* d_label, } void BasicAucCalculator::add_unlock_data(double pred, int label) { - PADDLE_ENFORCE_GE(pred, 0.0, platform::errors::PreconditionNotMet( - "pred should be greater than 0")); - PADDLE_ENFORCE_LE(pred, 1.0, platform::errors::PreconditionNotMet( - "pred should be lower than 1")); + PADDLE_ENFORCE_GE( + pred, 0.0, + platform::errors::PreconditionNotMet("pred should be greater than 0")); + PADDLE_ENFORCE_LE( + pred, 1.0, + platform::errors::PreconditionNotMet("pred should be lower than 1")); PADDLE_ENFORCE_EQ( label * label, label, platform::errors::PreconditionNotMet( @@ -272,10 +275,12 @@ void BasicAucCalculator::add_uid_data(const float* d_pred, void BasicAucCalculator::add_uid_unlock_data(double pred, int label, uint64_t uid) { - PADDLE_ENFORCE_GE(pred, 0.0, platform::errors::PreconditionNotMet( - "pred should be greater than 0")); - PADDLE_ENFORCE_LE(pred, 1.0, platform::errors::PreconditionNotMet( - "pred should be lower than 1")); + PADDLE_ENFORCE_GE( + pred, 0.0, + platform::errors::PreconditionNotMet("pred should be greater than 0")); + PADDLE_ENFORCE_LE( + pred, 1.0, + platform::errors::PreconditionNotMet("pred should be lower than 1")); 
PADDLE_ENFORCE_EQ( label * label, label, platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/framework/fleet/metrics.h b/paddle/fluid/framework/fleet/metrics.h index 69b242664bb..7c3ea1b5512 100644 --- a/paddle/fluid/framework/fleet/metrics.h +++ b/paddle/fluid/framework/fleet/metrics.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include @@ -35,6 +36,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_GLOO) #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu index 488a9ef8ce7..fbe76696114 100644 --- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu +++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.cu @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/fleet/heter_ps/optimizer_conf.h" #include "paddle/fluid/framework/fleet/ps_gpu_wrapper.h" #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/framework/fleet/ps_gpu_wrapper.h b/paddle/fluid/framework/fleet/ps_gpu_wrapper.h index 0efec57e59d..7ddc5a1f6dd 100644 --- a/paddle/fluid/framework/fleet/ps_gpu_wrapper.h +++ b/paddle/fluid/framework/fleet/ps_gpu_wrapper.h @@ -27,6 +27,7 @@ limitations under the License. */ #include #ifdef PADDLE_WITH_GLOO #include + #include "paddle/fluid/framework/data_set.h" #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif diff --git a/paddle/fluid/framework/fleet/test_fleet.cc b/paddle/fluid/framework/fleet/test_fleet.cc index 24f3e6bed64..34aea9de3b1 100644 --- a/paddle/fluid/framework/fleet/test_fleet.cc +++ b/paddle/fluid/framework/fleet/test_fleet.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #include "paddle/fluid/string/string_helper.h" diff --git a/paddle/fluid/framework/generator.cc b/paddle/fluid/framework/generator.cc index b621eca35b8..e3b9fe3626d 100644 --- a/paddle/fluid/framework/generator.cc +++ b/paddle/fluid/framework/generator.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/generator.h" #include + #include #include diff --git a/paddle/fluid/framework/generator.h b/paddle/fluid/framework/generator.h index 35efc1bee33..f62e8f74d26 100644 --- a/paddle/fluid/framework/generator.h +++ b/paddle/fluid/framework/generator.h @@ -16,6 +16,7 @@ limitations under the License. 
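The metrics.cc hunks reflow a pair of range checks whose intent is easy to miss behind the formatting: pred must be a probability in [0, 1], and label must be 0 or 1, which the source encodes compactly as label * label == label (an equality that holds for exactly those two integers). A sketch of the same validation with plain asserts (the function name is hypothetical):

    #include <cassert>

    void check_auc_sample(double pred, int label) {
      assert(pred >= 0.0 && pred <= 1.0);  // pred must lie in [0, 1]
      assert(label * label == label);      // holds only for label in {0, 1}
    }

    int main() {
      check_auc_sample(0.25, 1);
      return 0;
    }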
*/ #include #include + #include #include #include // temp for debug diff --git a/paddle/fluid/framework/gpu_utils.h b/paddle/fluid/framework/gpu_utils.h index 37c9852a1ab..9c59333000e 100644 --- a/paddle/fluid/framework/gpu_utils.h +++ b/paddle/fluid/framework/gpu_utils.h @@ -17,6 +17,7 @@ #define EIGEN_USE_GPU #include + #include "paddle/fluid/platform/enforce.h" #include "unsupported/Eigen/CXX11/Tensor" @@ -104,15 +105,17 @@ ConvertTensorIndex(int index, const Dim3& dims) { template IntType CeilOrFloor(IntType x, IntType deviser) { - PADDLE_ENFORCE_GT(deviser, 0, platform::errors::InvalidArgument( - "deviser should be greater than 0, " - "but received is:%d", - deviser)); + PADDLE_ENFORCE_GT( + deviser, 0, + platform::errors::InvalidArgument("deviser should be greater than 0, " + "but received is:%d", + deviser)); PADDLE_ENFORCE_GT( - x, 0, platform::errors::InvalidArgument("input should be greater than 0, " - "but received is:%d", - x)); + x, 0, + platform::errors::InvalidArgument("input should be greater than 0, " + "but received is:%d", + x)); const IntType round_to_zero = x / deviser; const IntType inte_result = round_to_zero * deviser; diff --git a/paddle/fluid/framework/grad_op_desc_maker.h b/paddle/fluid/framework/grad_op_desc_maker.h index ebbfd446a03..81f17be867f 100644 --- a/paddle/fluid/framework/grad_op_desc_maker.h +++ b/paddle/fluid/framework/grad_op_desc_maker.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_call_stack.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/operator.h" @@ -157,8 +158,9 @@ class GradOpDescMakerBase { const Attribute& GetAttr(const std::string& name) const { auto& map = fwd_op_.GetAttrMap(); auto it = map.find(name); - PADDLE_ENFORCE_NE(it, map.end(), platform::errors::NotFound( - "Cannot find attribute (%s).", name)); + PADDLE_ENFORCE_NE( + it, map.end(), + platform::errors::NotFound("Cannot find attribute (%s).", name)); return it->second; } diff --git a/paddle/fluid/framework/heter_service.h b/paddle/fluid/framework/heter_service.h index 9d0e3c50953..6b115d33d2f 100644 --- a/paddle/fluid/framework/heter_service.h +++ b/paddle/fluid/framework/heter_service.h @@ -22,6 +22,7 @@ limitations under the License. 
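The gpu_utils.h hunk reflows the precondition checks of CeilOrFloor, a helper that rounds x / deviser up or down according to a template flag ("deviser" is the source's own spelling). The hunk ends just after the intermediate quotient, so the selector below is an assumed reconstruction, not quoted from the file:

    #include <cstdio>

    template <typename IntType, bool ceil>
    IntType CeilOrFloor(IntType x, IntType deviser) {
      // The hunk's two checks: both x and deviser must be positive.
      if (deviser <= 0 || x <= 0) return IntType(0);  // stand-in for enforces
      const IntType round_to_zero = x / deviser;
      const IntType inte_result = round_to_zero * deviser;
      // Assumed tail (not shown in the hunk): round up only on a remainder.
      return ceil ? (inte_result == x ? round_to_zero : round_to_zero + 1)
                  : round_to_zero;
    }

    int main() {
      std::printf("%d %d\n", CeilOrFloor<int, true>(10, 3),
                  CeilOrFloor<int, false>(10, 3));  // prints "4 3"
      return 0;
    }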
*/ #include // NOLINT #include // NOLINT #include + #include "paddle/fluid/framework/heter_service.pb.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" diff --git a/paddle/fluid/framework/hetercpu_worker.cc b/paddle/fluid/framework/hetercpu_worker.cc index 75cc18887da..85e44ec44c6 100644 --- a/paddle/fluid/framework/hetercpu_worker.cc +++ b/paddle/fluid/framework/hetercpu_worker.cc @@ -311,8 +311,8 @@ void HeterCpuWorker::CollectLabelInfo(std::shared_ptr task, continue; } LoDTensor* tensor = fea_var->GetMutable(); - CHECK(tensor != nullptr) << "tensor of var " - << sparse_key_names_[table_id][i] << " is null"; + CHECK(tensor != nullptr) + << "tensor of var " << sparse_key_names_[table_id][i] << " is null"; // skip slots which do not have embedding Variable* emb_var = scope->FindVar(sparse_value_names_[table_id][i]); @@ -465,9 +465,9 @@ void HeterCpuWorker::AdjustInsWeight(std::shared_ptr task) { float* ins_weights = ins_weight_tensor->data(); size_t len = ins_weight_tensor->numel(); // len = batch size // here we assume nid_show slot only has one feasign in each instance - CHECK(len == nid_show_.size()) << "ins_weight size should be equal to " - << "nid_show size, " << len << " vs " - << nid_show_.size(); + CHECK(len == nid_show_.size()) + << "ins_weight size should be equal to " + << "nid_show size, " << len << " vs " << nid_show_.size(); float nid_adjw_threshold = adjust_ins_weight_config_.nid_adjw_threshold(); float nid_adjw_ratio = adjust_ins_weight_config_.nid_adjw_ratio(); int64_t nid_adjw_num = 0; @@ -482,9 +482,8 @@ void HeterCpuWorker::AdjustInsWeight(std::shared_ptr task) { } float ins_weight = 1.0; if (nid_show >= 0 && nid_show < nid_adjw_threshold) { - ins_weight = log(M_E + - (nid_adjw_threshold - nid_show) / nid_adjw_threshold * - nid_adjw_ratio); + ins_weight = log(M_E + (nid_adjw_threshold - nid_show) / + nid_adjw_threshold * nid_adjw_ratio); // count nid adjw insnum and weight ++nid_adjw_num; nid_adjw_weight += ins_weight; @@ -579,15 +578,15 @@ void HeterCpuWorker::CopyDenseVars() { Variable* src_var = thread_scope_->FindVar(src_var_name); CHECK(src_var != nullptr) << src_var_name << " not found"; // NOLINT LoDTensor* src_tensor = src_var->GetMutable(); - CHECK(src_tensor != nullptr) << src_var_name - << " tensor is null"; // NOLINT + CHECK(src_tensor != nullptr) + << src_var_name << " tensor is null"; // NOLINT float* src_data = src_tensor->data(); Variable* dest_var = thread_scope_->FindVar(dest_var_name); CHECK(dest_var != nullptr) << dest_var_name << " not found"; // NOLINT LoDTensor* dest_tensor = dest_var->GetMutable(); - CHECK(dest_tensor != nullptr) << dest_var_name - << " tensor is null"; // NOLINT + CHECK(dest_tensor != nullptr) + << dest_var_name << " tensor is null"; // NOLINT float* dest_data = dest_tensor->data(); CHECK(src_tensor->numel() == dest_tensor->numel()) diff --git a/paddle/fluid/framework/heterxpu_trainer.cc b/paddle/fluid/framework/heterxpu_trainer.cc index a4af56419a7..81c1a684959 100644 --- a/paddle/fluid/framework/heterxpu_trainer.cc +++ b/paddle/fluid/framework/heterxpu_trainer.cc @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include #include + #include "io/fs.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_feed_factory.h" diff --git a/paddle/fluid/framework/infershape_utils_test.cc b/paddle/fluid/framework/infershape_utils_test.cc index 2eeefb19a1a..805f992cf3e 100644 --- a/paddle/fluid/framework/infershape_utils_test.cc +++ b/paddle/fluid/framework/infershape_utils_test.cc @@ -12,13 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/infershape_utils.h" + #include #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/attribute.h" -#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/framework/inplace_op_inference.h b/paddle/fluid/framework/inplace_op_inference.h index c46a77f0b35..93bbec251fe 100644 --- a/paddle/fluid/framework/inplace_op_inference.h +++ b/paddle/fluid/framework/inplace_op_inference.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/type_defs.h" diff --git a/paddle/fluid/framework/io/crypto/aes_cipher_test.cc b/paddle/fluid/framework/io/crypto/aes_cipher_test.cc index 7f923f597b6..67c758b012a 100644 --- a/paddle/fluid/framework/io/crypto/aes_cipher_test.cc +++ b/paddle/fluid/framework/io/crypto/aes_cipher_test.cc @@ -13,11 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/io/crypto/aes_cipher.h" + #include #include #include + #include #include + #include "paddle/fluid/framework/io/crypto/cipher_utils.h" namespace paddle { diff --git a/paddle/fluid/framework/io/crypto/cipher.cc b/paddle/fluid/framework/io/crypto/cipher.cc index eca175c020c..2001e8a416a 100644 --- a/paddle/fluid/framework/io/crypto/cipher.cc +++ b/paddle/fluid/framework/io/crypto/cipher.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/io/crypto/cipher.h" + #include "paddle/fluid/framework/io/crypto/aes_cipher.h" #include "paddle/fluid/framework/io/crypto/cipher_utils.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/io/crypto/cipher_utils.cc b/paddle/fluid/framework/io/crypto/cipher_utils.cc index ee9f06b2f3e..b622138f781 100644 --- a/paddle/fluid/framework/io/crypto/cipher_utils.cc +++ b/paddle/fluid/framework/io/crypto/cipher_utils.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/io/crypto/cipher_utils.h" #include + #include #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/io/crypto/cipher_utils_test.cc b/paddle/fluid/framework/io/crypto/cipher_utils_test.cc index 928e2ced9b1..356c919cbcb 100644 --- a/paddle/fluid/framework/io/crypto/cipher_utils_test.cc +++ b/paddle/fluid/framework/io/crypto/cipher_utils_test.cc @@ -12,12 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/framework/io/crypto/cipher_utils.h" + #include + #include #include -#include "paddle/fluid/framework/io/crypto/cipher_utils.h" - namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/io/fs.cc b/paddle/fluid/framework/io/fs.cc index b8aca886e7d..fd602895aae 100644 --- a/paddle/fluid/framework/io/fs.cc +++ b/paddle/fluid/framework/io/fs.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/io/fs.h" #include + #include #include "glog/logging.h" diff --git a/paddle/fluid/framework/io/fs.h b/paddle/fluid/framework/io/fs.h index 1ebe80e943a..088d4d97424 100644 --- a/paddle/fluid/framework/io/fs.h +++ b/paddle/fluid/framework/io/fs.h @@ -16,6 +16,7 @@ #include #include + #include #include #include diff --git a/paddle/fluid/framework/io/test_fs.cc b/paddle/fluid/framework/io/test_fs.cc index 49dee603200..adb6141fd56 100644 --- a/paddle/fluid/framework/io/test_fs.cc +++ b/paddle/fluid/framework/io/test_fs.cc @@ -13,7 +13,9 @@ // limitations under the License. #include + #include + #include "paddle/fluid/framework/io/fs.h" #if defined _WIN32 || defined __APPLE__ diff --git a/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass_tester.cc b/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass_tester.cc index 8870b68fbc5..e0ce58121a1 100644 --- a/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass_tester.cc +++ b/paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass_tester.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.h" - #include + +#include "paddle/fluid/framework/ir/adaptive_pool2d_convert_global_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/add_support_int8_pass.cc b/paddle/fluid/framework/ir/add_support_int8_pass.cc index 3a3f5c3741f..d38853bb964 100644 --- a/paddle/fluid/framework/ir/add_support_int8_pass.cc +++ b/paddle/fluid/framework/ir/add_support_int8_pass.cc @@ -68,9 +68,8 @@ void AddSupportInt8Pass::ApplyImpl(ir::Graph* graph) const { i++) { if (quanted_op_desc->Output(quanted_op_desc->OutputNames()[i]) .size() > 0 && - input_name == - quanted_op_desc->Output( - quanted_op_desc->OutputNames()[i])[0]) { + input_name == quanted_op_desc->Output( + quanted_op_desc->OutputNames()[i])[0]) { outscale_flag = true; quanted_op_desc->SetAttr( quanted_op_desc->OutputNames()[i], diff --git a/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc b/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc index 08e7c6f5b86..910cb5801db 100644 --- a/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc +++ b/paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc @@ -13,8 +13,10 @@ // limitations under the License. 
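Most of the remaining hunks are pure include reshuffles driven by one policy: the file's own header is hoisted to the top as its own block, then system and third-party headers, then other project headers, with blank lines between blocks so clang-format sorts each group independently. A sketch of the resulting layout (paths hypothetical):

    // foo_pass_tester.cc, after the reorder:
    //
    //   #include "paddle/fluid/framework/ir/foo_pass.h"  // own header first
    //
    //   #include <gtest/gtest.h>                         // system/third-party
    //
    //   #include "paddle/fluid/framework/ir/pass_tester_helper.h"  // project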
#include "paddle/fluid/framework/ir/coalesce_grad_tensor_pass.h" + #include #include + #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h" diff --git a/paddle/fluid/framework/ir/conv_bn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/conv_bn_fuse_pass_tester.cc index ae843aad7d3..710f8ef1b37 100644 --- a/paddle/fluid/framework/ir/conv_bn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/conv_bn_fuse_pass_tester.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/conv_bn_fuse_pass.h" - #include + +#include "paddle/fluid/framework/ir/conv_bn_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/cost_model.cc b/paddle/fluid/framework/ir/cost_model.cc index 6086409ffd9..05c7834c9ca 100644 --- a/paddle/fluid/framework/ir/cost_model.cc +++ b/paddle/fluid/framework/ir/cost_model.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/cost_model.h" #include + #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/framework/ir/cost_model_test.cc b/paddle/fluid/framework/ir/cost_model_test.cc index 57f3904d845..f5eaa2f0338 100644 --- a/paddle/fluid/framework/ir/cost_model_test.cc +++ b/paddle/fluid/framework/ir/cost_model_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/cost_model.h" + #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/framework/ir/cudnn_placement_pass_tester.cc b/paddle/fluid/framework/ir/cudnn_placement_pass_tester.cc index 2d270f444ad..2711ddf92d7 100644 --- a/paddle/fluid/framework/ir/cudnn_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/cudnn_placement_pass_tester.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/cudnn_placement_pass.h" - #include + +#include "paddle/fluid/framework/ir/cudnn_placement_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/framework/ir/delete_dropout_op_pass.cc b/paddle/fluid/framework/ir/delete_dropout_op_pass.cc index 9473cc06928..5043beef824 100644 --- a/paddle/fluid/framework/ir/delete_dropout_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_dropout_op_pass.cc @@ -11,10 +11,10 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include - #include "paddle/fluid/framework/ir/delete_dropout_op_pass.h" +#include + namespace phi { class DenseTensor; } // namespace phi diff --git a/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc b/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc index 79a06572d14..e4b6e43e5c3 100644 --- a/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_fill_constant_op_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/delete_fill_constant_op_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/delete_quant_dequant_filter_op_pass.cc b/paddle/fluid/framework/ir/delete_quant_dequant_filter_op_pass.cc index 2fc133edb7a..a02efc0a7ce 100644 --- a/paddle/fluid/framework/ir/delete_quant_dequant_filter_op_pass.cc +++ b/paddle/fluid/framework/ir/delete_quant_dequant_filter_op_pass.cc @@ -102,9 +102,10 @@ void DeleteQuantDequantFilterOpPass::ApplyImpl(ir::Graph* graph) const { break; } } - PADDLE_ENFORCE_GT(arg_name.size(), 0, platform::errors::InvalidArgument( - "can not find the input %s.", - quant_dequant_op_out_name)); + PADDLE_ENFORCE_GT( + arg_name.size(), 0, + platform::errors::InvalidArgument("can not find the input %s.", + quant_dequant_op_out_name)); // any_op2_desc->SetAttr("enable_int8", true); any_op2_desc->SetAttr("bit_length", bit_length); diff --git a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass_tester.cc index 727e42629f9..8deaf10d200 100644 --- a/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass_tester.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.h" - #include + +#include "paddle/fluid/framework/ir/embedding_eltwise_layernorm_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc index 482e38355c5..a34e0a5d1de 100644 --- a/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.h" #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc index 46a9b2eae35..be22ee9b2fe 100644 --- a/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass_tester.cc @@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h" - #include +#include "paddle/fluid/framework/ir/fc_elementwise_layernorm_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fc_fuse_pass.cc b/paddle/fluid/framework/ir/fc_fuse_pass.cc index 1e25b21483b..1802616c0df 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/fc_fuse_pass.h" + #include #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc b/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc index 39b544e7160..e40759cd3fb 100644 --- a/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/fc_fuse_pass_tester.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/fc_fuse_pass.h" - #include + +#include "paddle/fluid/framework/ir/fc_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h index df3fbc293b7..9ad3c28f09a 100644 --- a/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h +++ b/paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h @@ -13,9 +13,9 @@ // limitations under the License. #pragma once -#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h" - #include + +#include "paddle/fluid/framework/ir/fc_gru_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc index b99e607f92b..5b4bb98ff53 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" + #include #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h index a313e49f0b2..3e47f079573 100644 --- a/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h +++ b/paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h @@ -14,9 +14,9 @@ #pragma once -#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" - #include + +#include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h b/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h index ab66fb4a46a..632bb237fa2 100644 --- a/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h +++ b/paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" diff --git a/paddle/fluid/framework/ir/fuse_bn_act_pass.cc b/paddle/fluid/framework/ir/fuse_bn_act_pass.cc index f12273e94dd..6a2a0867048 100644 --- a/paddle/fluid/framework/ir/fuse_bn_act_pass.cc +++ b/paddle/fluid/framework/ir/fuse_bn_act_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/ir/fuse_bn_act_pass.h" + #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/fuse_bn_add_act_pass.cc b/paddle/fluid/framework/ir/fuse_bn_add_act_pass.cc index 005f006ab04..ff4850838c5 100644 --- a/paddle/fluid/framework/ir/fuse_bn_add_act_pass.cc +++ b/paddle/fluid/framework/ir/fuse_bn_add_act_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/fuse_bn_add_act_pass.h" + #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc index 62f65baf336..3feea822bc1 100644 --- a/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc +++ b/paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/ir/fuse_elewise_add_act_pass.h" + #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc b/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc index b72a63d3785..1c6b856d987 100644 --- a/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc +++ b/paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.cc @@ -14,7 +14,9 @@ // limitations under the License. #include "paddle/fluid/framework/ir/fuse_gemm_epilogue_pass.h" + #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc index 0094b674c2a..9629b9209c4 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include + #include #include "glog/logging.h" diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc index f87d31cbc40..e290bdf99ce 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc @@ -67,8 +67,9 @@ class FuseMomentumOpPass : public FuseOptimizerOpPass { platform::errors::InvalidArgument( "All momentum Op's attr(use_nesterov) must be same, but there " "are two different value: %d, %d.", - use_nesterov, BOOST_GET_CONST(bool, momentum_op->Op()->GetAttr( - "use_nesterov")))); + use_nesterov, + BOOST_GET_CONST(bool, + momentum_op->Op()->GetAttr("use_nesterov")))); PADDLE_ENFORCE_EQ( op_role, BOOST_GET_CONST(int, momentum_op->Op()->GetAttr( diff --git a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc index 40e1de8a523..e3e5221531e 100644 --- a/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc +++ b/paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.h" + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/operator.h" #include "paddle/phi/core/kernel_factory.h" diff --git a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc index 56ca98b5660..bcfa69ac2e7 100644 --- a/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc +++ b/paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc @@ -13,10 +13,12 @@ // limitations under the License. #include "paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.h" + #include #include #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator.cc b/paddle/fluid/framework/ir/fusion_group/code_generator.cc index 5b125030a7a..a8a09d69023 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator.cc +++ b/paddle/fluid/framework/ir/fusion_group/code_generator.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/fusion_group/code_generator.h" + #include "paddle/fluid/framework/ir/fusion_group/code_generator_helper.h" #include "paddle/fluid/framework/ir/fusion_group/cuda_resources.h" diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc b/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc index 18bd6d623b7..650ed965067 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc +++ b/paddle/fluid/framework/ir/fusion_group/code_generator_helper.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/ir/fusion_group/operation.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc index 7b6bbf02510..a24a9af158e 100644 --- a/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc +++ b/paddle/fluid/framework/ir/fusion_group/code_generator_tester.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include diff --git a/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.cc b/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.cc index 6fa3044affc..5be4091ca8b 100644 --- a/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.cc +++ b/paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.h" + #include #include "paddle/fluid/framework/ir/fusion_group/operation.h" diff --git a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.cc b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.cc index 85d34405c5e..44df3a837f6 100644 --- a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.cc +++ b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h" + #include "paddle/fluid/framework/ir/fusion_group/code_generator.h" #include "paddle/fluid/framework/ir/fusion_group/elementwise_group_detector.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" diff --git a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc index db22c03a7d9..402fad0e84c 100644 --- a/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc +++ b/paddle/fluid/framework/ir/fusion_group/fusion_group_pass_tester.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h" - #include + +#include "paddle/fluid/framework/ir/fusion_group/fusion_group_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fusion_group/operation.cc b/paddle/fluid/framework/ir/fusion_group/operation.cc index 2b7a3e1899c..7d1b7bafa13 100644 --- a/paddle/fluid/framework/ir/fusion_group/operation.cc +++ b/paddle/fluid/framework/ir/fusion_group/operation.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/fusion_group/operation.h" + #include "paddle/fluid/framework/operator.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/fusion_group/subgraph.h b/paddle/fluid/framework/ir/fusion_group/subgraph.h index 5a29e875aea..1c334e70f1c 100644 --- a/paddle/fluid/framework/ir/fusion_group/subgraph.h +++ b/paddle/fluid/framework/ir/fusion_group/subgraph.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/ir/fusion_group/operation.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_traits.h" diff --git a/paddle/fluid/framework/ir/generate_pass.cc b/paddle/fluid/framework/ir/generate_pass.cc index 02c9d8e1c0c..00d69c9d5d2 100644 --- a/paddle/fluid/framework/ir/generate_pass.cc +++ b/paddle/fluid/framework/ir/generate_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/generate_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" namespace paddle { @@ -234,178 +235,183 @@ bool IsDuplicatePattern(const GraphPatternDetector::subgraph_t& subgraph, GraphPatternDetector::handle_t GetGenerateDelete( const PDPattern& pattern, const proto::PassDesc& pass_desc) { - GraphPatternDetector::handle_t handler = [&]( - const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { - if (IsDuplicatePattern(subgraph, graph)) { - return; - } - // `var_node_maps` record the mapping of variable to the pattern subgraph. 
- std::map<std::string, Node*> var_node_maps; - for (const proto::PassDesc::VarMap& var_map : pass_desc.var_maps()) { - Node* node = subgraph.at(pattern.RetrieveNode(var_map.pattern_var())); - const auto& iter = var_node_maps.find(var_map.replace_var()); - if (var_node_maps.end() == iter) { - // first node is input - var_node_maps.insert({var_map.replace_var(), node}); - } else { - // output node - for (Node* s_node : node->outputs) { - iter->second->outputs.push_back(s_node); - std::replace(s_node->inputs.begin(), s_node->inputs.end(), node, - iter->second); - s_node->Op()->RenameInput(node->Name(), iter->second->Name()); + GraphPatternDetector::handle_t handler = + [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { + if (IsDuplicatePattern(subgraph, graph)) { + return; } - } - } - // Remove nodes that are intermediate. - std::unordered_set<const Node*> remove_nodes; - for (const std::unique_ptr<PDNode>& pdnode : pattern.nodes()) { - remove_nodes.emplace(subgraph.at(pdnode.get())); - } - for (auto iter : var_node_maps) { - remove_nodes.erase(iter.second); - } - GraphSafeRemoveNodes(graph, remove_nodes); - }; + // `var_node_maps` record the mapping of variable to the pattern + // subgraph. + std::map<std::string, Node*> var_node_maps; + for (const proto::PassDesc::VarMap& var_map : pass_desc.var_maps()) { + Node* node = subgraph.at(pattern.RetrieveNode(var_map.pattern_var())); + const auto& iter = var_node_maps.find(var_map.replace_var()); + if (var_node_maps.end() == iter) { + // first node is input + var_node_maps.insert({var_map.replace_var(), node}); + } else { + // output node + for (Node* s_node : node->outputs) { + iter->second->outputs.push_back(s_node); + std::replace(s_node->inputs.begin(), s_node->inputs.end(), node, + iter->second); + s_node->Op()->RenameInput(node->Name(), iter->second->Name()); + } + } + } + // Remove nodes that are intermediate.
+ std::unordered_set<const Node*> remove_nodes; + for (const std::unique_ptr<PDNode>& pdnode : pattern.nodes()) { + remove_nodes.emplace(subgraph.at(pdnode.get())); + } + for (auto iter : var_node_maps) { + remove_nodes.erase(iter.second); + } + GraphSafeRemoveNodes(graph, remove_nodes); + }; return handler; } GraphPatternDetector::handle_t GetGenerateRewrite( const PDPattern& pattern, const proto::PassDesc& pass_desc) { - GraphPatternDetector::handle_t handler = [&]( - const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { - if (IsDuplicatePattern(subgraph, graph)) { - return; - } - for (const auto& condition : pass_desc.var_attr_conditions()) { - if (condition.has_condition_attr()) { - Node* node = - subgraph.at(pattern.RetrieveNode(condition.attr().var_name())); - Attribute node_attr = GetVarAttrValue(node->Var(), condition.attr()); - Attribute condition_attr; - if (condition.condition_attr().role() == - proto::PassDesc_RoleType_kVariable) { - Node* condition_node = - subgraph.at(pattern.RetrieveNode(condition.attr().var_name())); - condition_attr = GetVarAttrValue(condition_node->Var(), - condition.condition_attr()); - } else { - PADDLE_THROW( - platform::errors::Unimplemented("Unimplemented for operation.")); - } - bool check_failed = false; - if (condition.type() == proto::PassDesc_ConditionType_kEQ) { - check_failed = !(node_attr == condition_attr); - } - if (check_failed) { - VLOG(3) << "Check var [" << node->Name() << "] with attr [" - << condition.attr().name() << "] failed, skip this pattern."; + GraphPatternDetector::handle_t handler = + [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { + if (IsDuplicatePattern(subgraph, graph)) { return; } - } - } - // `var_node_maps` record the mapping of variable to the pattern subgraph. - std::map<std::string, Node*> var_node_maps; - for (const proto::PassDesc::VarMap& var_map : pass_desc.var_maps()) { - Node* node = subgraph.at(pattern.RetrieveNode(var_map.pattern_var())); - var_node_maps.insert({var_map.replace_var(), node}); - } - // Traverse all operators to create subgraph. - for (int index = 0; index < pass_desc.replace_size(); ++index) { - const proto::OpDesc& op = pass_desc.replace(index); - OpDesc op_desc; - std::vector<Node*> in_nodes, out_nodes; - op_desc.SetType(op.type()); - // Create Nodes for inputs of current operator. - for (const proto::OpDesc::Var& var : op.inputs()) { - std::vector<std::string> arguments; - for (const std::string& argument : var.arguments()) { - // The input may be mapped on the operator of pattern subgraph. - Node* node = nullptr; - auto iter = var_node_maps.find(argument); - if (var_node_maps.end() == iter) { - VarDesc var_desc(patterns::UniqueKey(argument)); - node = graph->CreateVarNode(&var_desc); - var_node_maps.insert({argument, node}); - } else { - node = iter->second; - } - in_nodes.push_back(node); - arguments.push_back(node->Name()); - } - op_desc.SetInput(var.parameter(), arguments); - } - // Create Nodes for outputs of current operator. - for (const proto::OpDesc::Var& var : op.outputs()) { - std::vector<std::string> arguments; - for (const std::string& argument : var.arguments()) { - // The output may be mapped on the operator of pattern subgraph.
- Node* node = nullptr; - auto iter = var_node_maps.find(argument); - if (var_node_maps.end() == iter) { - VarDesc var_desc(patterns::UniqueKey(argument)); - node = graph->CreateVarNode(&var_desc); - var_node_maps.insert({argument, node}); - } else { - if (in_nodes.end() == - std::find(in_nodes.begin(), in_nodes.end(), iter->second)) { - node = iter->second; + for (const auto& condition : pass_desc.var_attr_conditions()) { + if (condition.has_condition_attr()) { + Node* node = + subgraph.at(pattern.RetrieveNode(condition.attr().var_name())); + Attribute node_attr = + GetVarAttrValue(node->Var(), condition.attr()); + Attribute condition_attr; + if (condition.condition_attr().role() == + proto::PassDesc_RoleType_kVariable) { + Node* condition_node = subgraph.at( + pattern.RetrieveNode(condition.attr().var_name())); + condition_attr = GetVarAttrValue(condition_node->Var(), + condition.condition_attr()); } else { - node = graph->CreateVarNode(iter->second->Var()); + PADDLE_THROW(platform::errors::Unimplemented( + "Unimplemented for operation.")); + } + bool check_failed = false; + if (condition.type() == proto::PassDesc_ConditionType_kEQ) { + check_failed = !(node_attr == condition_attr); + } + if (check_failed) { + VLOG(3) << "Check var [" << node->Name() << "] with attr [" + << condition.attr().name() + << "] failed, skip this pattern."; + return; } } - out_nodes.push_back(node); - arguments.push_back(node->Name()); } - op_desc.SetOutput(var.parameter(), arguments); - } - // Set attribute for current operator. - for (const proto::OpDesc::Attr& attr : op.attrs()) { - op_desc.SetAttr(attr.name(), GetAttrValue(attr)); - } - for (const auto& attr_map : pass_desc.op_attr_maps()) { - if (attr_map.replace_attr().op_index() == index) { - Attribute attr; - if (attr_map.pattern_attr().role() == - proto::PassDesc_RoleType_kVariable) { - Node* condition_node = subgraph.at( - pattern.RetrieveNode(attr_map.pattern_attr().var_name())); - attr = - GetVarAttrValue(condition_node->Var(), attr_map.pattern_attr()); - } else { - Node* condition_node = subgraph.at(pattern.RetrieveNode( - std::to_string(attr_map.pattern_attr().op_index()))); - attr = - GetOpAttrValue(condition_node->Op(), attr_map.pattern_attr()); + // `var_node_maps` record the mapping of variable to the pattern + // subgraph. + std::map<std::string, Node*> var_node_maps; + for (const proto::PassDesc::VarMap& var_map : pass_desc.var_maps()) { + Node* node = subgraph.at(pattern.RetrieveNode(var_map.pattern_var())); + var_node_maps.insert({var_map.replace_var(), node}); + } + // Traverse all operators to create subgraph. + for (int index = 0; index < pass_desc.replace_size(); ++index) { + const proto::OpDesc& op = pass_desc.replace(index); + OpDesc op_desc; + std::vector<Node*> in_nodes, out_nodes; + op_desc.SetType(op.type()); + // Create Nodes for inputs of current operator. + for (const proto::OpDesc::Var& var : op.inputs()) { + std::vector<std::string> arguments; + for (const std::string& argument : var.arguments()) { + // The input may be mapped on the operator of pattern subgraph. + Node* node = nullptr; + auto iter = var_node_maps.find(argument); + if (var_node_maps.end() == iter) { + VarDesc var_desc(patterns::UniqueKey(argument)); + node = graph->CreateVarNode(&var_desc); + var_node_maps.insert({argument, node}); + } else { + node = iter->second; + } + in_nodes.push_back(node); + arguments.push_back(node->Name()); + } + op_desc.SetInput(var.parameter(), arguments); + } + // Create Nodes for outputs of current operator.
+ for (const proto::OpDesc::Var& var : op.outputs()) { + std::vector<std::string> arguments; + for (const std::string& argument : var.arguments()) { + // The output may be mapped on the operator of pattern subgraph. + Node* node = nullptr; + auto iter = var_node_maps.find(argument); + if (var_node_maps.end() == iter) { + VarDesc var_desc(patterns::UniqueKey(argument)); + node = graph->CreateVarNode(&var_desc); + var_node_maps.insert({argument, node}); + } else { + if (in_nodes.end() == + std::find(in_nodes.begin(), in_nodes.end(), iter->second)) { + node = iter->second; + } else { + node = graph->CreateVarNode(iter->second->Var()); + } + } + out_nodes.push_back(node); + arguments.push_back(node->Name()); + } + op_desc.SetOutput(var.parameter(), arguments); + } + // Set attribute for current operator. + for (const proto::OpDesc::Attr& attr : op.attrs()) { + op_desc.SetAttr(attr.name(), GetAttrValue(attr)); } - if (attr_map.has_operation()) { - Attribute operation = GetAttrValue(attr_map.operation().value()); - attr = boost::apply_visitor( - operation_visitor(attr_map.operation().type()), attr, - operation); + for (const auto& attr_map : pass_desc.op_attr_maps()) { + if (attr_map.replace_attr().op_index() == index) { + Attribute attr; + if (attr_map.pattern_attr().role() == + proto::PassDesc_RoleType_kVariable) { + Node* condition_node = subgraph.at( + pattern.RetrieveNode(attr_map.pattern_attr().var_name())); + attr = GetVarAttrValue(condition_node->Var(), + attr_map.pattern_attr()); + } else { + Node* condition_node = subgraph.at(pattern.RetrieveNode( + std::to_string(attr_map.pattern_attr().op_index()))); + attr = GetOpAttrValue(condition_node->Op(), + attr_map.pattern_attr()); + } + if (attr_map.has_operation()) { + Attribute operation = + GetAttrValue(attr_map.operation().value()); + attr = boost::apply_visitor( + operation_visitor(attr_map.operation().type()), attr, + operation); + } + op_desc.SetAttr(attr_map.replace_attr().name(), attr); + } + } + // Create a Node for current operator. + Node* op_node = graph->CreateOpNode(&op_desc); + for (Node* node : in_nodes) { + IR_NODE_LINK_TO(node, op_node); + } + for (Node* node : out_nodes) { + IR_NODE_LINK_TO(op_node, node); } - op_desc.SetAttr(attr_map.replace_attr().name(), attr); } - } - // Create a Node for current operator. - Node* op_node = graph->CreateOpNode(&op_desc); - for (Node* node : in_nodes) { - IR_NODE_LINK_TO(node, op_node); - } - for (Node* node : out_nodes) { - IR_NODE_LINK_TO(op_node, node); - } - } - // Remove nodes that are intermediate. - std::unordered_set<const Node*> remove_nodes; - for (const std::unique_ptr<PDNode>& pdnode : pattern.nodes()) { - remove_nodes.emplace(subgraph.at(pdnode.get())); - } - for (auto iter : var_node_maps) { - remove_nodes.erase(iter.second); - } - GraphSafeRemoveNodes(graph, remove_nodes); - }; + // Remove nodes that are intermediate. + std::unordered_set<const Node*> remove_nodes; + for (const std::unique_ptr<PDNode>& pdnode : pattern.nodes()) { + remove_nodes.emplace(subgraph.at(pdnode.get())); + } + for (auto iter : var_node_maps) { + remove_nodes.erase(iter.second); + } + GraphSafeRemoveNodes(graph, remove_nodes); + }; return handler; } diff --git a/paddle/fluid/framework/ir/generate_pass_tester.cc b/paddle/fluid/framework/ir/generate_pass_tester.cc index 6876dde50c1..7e98b11215a 100644 --- a/paddle/fluid/framework/ir/generate_pass_tester.cc +++ b/paddle/fluid/framework/ir/generate_pass_tester.cc @@ -12,16 +12,16 @@ // See the License for the specific language governing permissions and // limitations under the License.
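The generate_pass.cc hunks above are pure reflow: once handler = [&](...) overflows the column limit, clang-format breaks after the =, moves the whole lambda introducer to the next line, and indents the body one further level. A self-contained sketch of the same before/after shape, using only standard-library types in place of Paddle's, so it is an illustration rather than the pass code:

#include <functional>
#include <iostream>
#include <string>

int main() {
  int found_subgraph_count = 0;
  // New style: break after '=', then indent the lambda body under the
  // introducer, as in the GetGenerateDelete/GetGenerateRewrite hunks above.
  std::function<void(const std::string&, int)> handler =
      [&](const std::string& subgraph_name, int node_count) {
        std::cout << subgraph_name << ": " << node_count << " nodes\n";
        ++found_subgraph_count;
      };
  handler("pattern", 3);
  return found_subgraph_count == 1 ? 0 : 1;
}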
-#include "paddle/fluid/framework/ir/generate_pass.h" #include "gtest/gtest.h" +#include "paddle/fluid/framework/ir/generate_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" REGISTER_GENERATE_PASS(generate_fc_fuse) { paddle::framework::ir::PassPairs pass_pairs; for (bool with_relu : {true, false}) { // pattern - SUBGRAPH_(pattern) = - [ subgraph = &pattern, with_relu ](VAR_(x), VAR_(y), VAR_(z)) { + SUBGRAPH_(pattern) = [subgraph = &pattern, with_relu](VAR_(x), VAR_(y), + VAR_(z)) { VLOG(3) << "exec lambda func."; auto mul = OP_(mul)({{"X", x}, {"Y", y}}).Out("Out"); auto ewadd = OP_(elementwise_add)({{"X", mul}, {"Y", z}}).Out("Out"); @@ -32,8 +32,8 @@ REGISTER_GENERATE_PASS(generate_fc_fuse) { } }; // replace - SUBGRAPH_(replace) = - [ subgraph = &replace, with_relu ](VAR_(x), VAR_(y), VAR_(z)) { + SUBGRAPH_(replace) = [subgraph = &replace, with_relu](VAR_(x), VAR_(y), + VAR_(z)) { auto& fc = OP_(fc)({{"Input", x}, {"W", y}, {"Bias", z}}); return fc.Out("Out"); }; diff --git a/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc b/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc index ac580b99b5c..8e58231e986 100644 --- a/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc +++ b/paddle/fluid/framework/ir/gpu_cpu_map_matmul_to_mul_pass.cc @@ -16,9 +16,9 @@ #include #include + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_proto_maker.h" - #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/graph.cc b/paddle/fluid/framework/ir/graph.cc index f5f6f3ecb85..acf8f6ec643 100644 --- a/paddle/fluid/framework/ir/graph.cc +++ b/paddle/fluid/framework/ir/graph.cc @@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/ir/graph.h" + #include -#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/operator.h" PADDLE_DEFINE_EXPORTED_bool(convert_all_blocks, true, diff --git a/paddle/fluid/framework/ir/graph.h b/paddle/fluid/framework/ir/graph.h index 10645f08dc3..40a6fbbade8 100644 --- a/paddle/fluid/framework/ir/graph.h +++ b/paddle/fluid/framework/ir/graph.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/framework/ir/graph_helper.cc b/paddle/fluid/framework/ir/graph_helper.cc index ed7aa451d13..d4c7a607db3 100644 --- a/paddle/fluid/framework/ir/graph_helper.cc +++ b/paddle/fluid/framework/ir/graph_helper.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/graph_helper.h" + #include #include + #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/op_proto_maker.h" @@ -421,8 +423,9 @@ std::vector TopologySortGraphByDescOrder(const Graph &graph) { DescOrderComparator> adj_list = BuildOperationAdjList(graph); PADDLE_ENFORCE_EQ(HasCircleInternal(adj_list, nullptr), - false, platform::errors::InvalidArgument( - "Generated graph shouldn't contain cycle.")); + false, + platform::errors::InvalidArgument( + "Generated graph shouldn't contain cycle.")); std::unordered_set visited; std::vector ret; for (auto adj : adj_list) { diff --git a/paddle/fluid/framework/ir/graph_helper_test.cc b/paddle/fluid/framework/ir/graph_helper_test.cc index 0a2dcfed000..5972cd40817 100644 --- a/paddle/fluid/framework/ir/graph_helper_test.cc +++ b/paddle/fluid/framework/ir/graph_helper_test.cc @@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/graph.h" -#include "gtest/gtest.h" #include "paddle/fluid/framework/ir/graph_helper.h" + +#include "gtest/gtest.h" +#include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/program_desc.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/graph_pattern_detector.cc b/paddle/fluid/framework/ir/graph_pattern_detector.cc index ea101125b18..ca5a82708c5 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/graph_pattern_detector.h" + #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/ir/graph_viz_pass.h" #include "paddle/fluid/framework/operator.h" @@ -70,8 +71,9 @@ void PDPattern::AddEdge(PDNode *a, PDNode *b) { a, platform::errors::NotFound("PDNode %s is not found.", a->name())); PADDLE_ENFORCE_NOT_NULL( b, platform::errors::NotFound("PDNode %s is not found.", b->name())); - PADDLE_ENFORCE_NE(a, b, platform::errors::PermissionDenied( - "Cannot connect the same node in the graph.")); + PADDLE_ENFORCE_NE(a, b, + platform::errors::PermissionDenied( + "Cannot connect the same node in the graph.")); edges_.emplace_back(a, b); } @@ -3062,11 +3064,10 @@ PDNode *patterns::ReshapeTransposeMatmulPattern::operator()( transpose_out->assert_is_only_output_of_op("transpose2"); auto transpose_xshape = - with_transpose_xshape - ? pattern->NewNode(transpose_xshape_repr()) - ->AsIntermediate() - ->assert_is_op_output("transpose2", "XShape") - : nullptr; + with_transpose_xshape ? 
pattern->NewNode(transpose_xshape_repr()) + ->AsIntermediate() + ->assert_is_op_output("transpose2", "XShape") + : nullptr; auto matmul_out = pattern->NewNode(matmul_out_repr()) ->AsOutput() diff --git a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc index 5ac5a5d9839..b02b2e13edc 100644 --- a/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc +++ b/paddle/fluid/framework/ir/graph_pattern_detector_tester.cc @@ -152,12 +152,12 @@ TEST(GraphPatternDetecter, MultiSubgraph) { x.mutable_pattern()->AddEdge(any_var, any_op1); int count = 0; - GraphPatternDetector::handle_t handle = [&]( - const GraphPatternDetector::subgraph_t& s, Graph* g) { - LOG(INFO) << "Detect " << s.at(any_op)->Name() << " -> " - << s.at(any_var)->Name() << " -> " << s.at(any_op1)->Name(); - count++; - }; + GraphPatternDetector::handle_t handle = + [&](const GraphPatternDetector::subgraph_t& s, Graph* g) { + LOG(INFO) << "Detect " << s.at(any_op)->Name() << " -> " + << s.at(any_var)->Name() << " -> " << s.at(any_op1)->Name(); + count++; + }; x(&graph, handle); diff --git a/paddle/fluid/framework/ir/graph_printer.h b/paddle/fluid/framework/ir/graph_printer.h index 76b07f0d653..1b0e059f122 100644 --- a/paddle/fluid/framework/ir/graph_printer.h +++ b/paddle/fluid/framework/ir/graph_printer.h @@ -15,11 +15,13 @@ #pragma once #include + #include #include #include #include #include + #include "paddle/fluid/framework/details/multi_devices_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/graph_test.cc b/paddle/fluid/framework/ir/graph_test.cc index 1ff67ae0fe0..db18a735ce2 100644 --- a/paddle/fluid/framework/ir/graph_test.cc +++ b/paddle/fluid/framework/ir/graph_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/graph.h" + #include "gtest/gtest.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/framework/ir/graph_to_program_pass.cc b/paddle/fluid/framework/ir/graph_to_program_pass.cc index 3ad591c6dff..f57cdd9d974 100644 --- a/paddle/fluid/framework/ir/graph_to_program_pass.cc +++ b/paddle/fluid/framework/ir/graph_to_program_pass.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/graph_to_program_pass.h" #include + #include #include "paddle/fluid/framework/op_proto_maker.h" diff --git a/paddle/fluid/framework/ir/graph_traits.cc b/paddle/fluid/framework/ir/graph_traits.cc index b0631456302..36bc3e6dd78 100644 --- a/paddle/fluid/framework/ir/graph_traits.cc +++ b/paddle/fluid/framework/ir/graph_traits.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/framework/ir/graph_traits.h" + #include #include -#include "paddle/fluid/framework/ir/graph_traits.h" - namespace paddle { namespace framework { namespace ir { @@ -76,21 +76,22 @@ NodesDFSIterator::NodesDFSIterator(const std::vector &source) { } NodesDFSIterator::NodesDFSIterator(NodesDFSIterator &&other) noexcept - : stack_(std::move(other.stack_)), - visited_(std::move(other.visited_)) {} + : stack_(std::move(other.stack_)), visited_(std::move(other.visited_)) {} NodesDFSIterator::NodesDFSIterator(const NodesDFSIterator &other) : stack_(other.stack_), visited_(other.visited_) {} Node &NodesDFSIterator::operator*() { - PADDLE_ENFORCE_EQ(stack_.empty(), false, platform::errors::OutOfRange( - "The iterator exceeds range.")); + PADDLE_ENFORCE_EQ( + stack_.empty(), false, + platform::errors::OutOfRange("The iterator exceeds range.")); return *stack_.top(); } NodesDFSIterator &NodesDFSIterator::operator++() { - PADDLE_ENFORCE_EQ(stack_.empty(), false, platform::errors::OutOfRange( - "The iterator exceeds range.")); + PADDLE_ENFORCE_EQ( + stack_.empty(), false, + platform::errors::OutOfRange("The iterator exceeds range.")); visited_.insert(stack_.top()); auto *cur = stack_.top(); stack_.pop(); diff --git a/paddle/fluid/framework/ir/graph_viz_pass.cc b/paddle/fluid/framework/ir/graph_viz_pass.cc index 7311eb4b91d..da48d1d19b6 100644 --- a/paddle/fluid/framework/ir/graph_viz_pass.cc +++ b/paddle/fluid/framework/ir/graph_viz_pass.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/graph_viz_pass.h" + #include + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_printer.h" #include "paddle/fluid/framework/op_proto_maker.h" diff --git a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc index 6b91ea4e360..3d60148c170 100644 --- a/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc +++ b/paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/identity_scale_op_clean_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -46,42 +47,42 @@ void IdentityScaleOpCleanPass::ApplyImpl(ir::Graph* graph) const { scale_op->LinksFrom({scale_in}).LinksTo({scale_out}); int found_subgraph_count = 0; - GraphPatternDetector::handle_t handler = [&]( - const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { - Node* scale_op_var = subgraph.at(scale_op); - Node* scale_in_var = subgraph.at(scale_in); - Node* scale_out_var = subgraph.at(scale_out); - const std::string scale_in_name = scale_in_var->Name(); - const std::string scale_out_name = scale_out_var->Name(); - // Remove links in graph - GraphSafeRemoveNodes(graph, {scale_in_var, scale_op_var}); - // Modify pre_op_desc - // Link pre_op directly to scale_out - for (auto& node : graph->Nodes()) { - if (node->IsOp()) { - auto* op_desc = node->Op(); - auto out_vars_map = op_desc->Outputs(); - for (auto out_var_map : out_vars_map) { - auto names = out_var_map.second; - bool reset = false; - for (size_t i = 0; i < names.size(); i++) { - if (names[i] == scale_in_name) { - reset = true; - names[i] = scale_out_name; - break; + GraphPatternDetector::handle_t handler = + [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { + Node* scale_op_var = subgraph.at(scale_op); + Node* scale_in_var = subgraph.at(scale_in); + Node* scale_out_var = subgraph.at(scale_out); + const std::string scale_in_name = scale_in_var->Name(); + const std::string scale_out_name = scale_out_var->Name(); + // Remove links in graph + GraphSafeRemoveNodes(graph, {scale_in_var, scale_op_var}); + // Modify pre_op_desc + // Link pre_op directly to scale_out + for (auto& node : graph->Nodes()) { + if (node->IsOp()) { + auto* op_desc = node->Op(); + auto out_vars_map = op_desc->Outputs(); + for (auto out_var_map : out_vars_map) { + auto names = out_var_map.second; + bool reset = false; + for (size_t i = 0; i < names.size(); i++) { + if (names[i] == scale_in_name) { + reset = true; + names[i] = scale_out_name; + break; + } + } + if (reset) { + op_desc->SetOutput(out_var_map.first, names); + op_desc->Flush(); + IR_NODE_LINK_TO(node, scale_out_var); + break; + } } } - if (reset) { - op_desc->SetOutput(out_var_map.first, names); - op_desc->Flush(); - IR_NODE_LINK_TO(node, scale_out_var); - break; - } } - } - } - found_subgraph_count++; - }; + found_subgraph_count++; + }; detector(graph, handler); AddStatis(found_subgraph_count); diff --git a/paddle/fluid/framework/ir/ipu/avg_shard_pass.cc b/paddle/fluid/framework/ir/ipu/avg_shard_pass.cc index f1ee3c26b8f..5c7373e1a77 100644 --- a/paddle/fluid/framework/ir/ipu/avg_shard_pass.cc +++ b/paddle/fluid/framework/ir/ipu/avg_shard_pass.cc @@ -14,10 +14,9 @@ #include "paddle/fluid/framework/ir/ipu/avg_shard_pass.h" -#include "paddle/fluid/platform/device/ipu/ipu_backend.h" - #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/platform/device/ipu/ipu_backend.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc index ebe40c3ee20..cbe57eae4c4 100644 --- a/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc +++ b/paddle/fluid/framework/ir/ipu/infer_shape_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/ipu/infer_shape_pass.h" + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc index a6b82089dc4..df4ea7fac4b 100644 --- a/paddle/fluid/framework/ir/ipu/inference_process_pass.cc +++ b/paddle/fluid/framework/ir/ipu/inference_process_pass.cc @@ -14,11 +14,10 @@ #include "paddle/fluid/framework/ir/ipu/inference_process_pass.h" -#include "paddle/fluid/platform/device/ipu/ipu_backend.h" -#include "paddle/fluid/platform/device/ipu/ipu_strategy.h" - #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/platform/device/ipu/ipu_backend.h" +#include "paddle/fluid/platform/device/ipu/ipu_strategy.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.cc b/paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.cc index 4da913e7176..12d646e153b 100644 --- a/paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.cc +++ b/paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/ipu/optimizer_state_align_pass.h" + #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/platform/device/ipu/ipu_backend.h" #include "paddle/fluid/platform/device/ipu/ipu_names.h" diff --git a/paddle/fluid/framework/ir/is_test_pass_tester.cc b/paddle/fluid/framework/ir/is_test_pass_tester.cc index bf0667aeafe..d2444295544 100644 --- a/paddle/fluid/framework/ir/is_test_pass_tester.cc +++ b/paddle/fluid/framework/ir/is_test_pass_tester.cc @@ -11,9 +11,9 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/is_test_pass.h" - #include + +#include "paddle/fluid/framework/ir/is_test_pass.h" #ifdef _WIN32 #undef FALSE #undef TRUE diff --git a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc index 4b0dc4809f5..1b7b06213fe 100644 --- a/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/layer_norm_fuse_pass.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h" + #include #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/layer_norm_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/var_desc.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/lock_free_optimize_pass.h b/paddle/fluid/framework/ir/lock_free_optimize_pass.h index 93b6396bf7f..a72a59374f9 100644 --- a/paddle/fluid/framework/ir/lock_free_optimize_pass.h +++ b/paddle/fluid/framework/ir/lock_free_optimize_pass.h @@ -29,31 +29,31 @@ class Node; class Graph; /* -* Remove the sum op of all gradients of the backward op. -* And remove the dependecies of the optimizer related to the -* same backward op. 
-* -* Before this pass: -* -* forward_op1 forward_op2 -* | | -* grad_op1 grad_op2 -* \ / -* \ / -* sum_op -* | -* sgd_op -* -* After this pass: -* forward_op1 forward_op2 -* | | -* grad_op1 grad_op2 -* | | -* sgd_op1 sgd_op2 -* -* sgd_op1 and sgd_op2 will update the same weight which holds the same -* memory, so we could benefits from the acceleration -*/ + * Remove the sum op of all gradients of the backward op. + * And remove the dependecies of the optimizer related to the + * same backward op. + * + * Before this pass: + * + * forward_op1 forward_op2 + * | | + * grad_op1 grad_op2 + * \ / + * \ / + * sum_op + * | + * sgd_op + * + * After this pass: + * forward_op1 forward_op2 + * | | + * grad_op1 grad_op2 + * | | + * sgd_op1 sgd_op2 + * + * sgd_op1 and sgd_op2 will update the same weight which holds the same + * memory, so we could benefits from the acceleration + */ class LockFreeOptimizePass : public Pass { public: virtual ~LockFreeOptimizePass() {} diff --git a/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc b/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc index 2335e5eee01..a4bab58506e 100644 --- a/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc +++ b/paddle/fluid/framework/ir/matmul_scale_fuse_pass.cc @@ -16,9 +16,9 @@ #include #include + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_proto_maker.h" - #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc index b12b84d4a49..090673b87ed 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_cross_op_memory_reuse_pass.cc @@ -321,13 +321,15 @@ size_t BufferSharedCrossOpMemoryReusePass::ResolveDependencyBetween( } void BufferSharedCrossOpMemoryReusePass::BuildOpDependencyMap() const { - PADDLE_ENFORCE_EQ(ops_.empty(), true, platform::errors::InvalidArgument( - "Ops must be initialized here.")); + PADDLE_ENFORCE_EQ( + ops_.empty(), true, + platform::errors::InvalidArgument("Ops must be initialized here.")); PADDLE_ENFORCE_EQ( op_to_idx_.empty(), true, platform::errors::InvalidArgument("Op to idx must be initialized here.")); - PADDLE_ENFORCE_EQ(deps_.empty(), true, platform::errors::InvalidArgument( - "Deps must be initialized here.")); + PADDLE_ENFORCE_EQ( + deps_.empty(), true, + platform::errors::InvalidArgument("Deps must be initialized here.")); // Toposort ops OpGraphView graph_view(ir::FilterByNodeWrapper<details::OpHandleBase>(*graph_)); diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc index 1ca6e989f27..682a72c5729 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc @@ -166,8 +166,9 @@ static std::string GetFirstVarName(const OpDesc &op, const std::string &slot, static std::vector<std::vector<std::pair<std::string, std::string>>> GetInplaceVars(const BlockDesc &block, bool use_cuda, const std::vector<std::string> &skip_vars) { - PADDLE_ENFORCE_EQ(block.ID(), 0, platform::errors::Unimplemented( - "Inplace can only perform in block 0.")); + PADDLE_ENFORCE_EQ( + block.ID(), 0, + platform::errors::Unimplemented("Inplace can only perform in block 0."));
// only take block 0 gc_vars const auto op_gc_vars = GetEagerDeletionCleanVars(*block.Program(), skip_vars)[0]; diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h index e89734bacec..8d593254f90 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/memory_optimization_var_info.h @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h index d6f286afc55..b5506dd1dcb 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h +++ b/paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.h @@ -136,13 +136,15 @@ void OpGraphView::BreadthFirstVisit(Callback &&callback) const { } } - PADDLE_ENFORCE_EQ(num_calls, op_num, platform::errors::InvalidArgument( - "There are unvisited ops.")); + PADDLE_ENFORCE_EQ( + num_calls, op_num, + platform::errors::InvalidArgument("There are unvisited ops.")); PADDLE_ENFORCE_EQ( visited_ops.size(), op_num, platform::errors::InvalidArgument("There are unvisited ops.")); - PADDLE_ENFORCE_EQ(op_deps.empty(), true, platform::errors::InvalidArgument( - "There are unvisited ops.")); + PADDLE_ENFORCE_EQ( + op_deps.empty(), true, + platform::errors::InvalidArgument("There are unvisited ops.")); } } // namespace ir diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc index 6077069ea74..b1fdb5e2160 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc @@ -26,9 +26,9 @@ namespace paddle { namespace framework { namespace ir { +using paddle::operators::OpAndGradOpPair; using paddle::operators::OpVariant; using paddle::operators::OpVariantSet; -using paddle::operators::OpAndGradOpPair; void RecurrentOpEagerDeletionPass::ApplyImpl(Graph *graph) const { // Find all recurrent_op and recurrent_grad_op in graph diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc index 313b2cc3345..3f88aaad57e 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/eager_deletion_op_handle.h" #include "paddle/fluid/framework/ir/graph_helper.h" diff --git a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc index 88bf9e38763..848b6e494ad 100644 --- a/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc +++ b/paddle/fluid/framework/ir/memory_optimize_pass/share_varinfo_into_cinn_pass_test.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include "gtest/gtest.h" #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/framework/details/eager_deletion_op_handle.h" diff --git a/paddle/fluid/framework/ir/mixed_precision_configure_pass.cc b/paddle/fluid/framework/ir/mixed_precision_configure_pass.cc index 4aa59d9196b..80f201d2d5a 100644 --- a/paddle/fluid/framework/ir/mixed_precision_configure_pass.cc +++ b/paddle/fluid/framework/ir/mixed_precision_configure_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mixed_precision_configure_pass.h" + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -25,9 +26,10 @@ void MixedPrecisionConfigurePass::InsertCastOps( VLOG(3) << "Insert the cast op before and after the kernel that does not " "supports fp16 precision"; - auto update_cast_desc = [&]( - framework::OpDesc& desc, const std::string& x_name, - const std::string& out_name, const int in_dtype, const int out_dtype) { + auto update_cast_desc = [&](framework::OpDesc& desc, + const std::string& x_name, + const std::string& out_name, const int in_dtype, + const int out_dtype) { desc.SetType("cast"); desc.SetInput("X", {x_name}); desc.SetOutput("Out", {out_name}); diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc index 9f6cd8992dc..62145cb6a0f 100644 --- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc index e13d44ac232..b1b546f085c 100644 --- a/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/batch_norm_act_fuse_pass_tester.cc @@ -34,7 +34,7 @@ void SetBatchNormAttrs(OpDesc* bn_op, bool is_test = true, bn_op->SetAttr("fuse_with_relu", false); bn_op->SetAttr("epsilon", 0.001f); } -} +} // namespace // ------------------------------ Test cases ----------------------------------- @@ -48,11 +48,12 @@ TEST(FuseBatchNormActOneDNNPass, ThrowIsTestTrainableStats) { auto prog = test::BuildProgramDesc( {"x", "m", "v", "bn_y", "act_y", "m_out", "var_out", "sm", "sv"}, {"scale", "bias"}); - auto* bn_op = test::CreateOp(&prog, "batch_norm", {{"X", "x"}, - {"Scale", "scale"}, - {"Bias", "bias"}, - {"Mean", "m"}, - {"Variance", "v"}}, + auto* bn_op = test::CreateOp(&prog, "batch_norm", + {{"X", "x"}, + {"Scale", "scale"}, + {"Bias", "bias"}, + {"Mean", "m"}, + {"Variance", "v"}}, {{"Y", "bn_y"}, {"MeanOut", "m_out"}, {"VarianceOut", "var_out"}, @@ -73,11 +74,12 @@ TEST(FuseBatchNormActOneDNNPass, ThrowIsTestTrainableStats) { TEST(FuseBatchNormActOneDNNPass, FuseIsTest) { auto prog = test::BuildProgramDesc({"x", "m", "v", "bn_y", "act_y"}, {"scale", "bias"}); - auto* bn_op = test::CreateOp(&prog, "batch_norm", {{"X", "x"}, - {"Scale", "scale"}, - {"Bias", "bias"}, - {"Mean", "m"}, - {"Variance", "v"}}, + auto* bn_op = test::CreateOp(&prog, "batch_norm", + {{"X", "x"}, + {"Scale", "scale"}, + {"Bias", "bias"}, + {"Mean", "m"}, + {"Variance", "v"}}, {{"Y", 
"bn_y"}}); SetBatchNormAttrs(bn_op, true, false); test::CreateOp(&prog, "relu", {{"X", "bn_y"}}, {{"Out", "act_y"}}, false); @@ -106,11 +108,12 @@ TEST(FuseBatchNormActOneDNNPass, ThrowTrainableStats) { auto prog = test::BuildProgramDesc( {"x", "m", "v", "bn_y", "act_y", "m_out", "var_out", "sm", "sv"}, {"scale", "bias"}); - auto* bn_op = test::CreateOp(&prog, "batch_norm", {{"X", "x"}, - {"Scale", "scale"}, - {"Bias", "bias"}, - {"Mean", "m"}, - {"Variance", "v"}}, + auto* bn_op = test::CreateOp(&prog, "batch_norm", + {{"X", "x"}, + {"Scale", "scale"}, + {"Bias", "bias"}, + {"Mean", "m"}, + {"Variance", "v"}}, {{"Y", "bn_y"}, {"MeanOut", "m_out"}, {"VarianceOut", "var_out"}, @@ -132,11 +135,12 @@ TEST(FuseBatchNormActOneDNNPass, AllAttrsFalse) { auto prog = test::BuildProgramDesc( {"x", "m", "v", "bn_y", "act_y", "m_out", "var_out", "sm", "sv"}, {"scale", "bias"}); - auto* bn_op = test::CreateOp(&prog, "batch_norm", {{"X", "x"}, - {"Scale", "scale"}, - {"Bias", "bias"}, - {"Mean", "m"}, - {"Variance", "v"}}, + auto* bn_op = test::CreateOp(&prog, "batch_norm", + {{"X", "x"}, + {"Scale", "scale"}, + {"Bias", "bias"}, + {"Mean", "m"}, + {"Variance", "v"}}, {{"Y", "bn_y"}, {"MeanOut", "m_out"}, {"VarianceOut", "var_out"}, @@ -158,11 +162,12 @@ TEST(FuseBatchNormActOneDNNPass, ThrowUseMkldnn) { auto prog = test::BuildProgramDesc( {"x", "m", "v", "bn_y", "act_y", "m_out", "var_out", "sm", "sv"}, {"scale", "bias"}); - auto* bn_op = test::CreateOp(&prog, "batch_norm", {{"X", "x"}, - {"Scale", "scale"}, - {"Bias", "bias"}, - {"Mean", "m"}, - {"Variance", "v"}}, + auto* bn_op = test::CreateOp(&prog, "batch_norm", + {{"X", "x"}, + {"Scale", "scale"}, + {"Bias", "bias"}, + {"Mean", "m"}, + {"Variance", "v"}}, {{"Y", "bn_y"}, {"MeanOut", "m_out"}, {"VarianceOut", "var_out"}, diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc index d7d0b988b55..e19426d01d1 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.cc @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h" + #include + #include #include "paddle/fluid/framework/ir/graph_helper.h" -#include "paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h" #include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h index b0076c1b38c..26fb6e4978f 100644 --- a/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/compute_propagate_scales_mkldnn_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc index 1fefab805b1..e3db8547176 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass_tester.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" - #include + #include + +#include "paddle/fluid/framework/ir/mkldnn/conv_activation_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/op_proto_maker.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h index a74d7443ee1..18e09173491 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h @@ -23,8 +23,8 @@ namespace paddle { namespace framework { namespace ir { /* -* Fuse the Conv and Elementwise_add to a ConvBiasOp. -*/ + * Fuse the Conv and Elementwise_add to a ConvBiasOp. + */ class Graph; class ConvBiasFusePass : public FusePassBase { @@ -38,8 +38,8 @@ class ConvBiasFusePass : public FusePassBase { const std::string name_scope_{"conv_bias_mkldnn_fuse"}; }; /* -* Fuse the Conv3D and Elementwise_add to a Conv3DBiasOp. -*/ + * Fuse the Conv3D and Elementwise_add to a Conv3DBiasOp. + */ class Conv2DTransposeBiasFusePass : public ConvBiasFusePass { public: Conv2DTransposeBiasFusePass(); diff --git a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc index e9850483ebe..0e052debaee 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass_tester.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" #include -#include "paddle/fluid/framework/naive_executor.h" -#include "paddle/fluid/platform/place.h" +#include "paddle/fluid/framework/ir/mkldnn/conv_bias_mkldnn_fuse_pass.h" +#include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/imperative/type_defs.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc index 6b648608ca1..7d165b1a38a 100644 --- a/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass_tester.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h" - #include + +#include "paddle/fluid/framework/ir/mkldnn/conv_concat_relu_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/op_proto_maker.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc index 879c669bbbe..58eec79344d 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_bfloat16_pass.cc @@ -226,7 +226,7 @@ class DeQuantizer final : public Quanter { return Quanter::create_quant_op(output_name, input_name); } }; -} +} // namespace using string::PrettyLogDetail; void CPUBFloat16Pass::ApplyImpl(ir::Graph* graph) const { diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc index a61c043b580..452212664ec 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" + #include #include #include -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" #include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" #include "paddle/fluid/platform/mkldnn_helper.h" #include "paddle/fluid/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc index 912c16288c2..fb36365ac54 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass_tester.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" // NOLINT #include + #include +#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_pass.h" // NOLINT #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc index 350fad2c672..f6e5279ed23 100644 --- a/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass_tester.cc @@ -12,9 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" - #include + +#include "paddle/fluid/framework/ir/mkldnn/cpu_quantize_placement_pass.h" #include "paddle/fluid/platform/mkldnn_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc index 06940b38ea8..979c601ac04 100644 --- a/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass_tester.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" - #include +#include "paddle/fluid/framework/ir/mkldnn/depthwise_conv_mkldnn_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc index b7f7a8071d2..2a8a248a99f 100644 --- a/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/elt_act_mkldnn_fuse_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc index 7fc8806452b..afcd493f92f 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass_tester.cc index 59d81cb8647..4b158ccc5a8 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_act_mkldnn_fuse_pass_tester.cc @@ -32,7 +32,9 @@ TEST(FuseFCActOneDNNPass, ThrowUseMkldnn) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, {{"Out", "fc_y"}}, false); test::CreateOp(&prog, "gelu", {{"Input", "fc_y"}}, {{"Out", "act_y"}}, false); @@ -51,7 +53,9 @@ TEST(FuseFCActOneDNNPass, FuseWithGeluTanh) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, {{"Out", "fc_y"}}); auto* act_op = test::CreateOp(&prog, "gelu", {{"Input", "fc_y"}}, @@ -83,7 +87,9 @@ TEST(FuseFCActOneDNNPass, FuseWithGeluErf) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, {{"Out", "fc_y"}}); auto* act_op = test::CreateOp(&prog, "gelu", {{"Input", "fc_y"}}, @@ -115,7 +121,9 @@ TEST(FuseFCActOneDNNPass, FuseWithGeluAuto) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, {{"Out", "fc_y"}}); test::CreateOp(&prog, "gelu", {{"Input", "fc_y"}}, {{"Out", "act_y"}}, false); @@ -145,7 +153,9 @@ TEST(FuseFCActOneDNNPass, FuseWithTanh) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, {{"Out", "fc_y"}}); test::CreateOp(&prog, "tanh", {{"Input", "fc_y"}}, {{"Out", "act_y"}}, false); @@ -175,7 +185,9 @@ TEST(FuseFCActOneDNNPass, FuseWithSigmoid) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, {{"Out", "fc_y"}}); test::CreateOp(&prog, "sigmoid", {{"Input", "fc_y"}}, {{"Out", "act_y"}}, @@ -206,7 +218,9 @@ TEST(FuseFCActOneDNNPass, FuseWithMish) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, {{"Out", "fc_y"}}); test::CreateOp(&prog, "mish", {{"Input", "fc_y"}}, {{"Out", "act_y"}}, false); @@ -236,7 +250,9 @@ TEST(FuseFCActOneDNNPass, FuseWithHardSwish) { test::BuildProgramDesc({"x", "fc_y", "act_y"}, {"weights", "bias"}); test::CreateOp(&prog, "fc", { - {"Input", "x"}, {"Weights", "weights"}, {"Bias", "bias"}, + {"Input", "x"}, + {"Weights", "weights"}, + {"Bias", "bias"}, }, 
{{"Out", "fc_y"}}); test::CreateOp(&prog, "hard_swish", {{"Input", "fc_y"}}, {{"Out", "act_y"}}, diff --git a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc index 2e62597f2ee..60856512779 100644 --- a/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/fc_elementwise_add_mkldnn_fuse_pass.h" + #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc index 678a8fb4a69..a5481f5c6f3 100644 --- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.cc @@ -129,17 +129,13 @@ void Int8ScaleCalculationMkldnnPass::ApplyImpl(ir::Graph* graph) const { bool has_activation = !conv_op->Op()->GetAttrIfExists("fuse_activation").empty(); float activation_scale = - force_fp32_output - ? 1.0f - : has_activation - ? conv_op->Op()->GetAttrIfExists("Scale_out") - : 1.0f; + force_fp32_output ? 1.0f + : has_activation ? conv_op->Op()->GetAttrIfExists("Scale_out") + : 1.0f; auto scale_out_data = - force_fp32_output - ? 1.0f - : has_activation - ? 1.0f - : conv_op->Op()->GetAttrIfExists("Scale_out"); + force_fp32_output ? 1.0f + : has_activation ? 1.0f + : conv_op->Op()->GetAttrIfExists("Scale_out"); float sum_scale = fuse_residual_conn ? scale_out_data / scale_in_eltwise_data : 1.0f; diff --git a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc index 804d04e35f6..9d3940c9664 100644 --- a/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass_tester.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h" #include +#include "paddle/fluid/framework/ir/mkldnn/int8_scale_calculation_mkldnn_pass.h" + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc index 4eb532b47cb..1ed36e06fb1 100644 --- a/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/interpolate_mkldnn_pass.h" + #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc index 34a35877a7f..f6c99a477bc 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.cc @@ -13,8 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass.h" + #include + #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc index ed99989cf38..ddb9e717392 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_transpose_reshape_fuse_pass_tester.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/ir/mkldnn/matmul_v2_transpose_reshape_fuse_pass.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/matmul_v2_transpose_reshape_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/matmul_v2_transpose_reshape_fuse_pass.cc index dcf4664d963..6e106fa9dae 100644 --- a/paddle/fluid/framework/ir/mkldnn/matmul_v2_transpose_reshape_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/matmul_v2_transpose_reshape_fuse_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/matmul_v2_transpose_reshape_fuse_pass.h" + #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc index 4236dc55d51..06e0db4c93e 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_conv_bn_fuse_pass_tester.cc @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include #include #include #include -#include - #include "gtest/gtest.h" #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.h" diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc index c4770a322db..1ca9e76f79d 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_fc_rnn_fuse_pass_tester.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/ir/fc_gru_fuse_pass_tester.h" #include "paddle/fluid/framework/ir/fc_lstm_fuse_pass_tester.h" #include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc index d2763bd6a6d..ae8dbceb7a6 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.cc @@ -13,12 +13,14 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h index 44b6d110db8..880630055e9 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/pass.h" diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc index 7df957b2c0e..7f4e5d32536 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass_tester.cc @@ -12,13 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h" - #include -#include #include +#include +#include "paddle/fluid/framework/ir/mkldnn/mkldnn_inplace_pass.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h index 505bb2739e1..99a55b26e99 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/graph_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc index 4012e04f7d2..671ad4c1c4b 100644 --- a/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass_tester.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" - #include + #include +#include "paddle/fluid/framework/ir/mkldnn/mkldnn_placement_pass.h" + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc index 76a0c883c89..73089df5717 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h" + #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h index 70f88104b4b..cf53ecec926 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass_tester.cc index 7b6681ff967..60890336b30 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass_tester.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h" #include +#include "paddle/fluid/framework/ir/mkldnn/multi_gru_fuse_pass.h" + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc index 7821501cc4b..06125e51fb6 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.cc @@ -13,10 +13,12 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h" + #include #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h index 546a3d6570b..af58ae2bda4 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" diff --git a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass_tester.cc index 3738e3ebd68..2924401bc2e 100644 --- a/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass_tester.cc @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h" #include + #include +#include "paddle/fluid/framework/ir/mkldnn/multi_gru_seq_fuse_pass.h" + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc index 63e402cb529..15100b23407 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h" + #include + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/mkldnn/mkldnn_pass_util.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -124,10 +126,11 @@ void QuantDequantMkldnnPass::CollectInputScalesFromFake( auto* op_desc = op_node->Op(); const int bit_length = BOOST_GET_CONST(int, op_desc->GetAttr("bit_length")); - PADDLE_ENFORCE_EQ(bit_length, 8, platform::errors::InvalidArgument( - "Unsupported number quantization " - "bits: %d, only 8 is supported now.", - bit_length)); + PADDLE_ENFORCE_EQ(bit_length, 8, + platform::errors::InvalidArgument( + "Unsupported number quantization " + "bits: %d, only 8 is supported now.", + bit_length)); auto x_var_name = op_desc->Input("X")[0]; auto scale_name = op_desc->Input("InScale")[0]; diff --git a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h index a9442f70740..5003e1878bf 100644 --- a/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h +++ b/paddle/fluid/framework/ir/mkldnn/quant_dequant_mkldnn_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc index 96f575745a3..05b1d419f6f 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.cc @@ -13,9 +13,11 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h" + #include #include #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc index e6886356460..023dd6af7ee 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass_tester.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + #include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_v2_mkldnn_fuse_pass.h" - -#include #include "paddle/fluid/framework/ir/pass_tester_helper.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_v2_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_v2_mkldnn_fuse_pass.cc index 203966dc682..ed57be12c78 100644 --- a/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_v2_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_v2_mkldnn_fuse_pass.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/mkldnn/reshape_transpose_matmul_v2_mkldnn_fuse_pass.h" + #include #include #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/string/pretty_log.h" diff --git a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc index 60f844ffc80..09bad959eb0 100644 --- a/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass_tester.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h" #include +#include "paddle/fluid/framework/ir/mkldnn/scale_matmul_fuse_pass.h" + namespace paddle { namespace framework { namespace ir { diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc index bf603dc4bbc..a7e0f3a5834 100644 --- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h" + #include -#include "paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc index fe42e8f96f8..86775e20aa7 100644 --- a/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass_tester.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include "paddle/fluid/framework/ir/mkldnn/shuffle_channel_mkldnn_detect_pass.h" diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc index 82d642264c2..cad92e3153b 100644 --- a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc +++ b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.h" + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass_tester.cc b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass_tester.cc index 003a39f37d4..662dfb0f9d4 100644 --- a/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass_tester.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.h" - #include + #include + +#include "paddle/fluid/framework/ir/mkldnn/softplus_activation_mkldnn_fuse_pass.h" #include "paddle/fluid/framework/op_proto_maker.h" namespace paddle { @@ -52,43 +53,27 @@ void MainTest(const std::string& activation_type) { } } -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithTanh) { - MainTest("tanh") -} +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithTanh){MainTest("tanh")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithRelu) { - MainTest("relu") -} +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithRelu){MainTest("relu")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithLeakyRelu) { - MainTest("leaky_relu") -} +TEST(FuseSoftplusActivationOneDNNPass, + FuseSoftplusWithLeakyRelu){MainTest("leaky_relu")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithSwish) { - MainTest("swish") -} +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithSwish){MainTest("swish")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithHardswish) { - MainTest("hardswish") -} +TEST(FuseSoftplusActivationOneDNNPass, + FuseSoftplusWithHardswish){MainTest("hardswish")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithSqrt) { - MainTest("sqrt") -} +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithSqrt){MainTest("sqrt")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithAbs) { MainTest("abs") } +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithAbs){MainTest("abs")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithClip) { - MainTest("clip") -} +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithClip){MainTest("clip")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithGelu) { - MainTest("gelu") -} +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithGelu){MainTest("gelu")} -TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithRelu6) { - MainTest("relu6") -} +TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithRelu6){MainTest("relu6")} TEST(FuseSoftplusActivationOneDNNPass, FuseSoftplusWithSigmoid) { MainTest("sigmoid") diff --git a/paddle/fluid/framework/ir/multi_batch_merge_pass.cc b/paddle/fluid/framework/ir/multi_batch_merge_pass.cc index 06af5eaec13..b849076935a 100644 --- a/paddle/fluid/framework/ir/multi_batch_merge_pass.cc +++ b/paddle/fluid/framework/ir/multi_batch_merge_pass.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/ir/multi_batch_merge_pass.h" #include + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/op_proto_maker.h" diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc index abb1d062c96..b907869b4a3 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/add_reader_dependency_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/ir/pass.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc index 772b4c1c915..55b6389768c 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/fix_op_run_order_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include "paddle/fluid/framework/details/eager_deletion_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/details/op_handle_base.h" diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc index 484d09fd444..5189f410e3c 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/details/all_reduce_op_handle.h" #include "paddle/fluid/framework/details/container_cast.h" #include "paddle/fluid/framework/details/fused_all_reduce_op_handle.h" diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc index 1b6245928d3..7180c3820c7 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h" + #include #include #include @@ -20,6 +21,7 @@ #include #include #include + #include "paddle/fluid/framework/details/all_reduce_op_handle.h" #include "paddle/fluid/framework/details/broadcast_op_handle.h" #include "paddle/fluid/framework/details/computation_op_handle.h" @@ -495,9 +497,9 @@ void MultiDevSSAGraphBuilderBase::CreateAllReduceOp(ir::Graph *result, "use_dgc=%d, use_grad_merge=%d", is_encoded, is_grad_merge)); - auto append_allreduce_op = [&]( - const std::vector &scopes, - const std::vector &places) -> details::OpHandleBase * { + auto append_allreduce_op = [&](const std::vector &scopes, + const std::vector &places) + -> details::OpHandleBase * { if (is_encoded) { #if defined(PADDLE_WITH_DGC) && \ (defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL)) @@ -758,13 +760,14 @@ int BalanceVarSSAGraphBuilder::GetOpDeviceID(ir::Node *node) const { "and Parameter@Grad.", node->Name(), OpProtoAndCheckerMaker::OpRoleVarAttrName())); int dev_id = GetVarDeviceID(param_grad[1]); - PADDLE_ENFORCE_NE(dev_id, -1, platform::errors::NotFound( - "Can not find Device ID, for NodeName:%s, " - "NodeType:%s, Param:%s, Param@Grad:%s" - "For this fault, you can consult the " - "Paddle technical personnel for answer ", - node->Name(), node->Op()->Type(), - param_grad[0], param_grad[1])); + PADDLE_ENFORCE_NE( + dev_id, -1, + platform::errors::NotFound("Can not find Device ID, for NodeName:%s, " + "NodeType:%s, Param:%s, Param@Grad:%s" + "For this fault, you can consult the " + "Paddle technical personnel for answer ", + node->Name(), node->Op()->Type(), + param_grad[0], param_grad[1])); return dev_id; } @@ -956,10 +959,11 @@ bool DistSSAGraphBuilder::DealWithSpecialOp(ir::Graph *result, bool insert_op = false; if (OpHaveRole(*node, OpRole::kRPC)) { int op_dev_id = CreateRPCOp(result, node); - PADDLE_ENFORCE_NE(op_dev_id, -1, platform::errors::InvalidArgument( - "Can not schedule the RPC operator to " - "the right place. NodeName:%s.", - node->Name())); + PADDLE_ENFORCE_NE(op_dev_id, -1, + platform::errors::InvalidArgument( + "Can not schedule the RPC operator to " + "the right place. 
NodeName:%s.", + node->Name())); if (node->Op()->Type() == "recv") { auto recv_vars_attr = BOOST_GET_CONST(std::vector, diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h index c76f3001676..75080742077 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.h @@ -46,7 +46,7 @@ class NCCLContextMap; class BKCLContextMap; class BKCLCommunicator; #endif -} +} // namespace platform namespace framework { class Scope; diff --git a/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc b/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc index 09ef94c0826..c7b6e477fd5 100644 --- a/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc +++ b/paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/multi_devices_graph_pass/set_reader_device_info_utils.h" + #include "paddle/fluid/framework/details/computation_op_handle.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc index 4a594777805..03d433f4db1 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass.cc @@ -51,11 +51,12 @@ static int BuildFusion(Graph* graph, const std::string& name_scope) { multihead_pattern(); // Create New OpDesc - auto fuse_creater = [&]( - Node* input0, Node* mul0, Node* mul1, Node* mul2, Node* mul0_out, - Node* mul1_out, Node* mul2_out, Node* eltadd0_b, Node* eltadd1_b, - Node* eltadd2_b, Node* eltadd_qk_b, Node* reshape2, - Node* reshape2_qkv_out, Node* scale, Node* scale_out) { + auto fuse_creater = [&](Node* input0, Node* mul0, Node* mul1, Node* mul2, + Node* mul0_out, Node* mul1_out, Node* mul2_out, + Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, + Node* eltadd_qk_b, Node* reshape2, + Node* reshape2_qkv_out, Node* scale, + Node* scale_out) { auto scale_attr = BOOST_GET_CONST(float, scale->Op()->GetAttr("scale")); // auto scale_bias = BOOST_GET_CONST(float, scale->Op()->GetAttr("bias")); // bool after_scale = @@ -756,13 +757,14 @@ int MultiHeadMatmulV2FusePass::BuildFusionV2(Graph* graph, multihead_pattern(); // Create New OpDesc - auto fuse_creater = [&]( - Node* input0, Node* mul0, Node* mul1, Node* mul2, Node* mul0_out, - Node* mul1_out, Node* mul2_out, Node* mul0_w, Node* mul1_w, Node* mul2_w, - Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, Node* eltadd_qk_b, - Node* reshape2, Node* reshape2_qkv_out, Node* scale, Node* scale_out, - Node* softmax_qk, Node* eltadd0, Node* eltadd1, Node* eltadd2, - Node* matmul_qk, Node* reshape2_qkv) { + auto fuse_creater = [&](Node* input0, Node* mul0, Node* mul1, Node* mul2, + Node* mul0_out, Node* mul1_out, Node* mul2_out, + Node* mul0_w, Node* mul1_w, Node* mul2_w, + Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, + Node* eltadd_qk_b, Node* reshape2, + Node* reshape2_qkv_out, Node* scale, Node* scale_out, + Node* softmax_qk, Node* eltadd0, Node* eltadd1, + Node* eltadd2, Node* matmul_qk, Node* reshape2_qkv) { auto scale_attr = BOOST_GET_CONST(float, scale->Op()->GetAttr("scale")); // mul (B * S * Hidden) x (Hidden * 3 * N 
* H) = (B * S * 3 * N * H) @@ -1207,11 +1209,12 @@ int MultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph, multihead_pattern(); // Create New OpDesc - auto fuse_creater = [&]( - Node* input0, Node* mul0, Node* mul1, Node* mul2, Node* mul0_out, - Node* mul1_out, Node* mul2_out, Node* mul0_w, Node* mul1_w, Node* mul2_w, - Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, Node* eltadd_qk_b, - Node* reshape2, Node* reshape2_qkv_out, Node* matmul_qk) { + auto fuse_creater = [&](Node* input0, Node* mul0, Node* mul1, Node* mul2, + Node* mul0_out, Node* mul1_out, Node* mul2_out, + Node* mul0_w, Node* mul1_w, Node* mul2_w, + Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, + Node* eltadd_qk_b, Node* reshape2, + Node* reshape2_qkv_out, Node* matmul_qk) { auto scale_attr = BOOST_GET_CONST(float, matmul_qk->Op()->GetAttr("alpha")); // mul (B * S * Hidden) x (Hidden * 3 * N * H) = (B * S * 3 * N * H) diff --git a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass_tester.cc b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass_tester.cc index b121436ee87..858ebf68b40 100644 --- a/paddle/fluid/framework/ir/multihead_matmul_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/multihead_matmul_fuse_pass_tester.cc @@ -9,8 +9,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/multihead_matmul_fuse_pass.h" // NOLINT #include + +#include "paddle/fluid/framework/ir/multihead_matmul_fuse_pass.h" // NOLINT #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/node_test.cc b/paddle/fluid/framework/ir/node_test.cc index 9c47df402bd..2d84162e13a 100644 --- a/paddle/fluid/framework/ir/node_test.cc +++ b/paddle/fluid/framework/ir/node_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/node.h" + #include "gtest/gtest.h" #include "paddle/fluid/framework/var_desc.h" diff --git a/paddle/fluid/framework/ir/op_compat_sensible_pass.cc b/paddle/fluid/framework/ir/op_compat_sensible_pass.cc index 73a8691f9e2..e309e068563 100644 --- a/paddle/fluid/framework/ir/op_compat_sensible_pass.cc +++ b/paddle/fluid/framework/ir/op_compat_sensible_pass.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/op_compat_sensible_pass.h" + #include #include #include + #include "paddle/fluid/framework/op_def_api.h" #include "paddle/fluid/framework/op_info.h" diff --git a/paddle/fluid/framework/ir/op_compat_sensible_pass.h b/paddle/fluid/framework/ir/op_compat_sensible_pass.h index e24294a03a2..393a2fb9392 100644 --- a/paddle/fluid/framework/ir/op_compat_sensible_pass.h +++ b/paddle/fluid/framework/ir/op_compat_sensible_pass.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/pass.h" diff --git a/paddle/fluid/framework/ir/op_compat_sensible_pass_tester.cc b/paddle/fluid/framework/ir/op_compat_sensible_pass_tester.cc index 756d3c2c770..4b106d75f1c 100644 --- a/paddle/fluid/framework/ir/op_compat_sensible_pass_tester.cc +++ b/paddle/fluid/framework/ir/op_compat_sensible_pass_tester.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/op_compat_sensible_pass.h" #include "gtest/gtest.h" +#include "paddle/fluid/framework/ir/op_compat_sensible_pass.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/framework/ir/pass.cc b/paddle/fluid/framework/ir/pass.cc index 2c10a68188e..85eecbd014e 100644 --- a/paddle/fluid/framework/ir/pass.cc +++ b/paddle/fluid/framework/ir/pass.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/ir/pass.h" #include + #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/op_proto_maker.h" @@ -90,9 +91,10 @@ static void MergePrograms(ProgramDesc *dst, const details::ProgramDescs &srcs, bool reverse = !append; auto create_var_visitor = [dst](const ProgramDesc &src) { - PADDLE_ENFORCE_EQ(src.Size(), 1, platform::errors::Unimplemented( - "MergePrograms can only support to " - "merge program with only one block.")); + PADDLE_ENFORCE_EQ( + src.Size(), 1, + platform::errors::Unimplemented("MergePrograms can only support to " + "merge program with only one block.")); const auto &src_block = src.Block(0); auto *dst_block = dst->MutableBlock(0); for (const auto *src_new_var : src_block.AllVars()) { diff --git a/paddle/fluid/framework/ir/pass_test.cc b/paddle/fluid/framework/ir/pass_test.cc index 616ba7f1a97..8c368a796ed 100644 --- a/paddle/fluid/framework/ir/pass_test.cc +++ b/paddle/fluid/framework/ir/pass_test.cc @@ -84,8 +84,9 @@ TEST(PassTest, TestPassAttrCheck) { } catch (paddle::platform::EnforceNotMet& e) { exception = std::string(e.what()); } - std::string msg = "Invalid type for attritube test_pass_attr, expected: " + - try_type + ", actual: int"; + std::string msg = + "Invalid type for attritube test_pass_attr, expected: " + try_type + + ", actual: int"; ASSERT_TRUE(exception.find(msg) != exception.npos); } @@ -168,8 +169,9 @@ TEST(PassTest, TestPassAttrCheckConvertAllBlocks) { } catch (paddle::platform::EnforceNotMet& e) { exception = std::string(e.what()); } - std::string msg = "Invalid type for attritube test_pass_attr, expected: " + - try_type + ", actual: int"; + std::string msg = + "Invalid type for attritube test_pass_attr, expected: " + try_type + + ", actual: int"; ASSERT_TRUE(exception.find(msg) != exception.npos); } diff --git a/paddle/fluid/framework/ir/pass_test_util.cc b/paddle/fluid/framework/ir/pass_test_util.cc index 4d8965918f8..40dcb3cf1db 100644 --- a/paddle/fluid/framework/ir/pass_test_util.cc +++ b/paddle/fluid/framework/ir/pass_test_util.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/framework/ir/pass_test_util.h" + #include #include #include @@ -23,7 +25,6 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/ir/graph_traits.h" #include "paddle/fluid/framework/ir/pass.h" -#include "paddle/fluid/framework/ir/pass_test_util.h" #include "paddle/fluid/framework/ir/pass_tester_helper.h" #include "paddle/fluid/framework/op_proto_maker.h" diff --git a/paddle/fluid/framework/ir/pass_tester_helper.h b/paddle/fluid/framework/ir/pass_tester_helper.h index acefde9df68..ad58e4e4a0c 100644 --- a/paddle/fluid/framework/ir/pass_tester_helper.h +++ b/paddle/fluid/framework/ir/pass_tester_helper.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/framework/ir/placement_pass_base.cc b/paddle/fluid/framework/ir/placement_pass_base.cc index 35ba9200607..fd1b54f8c4d 100644 --- a/paddle/fluid/framework/ir/placement_pass_base.cc +++ b/paddle/fluid/framework/ir/placement_pass_base.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/placement_pass_base.h" + #include + #include "paddle/fluid/framework/operator.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc index 6c06b741adb..80e6c2b7967 100644 --- a/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/preln_skip_layernorm_fuse_pass.cc @@ -43,8 +43,8 @@ struct PrelnSkipLayerNorm : public PatternBase { PATTERN_DECL_NODE(layer_norm); // declare variable node's name PATTERN_DECL_NODE( - elementwise_out); // (elementwise_input_x,elementwise_input_y) -> - // elementwise_out + elementwise_out); // (elementwise_input_x,elementwise_input_y) + // -> elementwise_out PATTERN_DECL_NODE(layer_norm_bias); PATTERN_DECL_NODE(layer_norm_scale); PATTERN_DECL_NODE(layer_norm_out); diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc index a03a6f5b2c7..a2dd846ba52 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h" + #include #include "paddle/fluid/framework/op_version_registry.h" @@ -145,9 +146,9 @@ void BuildRepeatedFCReluPattern(PDPattern* pattern, return x->outputs[fc_idx]->outputs[0]; }; - auto var_next_is_fc_act_repeated_n_times = [=]( - Node* x, int repeated_times, const std::string& act_type = "relu", - bool check_in_has_only_one_out = true) -> bool { + auto var_next_is_fc_act_repeated_n_times = + [=](Node* x, int repeated_times, const std::string& act_type = "relu", + bool check_in_has_only_one_out = true) -> bool { for (int i = 0; i < repeated_times; ++i) { if (!var_next_is_fc_act(x, act_type, i == 0 && check_in_has_only_one_out)) { @@ -191,9 +192,9 @@ void BuildRepeatedFCReluPattern(PDPattern* pattern, return nullptr; }; - auto var_before_is_fc_act_repeated_n_times = [=]( - Node* x, int repeated_times, - const std::string& act_type = "relu") -> bool { + auto var_before_is_fc_act_repeated_n_times = [=](Node* x, int repeated_times, + const std::string& act_type = + "relu") -> bool { for (int i = 0; i < repeated_times; ++i) { if (!var_before_is_fc_act(x, act_type, i == repeated_times - 1)) { return false; diff --git a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass_tester.cc b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass_tester.cc index f0ff77acf9f..3112b776ae5 100644 --- a/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass_tester.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h" - #include + #include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/runtime_context_cache_pass.cc b/paddle/fluid/framework/ir/runtime_context_cache_pass.cc index 778e658354f..451e41e767d 100644 --- a/paddle/fluid/framework/ir/runtime_context_cache_pass.cc +++ b/paddle/fluid/framework/ir/runtime_context_cache_pass.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/runtime_context_cache_pass.h" + #include "paddle/fluid/framework/operator.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc index 9fa951920f4..2c0b142c98f 100644 --- a/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.h" + #include #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc index 2b084bd5734..052b0a4bdc1 100644 --- a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc @@ -44,8 +44,8 @@ PDNode* BuildSeqPoolConcatPattern(PDPattern* pattern, is_concat_op_with_inputs(x->outputs[0], num_inputs); }; - auto is_seqpool_op_with_pootype_of_nth_input_of_concat = [=]( - Node* x, const std::string& type, int idx) -> bool { + auto is_seqpool_op_with_pootype_of_nth_input_of_concat = + [=](Node* x, const std::string& type, int idx) -> bool { bool this_is_seqpool_op = x && x->IsOp() && x->Op()->Type() == "sequence_pool" && x->Op()->HasAttr("pooltype") && diff --git a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc index d3668038518..e56ba9ad1e7 100644 --- a/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/seqpool_concat_fuse_pass_tester.cc @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h" #include + +#include "paddle/fluid/framework/ir/seqpool_concat_fuse_pass.h" #include "paddle/fluid/framework/op_proto_maker.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc index 7200e0ac1d4..916adbbe337 100644 --- a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.cc @@ -44,11 +44,11 @@ static void GetConcatNodes(ir::Graph* graph, std::vector* concat_nodes) { GraphPatternDetector gpd; auto* pattern = gpd.mutable_pattern(); auto concat_op_node = BuildCVMConcatPattern(pattern); - GraphPatternDetector::handle_t handler = [&]( - const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { - Node* concat_op = subgraph.at(concat_op_node); - concat_nodes->push_back(concat_op); - }; + GraphPatternDetector::handle_t handler = + [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { + Node* concat_op = subgraph.at(concat_op_node); + concat_nodes->push_back(concat_op); + }; gpd(graph, handler); } } // anonymous namespace @@ -148,19 +148,19 @@ void SeqPoolCVMConcatFusePass::ApplyImpl(ir::Graph* graph) const { Node* cvm_input_of_cvm; Node* concat_out_var = concat_node->outputs[0]; - GraphPatternDetector::handle_t handler = [&]( - const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { - Node* seqpool_in_var = subgraph.at(seqpool_in_var_node); - Node* seqpool_op = subgraph.at(seqpool_op_node); - Node* seqpool_out_var = subgraph.at(seqpool_out_var_node); - Node* seqpool_idx_out_var = subgraph.at(seqpool_idx_out_var_node); - Node* cvm_op = subgraph.at(cvm_op_node); - Node* cvm_out_var = subgraph.at(cvm_out_var_node); - cvm_input_of_cvm = subgraph.at(cvm_cvm_in_var_node); - marked_nodes.insert({seqpool_op, seqpool_out_var, seqpool_idx_out_var, - cvm_op, cvm_out_var, concat_node}); - ins_to_concat[cvm_out_var->Name()] = seqpool_in_var; - }; + GraphPatternDetector::handle_t handler = + [&](const GraphPatternDetector::subgraph_t& subgraph, Graph* graph) { + Node* seqpool_in_var = subgraph.at(seqpool_in_var_node); + Node* seqpool_op = 
subgraph.at(seqpool_op_node); + Node* seqpool_out_var = subgraph.at(seqpool_out_var_node); + Node* seqpool_idx_out_var = subgraph.at(seqpool_idx_out_var_node); + Node* cvm_op = subgraph.at(cvm_op_node); + Node* cvm_out_var = subgraph.at(cvm_out_var_node); + cvm_input_of_cvm = subgraph.at(cvm_cvm_in_var_node); + marked_nodes.insert({seqpool_op, seqpool_out_var, seqpool_idx_out_var, + cvm_op, cvm_out_var, concat_node}); + ins_to_concat[cvm_out_var->Name()] = seqpool_in_var; + }; gpd(graph, handler); if (!ins_to_concat.empty()) { diff --git a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass_tester.cc b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass_tester.cc index bba640cf148..8d8ebc955d3 100644 --- a/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass_tester.cc @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.h" #include + +#include "paddle/fluid/framework/ir/seqpool_cvm_concat_fuse_pass.h" #include "paddle/fluid/framework/op_proto_maker.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc index bcd7bedcc43..9007105950b 100644 --- a/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc +++ b/paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h" + #include -#include "paddle/fluid/framework/ir/shuffle_channel_detect_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass_tester.cc b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass_tester.cc index 80f387c4427..908797163d2 100644 --- a/paddle/fluid/framework/ir/simplify_with_basic_ops_pass_tester.cc +++ b/paddle/fluid/framework/ir/simplify_with_basic_ops_pass_tester.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/framework/ir/simplify_with_basic_ops_pass.h" - #include + #include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/simplify_with_basic_ops_pass.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc index bfa14d9296b..6bebe8de9f2 100644 --- a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass.cc @@ -43,8 +43,8 @@ struct SkipLayerNorm : public PatternBase { PATTERN_DECL_NODE(layer_norm); // declare variable node's name PATTERN_DECL_NODE( - elementwise_out); // (elementwise_input_x,elementwise_input_y) -> - // elementwise_out + elementwise_out); // (elementwise_input_x,elementwise_input_y) + // -> elementwise_out PATTERN_DECL_NODE(layer_norm_bias); PATTERN_DECL_NODE(layer_norm_scale); PATTERN_DECL_NODE(layer_norm_out); diff --git a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc index 29be2c3cb09..c95fd0abd52 100644 --- a/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/skip_layernorm_fuse_pass_tester.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/skip_layernorm_fuse_pass.h" - #include + #include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/skip_layernorm_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc index 7c43b022182..a8c7150d6e3 100644 --- a/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc +++ b/paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc @@ -170,8 +170,9 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto* matmul_xy_op = pattern->NewNode( [=](Node* x) { - return x && x->IsOp() && (x->Op()->Type() == "matmul_v2" || - x->Op()->Type() == "matmul") && + return x && x->IsOp() && + (x->Op()->Type() == "matmul_v2" || + x->Op()->Type() == "matmul") && is_fusion_first_mul_out(x->outputs[0]); }, name_scope + "/matmul_xy_op"); @@ -212,8 +213,9 @@ PDNode* BuildSquaredMatSubPattern(PDPattern* pattern, auto* matmul_squared_x_y_op = pattern->NewNode( [=](Node* x) { - return x && x->IsOp() && (x->Op()->Type() == "matmul_v2" || - x->Op()->Type() == "matmul") && + return x && x->IsOp() && + (x->Op()->Type() == "matmul_v2" || + x->Op()->Type() == "matmul") && is_fusion_mat_squared_x_y_op_out(x->outputs[0]); }, name_scope + "/matmul_squared_x_y_op"); diff --git a/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc b/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc index 94fb6850641..78dafaa1e2f 100644 --- a/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc +++ b/paddle/fluid/framework/ir/sync_batch_norm_pass_tester.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include #include "paddle/fluid/framework/ir/pass.h" diff --git a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc index bda6b903864..6802310383d 100644 --- a/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc +++ b/paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.h" + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/trt_map_matmul_to_mul_pass.cc b/paddle/fluid/framework/ir/trt_map_matmul_to_mul_pass.cc index d3211c08414..a6e3780fd22 100644 --- a/paddle/fluid/framework/ir/trt_map_matmul_to_mul_pass.cc +++ b/paddle/fluid/framework/ir/trt_map_matmul_to_mul_pass.cc @@ -16,9 +16,9 @@ #include #include + #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/op_proto_maker.h" - #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc index 798a038f767..2e3e957fd15 100644 --- a/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_multihead_matmul_fuse_pass.cc @@ -51,11 +51,12 @@ static int BuildFusion(Graph* graph, const std::string& name_scope) { multihead_pattern(); // Create New OpDesc - auto fuse_creater = [&]( - Node* input0, Node* mul0, Node* mul1, Node* mul2, Node* mul0_out, - Node* mul1_out, Node* mul2_out, Node* eltadd0_b, Node* eltadd1_b, - Node* eltadd2_b, Node* eltadd_qk_b, Node* reshape2, - Node* reshape2_qkv_out, Node* scale, Node* scale_out) { + auto fuse_creater = [&](Node* input0, Node* mul0, Node* mul1, Node* mul2, + Node* mul0_out, Node* mul1_out, Node* mul2_out, + Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, + Node* eltadd_qk_b, Node* reshape2, + Node* reshape2_qkv_out, Node* scale, + Node* scale_out) { auto scale_attr = BOOST_GET_CONST(float, scale->Op()->GetAttr("scale")); // auto scale_bias = BOOST_GET_CONST(float, scale->Op()->GetAttr("bias")); // bool after_scale = @@ -756,13 +757,14 @@ int TrtMultiHeadMatmulV2FusePass::BuildFusionV2(Graph* graph, multihead_pattern(); // Create New OpDesc - auto fuse_creater = [&]( - Node* input0, Node* mul0, Node* mul1, Node* mul2, Node* mul0_out, - Node* mul1_out, Node* mul2_out, Node* mul0_w, Node* mul1_w, Node* mul2_w, - Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, Node* eltadd_qk_b, - Node* reshape2, Node* reshape2_qkv_out, Node* scale, Node* scale_out, - Node* softmax_qk, Node* eltadd0, Node* eltadd1, Node* eltadd2, - Node* matmul_qk, Node* reshape2_qkv) { + auto fuse_creater = [&](Node* input0, Node* mul0, Node* mul1, Node* mul2, + Node* mul0_out, Node* mul1_out, Node* mul2_out, + Node* mul0_w, Node* mul1_w, Node* mul2_w, + Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, + Node* eltadd_qk_b, Node* reshape2, + Node* reshape2_qkv_out, Node* scale, Node* scale_out, + Node* softmax_qk, Node* eltadd0, Node* eltadd1, + Node* eltadd2, Node* matmul_qk, Node* reshape2_qkv) { auto scale_attr = BOOST_GET_CONST(float, scale->Op()->GetAttr("scale")); // mul (B * S * Hidden) x (Hidden * 3 * N * H) = (B * S * 3 * N * H) @@ -1229,11 +1231,12 @@ int TrtMultiHeadMatmulV3FusePass::BuildFusionV3(Graph* graph, multihead_pattern(); // Create New OpDesc - auto fuse_creater = [&]( - 
Node* input0, Node* mul0, Node* mul1, Node* mul2, Node* mul0_out, - Node* mul1_out, Node* mul2_out, Node* mul0_w, Node* mul1_w, Node* mul2_w, - Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, Node* eltadd_qk_b, - Node* reshape2, Node* reshape2_qkv_out, Node* matmul_qk) { + auto fuse_creater = [&](Node* input0, Node* mul0, Node* mul1, Node* mul2, + Node* mul0_out, Node* mul1_out, Node* mul2_out, + Node* mul0_w, Node* mul1_w, Node* mul2_w, + Node* eltadd0_b, Node* eltadd1_b, Node* eltadd2_b, + Node* eltadd_qk_b, Node* reshape2, + Node* reshape2_qkv_out, Node* matmul_qk) { auto scale_attr = BOOST_GET_CONST(float, matmul_qk->Op()->GetAttr("alpha")); // mul (B * S * Hidden) x (Hidden * 3 * N * H) = (B * S * 3 * N * H) diff --git a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc index 53452d4239a..13883909435 100644 --- a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc @@ -43,8 +43,8 @@ struct TrtSkipLayerNorm : public PatternBase { PATTERN_DECL_NODE(layer_norm); // declare variable node's name PATTERN_DECL_NODE( - elementwise_out); // (elementwise_input_x,elementwise_input_y) -> - // elementwise_out + elementwise_out); // (elementwise_input_x,elementwise_input_y) + // -> elementwise_out PATTERN_DECL_NODE(layer_norm_bias); PATTERN_DECL_NODE(layer_norm_scale); PATTERN_DECL_NODE(layer_norm_out); diff --git a/paddle/fluid/framework/ir/unsqueeze2_eltwise_fuse_pass_tester.cc b/paddle/fluid/framework/ir/unsqueeze2_eltwise_fuse_pass_tester.cc index 067a37c611a..3ebd61ff575 100644 --- a/paddle/fluid/framework/ir/unsqueeze2_eltwise_fuse_pass_tester.cc +++ b/paddle/fluid/framework/ir/unsqueeze2_eltwise_fuse_pass_tester.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/ir/unsqueeze2_eltwise_fuse_pass.h" - #include + #include "paddle/fluid/framework/ir/pass_tester_helper.h" +#include "paddle/fluid/framework/ir/unsqueeze2_eltwise_fuse_pass.h" #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc b/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc index 20075a49749..19836b69ae9 100644 --- a/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc +++ b/paddle/fluid/framework/ir/yolo_box_fuse_pass.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/ir/yolo_box_fuse_pass.h" + #include + #include "glog/logging.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/pass.h" diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h index 1c5c12b3d57..dd316a0979c 100644 --- a/paddle/fluid/framework/lod_tensor.h +++ b/paddle/fluid/framework/lod_tensor.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/framework/lod_tensor_array.h b/paddle/fluid/framework/lod_tensor_array.h index 36a5c3c5d60..7aa180ed75c 100644 --- a/paddle/fluid/framework/lod_tensor_array.h +++ b/paddle/fluid/framework/lod_tensor_array.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/lod_tensor.h" namespace paddle { diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc index a89baac3e7a..254e70231ea 100644 --- a/paddle/fluid/framework/lod_tensor_test.cc +++ b/paddle/fluid/framework/lod_tensor_test.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/lod_tensor.h" + #include #include -#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/phi/core/lod_utils.h" namespace paddle { diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc index dba3b3ff1e6..1c2740c2b2e 100644 --- a/paddle/fluid/framework/naive_executor.cc +++ b/paddle/fluid/framework/naive_executor.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/naive_executor.h" + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/variable_helper.h" #include "paddle/fluid/platform/denormal.h" diff --git a/paddle/fluid/framework/naive_executor_test.cc b/paddle/fluid/framework/naive_executor_test.cc index 2f3c3f3d06e..763e314d226 100644 --- a/paddle/fluid/framework/naive_executor_test.cc +++ b/paddle/fluid/framework/naive_executor_test.cc @@ -13,8 +13,11 @@ // limitations under the License. #include "paddle/fluid/framework/naive_executor.h" + #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/framework/new_executor/data_transfer.cc b/paddle/fluid/framework/new_executor/data_transfer.cc index d0e5565139c..171e15162fb 100644 --- a/paddle/fluid/framework/new_executor/data_transfer.cc +++ b/paddle/fluid/framework/new_executor/data_transfer.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/new_executor/data_transfer.h" + #include "paddle/fluid/framework/convert_utils.h" namespace paddle { @@ -276,9 +277,9 @@ std::shared_ptr TransferDevice(const std::string& var_name, // 2. Construct VariableNameMap VariableNameMap in_name_map = {{"X", {var_name}}}; VariableNameMap out_name_map = {{"Out", {*new_var_name}}}; - int dst_place_type = platform::is_cpu_place(dst_place) - ? 0 - : platform::is_gpu_place(dst_place) ? 1 : -1; + int dst_place_type = platform::is_cpu_place(dst_place) ? 0 + : platform::is_gpu_place(dst_place) ? 1 + : -1; AttributeMap attr_map = {{"dst_place_type", dst_place_type}}; // 3. Create memcpy_d2h_op or memcpy_h2d_op diff --git a/paddle/fluid/framework/new_executor/event_manager.cc b/paddle/fluid/framework/new_executor/event_manager.cc index bca2264b66a..0bfa00494d6 100644 --- a/paddle/fluid/framework/new_executor/event_manager.cc +++ b/paddle/fluid/framework/new_executor/event_manager.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/new_executor/event_manager.h" + #include "paddle/fluid/platform/profiler/event_tracing.h" namespace paddle { diff --git a/paddle/fluid/framework/new_executor/executor_statistics.cc b/paddle/fluid/framework/new_executor/executor_statistics.cc index fb79712d47d..f6afcf2f24d 100644 --- a/paddle/fluid/framework/new_executor/executor_statistics.cc +++ b/paddle/fluid/framework/new_executor/executor_statistics.cc @@ -13,6 +13,7 @@ // limitations under the License. 
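The data_transfer.cc hunk above illustrates the new treatment of chained conditional operators: each `?`/`:` arm is aligned under the condition instead of nesting further to the right. A compilable sketch of the same shape, with plain booleans standing in for platform::is_cpu_place / platform::is_gpu_place:

// Sketch of the chained ?: alignment from the data_transfer.cc hunk.
int dst_place_type(bool is_cpu, bool is_gpu) {
  return is_cpu   ? 0
         : is_gpu ? 1
                  : -1;  // -1: neither CPU nor GPU
}

int main() { return dst_place_type(true, false) == 0 ? 0 : 1; }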
#include "paddle/fluid/framework/new_executor/executor_statistics.h" + #include #include #include @@ -21,6 +22,7 @@ #include #include #include + #include "glog/logging.h" #include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/os_info.h" @@ -520,7 +522,7 @@ void StatisticsEngine::MergeEvents(std::function merger, int StatisticsEngine::MergeInnerthreadEvents( std::vector>* all_evts) { - auto merger = [& priorities = priorities_](size_t idx1, size_t idx2) { + auto merger = [&priorities = priorities_](size_t idx1, size_t idx2) { return priorities[idx1].innerthread_priority <= priorities[idx2].innerthread_priority ? idx1 @@ -541,7 +543,7 @@ int StatisticsEngine::MergeInnerthreadEvents( int StatisticsEngine::MergeInterthreadEvents( std::vector>* all_evts) { - auto merger = [& priorities = priorities_](size_t idx1, size_t idx2) { + auto merger = [&priorities = priorities_](size_t idx1, size_t idx2) { return priorities[idx1].interthread_priority <= priorities[idx2].interthread_priority ? idx1 diff --git a/paddle/fluid/framework/new_executor/executor_statistics.h b/paddle/fluid/framework/new_executor/executor_statistics.h index 530e9455968..ebe9d3a2e79 100644 --- a/paddle/fluid/framework/new_executor/executor_statistics.h +++ b/paddle/fluid/framework/new_executor/executor_statistics.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/platform/profiler/event_node.h" namespace paddle { diff --git a/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc b/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc index 46c85a22dc3..1ae9f4223d3 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc +++ b/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.cc @@ -110,7 +110,7 @@ void InterpreterCoreEventGarbageCollector::Free( const platform::DeviceContext* ctx) { event->Record(ctx); event->SetFininshed(); // Only for CPU Event - queue_->AddTask([ container = garbages, event = event ]() { + queue_->AddTask([container = garbages, event = event]() { while (!event->Query()) { #if defined(_WIN32) SleepEx(50, FALSE); @@ -128,7 +128,7 @@ void InterpreterCoreEventGarbageCollector::Free( const platform::DeviceContext* ctx) { event->Record(ctx); event->SetFininshed(); // Only for CPU Event - queue_->AddTask([ container = garbage, event = event ]() { + queue_->AddTask([container = garbage, event = event]() { while (!event->Query()) { #if defined(_WIN32) SleepEx(50, FALSE); diff --git a/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.h b/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.h index 33954713d4e..57963269663 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.h +++ b/paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h" #include "paddle/fluid/framework/new_executor/workqueue/workqueue.h" diff --git a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc index a20cd275398..8e849c79bd2 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc +++ b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h" + #include "paddle/fluid/framework/garbage_collector.h" namespace paddle { diff --git a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h index 34f95eee731..d0159c0ca83 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h +++ b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/memory/allocation/spin_lock.h" #include "paddle/fluid/platform/device_event.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/new_executor/interpretercore.cc b/paddle/fluid/framework/new_executor/interpretercore.cc index da2fd0c8c61..fe0c7fe0721 100644 --- a/paddle/fluid/framework/new_executor/interpretercore.cc +++ b/paddle/fluid/framework/new_executor/interpretercore.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/new_executor/interpretercore.h" + #include + #include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/framework/details/share_tensor_buffer_functor.h" #include "paddle/fluid/framework/new_executor/garbage_collector/event_garbage_collector.h" @@ -585,10 +587,12 @@ void InterpreterCore::ExecuteInstructionList( for (size_t i = 0; i < dependecy_count_.size(); ++i) { if (dependecy_count_[i] == 0) { - async_work_queue_->AddTask(vec_instr.at(i).KernelType(), [ - this, i, atomic_deps = atomic_deps.get(), - atomic_var_ref = atomic_var_ref.get() - ] { RunInstructionAsync(i, atomic_deps, atomic_var_ref); }); + async_work_queue_->AddTask(vec_instr.at(i).KernelType(), + [this, i, atomic_deps = atomic_deps.get(), + atomic_var_ref = atomic_var_ref.get()] { + RunInstructionAsync(i, atomic_deps, + atomic_var_ref); + }); } } @@ -692,10 +696,10 @@ void InterpreterCore::RunInstructionAsync( ready_ops.pop(); auto& instr_node = vec_instruction_.at(instr_id); VLOG(5) << __func__ << " OP id:" << instr_node.Id() - << " name:" << instr_node.OpBase()->Type() - << " type:" << (instr_node.KernelType() == OpFuncType::kQueueSync - ? "kQueueSync" - : "kQueueAsync") + << " name:" << instr_node.OpBase()->Type() << " type:" + << (instr_node.KernelType() == OpFuncType::kQueueSync + ? "kQueueSync" + : "kQueueAsync") << " runs on " << platform::GetCurrentThreadName(); auto* op = instr_node.OpBase(); diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.cc b/paddle/fluid/framework/new_executor/interpretercore_util.cc index f601a4ad28b..0b75964b94e 100644 --- a/paddle/fluid/framework/new_executor/interpretercore_util.cc +++ b/paddle/fluid/framework/new_executor/interpretercore_util.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/new_executor/interpretercore_util.h" + #include #include "paddle/fluid/framework/executor_gc_helper.h" @@ -398,9 +399,10 @@ void build_op_func_list(const platform::Place& place, // But some OPs do have such behavior (e.g., cinn_launch OP). Here special // treatment for them. 
if (op_with_kernel->Type() == "cinn_launch") { - VLOG(6) << "OP(" << op_with_kernel->Type() << ") use scope in kernel, " - "so pass a real scope to " - "ExecutionContext"; + VLOG(6) << "OP(" << op_with_kernel->Type() + << ") use scope in kernel, " + "so pass a real scope to " + "ExecutionContext"; runtime_scope = local_scope; } @@ -747,8 +749,9 @@ std::map> get_downstream_map( std::map> build_op_downstream_map( const std::vector& vec_instruction, std::vector>* op_happens_before) { - auto var2min_rw_op = std::map< - int, std::list>(); // # map from variable id to read / write op id. + auto var2min_rw_op = + std::map>(); // # map from variable id to read / + // write op id. auto var2recent_write_op = std::map(); // # map from variable to recent write op. auto op2dependences = @@ -825,8 +828,14 @@ std::map> build_op_downstream_map( // add dependences for random op, make sure that the random op is scheduled // sequentially const std::set random_op_set = { - "bernoulli", "poisson", "multinomial", "gaussian_random", - "truncated_gaussian_random", "uniform_random", "randint", "randperm", + "bernoulli", + "poisson", + "multinomial", + "gaussian_random", + "truncated_gaussian_random", + "uniform_random", + "randint", + "randperm", "exponential", "sampling_id" "dropout", @@ -846,7 +855,10 @@ std::map> build_op_downstream_map( // add dependency for communication op auto is_comm_op = [](std::string op) -> bool { const std::set special_comm_op_set = { - "send", "recv", "send_v2", "recv_v2", + "send", + "recv", + "send_v2", + "recv_v2", }; const std::string communication_op_prefix = "c_"; if (op.find(communication_op_prefix) != std::string::npos || diff --git a/paddle/fluid/framework/new_executor/interpretercore_util.h b/paddle/fluid/framework/new_executor/interpretercore_util.h index 60ac3702f4b..3d5b067c187 100644 --- a/paddle/fluid/framework/new_executor/interpretercore_util.h +++ b/paddle/fluid/framework/new_executor/interpretercore_util.h @@ -22,10 +22,9 @@ #include #include -#include - #include #include +#include #include #include diff --git a/paddle/fluid/framework/new_executor/new_executor_defs.cc b/paddle/fluid/framework/new_executor/new_executor_defs.cc index c75a7871d63..1a4dd2edf27 100644 --- a/paddle/fluid/framework/new_executor/new_executor_defs.cc +++ b/paddle/fluid/framework/new_executor/new_executor_defs.cc @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/framework/new_executor/new_executor_defs.h" + #include #include #include #include -#include "paddle/fluid/framework/new_executor/new_executor_defs.h" #include "paddle/phi/core/utils/rw_lock.h" // When in inference scenario, the scopes will not be written by two threads in @@ -385,10 +386,11 @@ InterpretercoreInferShapeContext::GetOutputsVarType( void InterpretercoreInferShapeContext::SetOutputDim(const std::string& name, const DDim& dim) { auto& vars = OutputVars(name); - PADDLE_ENFORCE_EQ(vars.size(), 1UL, platform::errors::InvalidArgument( - "Output(%s) should hold one element, " - "but now it holds %zu elements.", - name, vars.size())); + PADDLE_ENFORCE_EQ( + vars.size(), 1UL, + platform::errors::InvalidArgument("Output(%s) should hold one element, " + "but now it holds %zu elements.", + name, vars.size())); SetDim(vars[0], dim); } @@ -653,8 +655,9 @@ void VariableScope::CheckExist(int id) const { } void VariableScope::CheckExist(const std::string& name) const { - PADDLE_ENFORCE_EQ(HasVar(name), true, platform::errors::NotFound( - "%s not in VariableScope.", name)); + PADDLE_ENFORCE_EQ( + HasVar(name), true, + platform::errors::NotFound("%s not in VariableScope.", name)); } void VariableScope::ClearListener() { @@ -709,8 +712,9 @@ void VariableScopeListener::onClear() {} Instruction::Instruction(size_t id, OpFuncNode&& op_func_node, const platform::DeviceContext& dev_ctx) : id_(id), op_func_node_(op_func_node), dev_ctx_(dev_ctx) { - PADDLE_ENFORCE_GE(id, 0, platform::errors::PreconditionNotMet( - "Required id >= 0, but received id = %d", id)); + PADDLE_ENFORCE_GE(id, 0, + platform::errors::PreconditionNotMet( + "Required id >= 0, but received id = %d", id)); } size_t Instruction::Id() const { return id_; } diff --git a/paddle/fluid/framework/new_executor/standalone_executor.cc b/paddle/fluid/framework/new_executor/standalone_executor.cc index 31315df5701..64332d7fc90 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/framework/new_executor/standalone_executor.h" + #include "paddle/fluid/framework/new_executor/interpretercore_util.h" #include "paddle/fluid/platform/profiler/event_tracing.h" diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 23bd777fae1..60d59899549 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include #include diff --git a/paddle/fluid/framework/new_executor/stream_analyzer.cc b/paddle/fluid/framework/new_executor/stream_analyzer.cc index fdcd19b0309..6c689c8548b 100644 --- a/paddle/fluid/framework/new_executor/stream_analyzer.cc +++ b/paddle/fluid/framework/new_executor/stream_analyzer.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/new_executor/stream_analyzer.h" + #include namespace paddle { diff --git a/paddle/fluid/framework/new_executor/stream_analyzer.h b/paddle/fluid/framework/new_executor/stream_analyzer.h index 2a276c6f509..8a6552c6883 100644 --- a/paddle/fluid/framework/new_executor/stream_analyzer.h +++ b/paddle/fluid/framework/new_executor/stream_analyzer.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/framework/new_executor/new_executor_defs.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/device_event.h" diff --git a/paddle/fluid/framework/new_executor/workqueue/event_count.h b/paddle/fluid/framework/new_executor/workqueue/event_count.h index 7a826c39907..7c20e12ff1f 100644 --- a/paddle/fluid/framework/new_executor/workqueue/event_count.h +++ b/paddle/fluid/framework/new_executor/workqueue/event_count.h @@ -54,6 +54,7 @@ #include #include #include + #include "glog/logging.h" namespace paddle { diff --git a/paddle/fluid/framework/new_executor/workqueue/events_waiter.cc b/paddle/fluid/framework/new_executor/workqueue/events_waiter.cc index 346e20d811e..dbe609427ad 100644 --- a/paddle/fluid/framework/new_executor/workqueue/events_waiter.cc +++ b/paddle/fluid/framework/new_executor/workqueue/events_waiter.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/new_executor/workqueue/events_waiter.h" + #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/framework/new_executor/workqueue/events_waiter.h b/paddle/fluid/framework/new_executor/workqueue/events_waiter.h index 9d85f4a2724..9284ffa853a 100644 --- a/paddle/fluid/framework/new_executor/workqueue/events_waiter.h +++ b/paddle/fluid/framework/new_executor/workqueue/events_waiter.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/framework/new_executor/workqueue/event_count.h" #include "paddle/fluid/memory/allocation/spin_lock.h" diff --git a/paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h b/paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h index 559eb6a7490..20aebfba8e8 100644 --- a/paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h +++ b/paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h @@ -12,6 +12,7 @@ #include #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/new_executor/workqueue/event_count.h" #include "paddle/fluid/framework/new_executor/workqueue/run_queue.h" diff --git a/paddle/fluid/framework/new_executor/workqueue/run_queue.h b/paddle/fluid/framework/new_executor/workqueue/run_queue.h index 2fc42cf308a..7644425a484 100644 --- a/paddle/fluid/framework/new_executor/workqueue/run_queue.h +++ b/paddle/fluid/framework/new_executor/workqueue/run_queue.h @@ -42,6 +42,7 @@ #include #include #include + #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" #include "paddle/fluid/memory/allocation/spin_lock.h" @@ -76,9 +77,8 @@ class RunQueue { unsigned front = front_.load(std::memory_order_relaxed); Elem* e = &array_[front & kMask]; uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kEmpty || - !e->state.compare_exchange_strong(s, kBusy, - std::memory_order_acquire)) { + if (s != kEmpty || !e->state.compare_exchange_strong( + s, kBusy, std::memory_order_acquire)) { return w; } front_.store(front + 1 + (kSize << 1), std::memory_order_relaxed); @@ -93,9 +93,8 @@ class RunQueue { unsigned front = 
front_.load(std::memory_order_relaxed); Elem* e = &array_[(front - 1) & kMask]; uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kReady || - !e->state.compare_exchange_strong(s, kBusy, - std::memory_order_acquire)) { + if (s != kReady || !e->state.compare_exchange_strong( + s, kBusy, std::memory_order_acquire)) { return Work(); } Work w = std::move(e->w); @@ -112,9 +111,8 @@ class RunQueue { unsigned back = back_.load(std::memory_order_relaxed); Elem* e = &array_[(back - 1) & kMask]; uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kEmpty || - !e->state.compare_exchange_strong(s, kBusy, - std::memory_order_acquire)) { + if (s != kEmpty || !e->state.compare_exchange_strong( + s, kBusy, std::memory_order_acquire)) { return w; } back = ((back - 1) & kMask2) | (back & ~kMask2); @@ -134,9 +132,8 @@ class RunQueue { unsigned back = back_.load(std::memory_order_relaxed); Elem* e = &array_[back & kMask]; uint8_t s = e->state.load(std::memory_order_relaxed); - if (s != kReady || - !e->state.compare_exchange_strong(s, kBusy, - std::memory_order_acquire)) { + if (s != kReady || !e->state.compare_exchange_strong( + s, kBusy, std::memory_order_acquire)) { return Work(); } Work w = std::move(e->w); @@ -163,9 +160,8 @@ class RunQueue { Elem* e = &array_[mid & kMask]; uint8_t s = e->state.load(std::memory_order_relaxed); if (n == 0) { - if (s != kReady || - !e->state.compare_exchange_strong(s, kBusy, - std::memory_order_acquire)) + if (s != kReady || !e->state.compare_exchange_strong( + s, kBusy, std::memory_order_acquire)) continue; start = mid; } else { diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc index 0f0de8ef9b0..b06c540b756 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue.cc +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue.cc @@ -5,6 +5,7 @@ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
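The run_queue.h hunks reflow the slot-state CAS so the call breaks inside the argument list rather than before it. Behaviorally it is the usual claim-a-slot pattern: read the state, then proceed only if a compare-exchange moves it from the expected value to kBusy. A reduced, runnable sketch of that pattern on a single slot:

#include <atomic>
#include <cassert>
#include <cstdint>

enum : uint8_t { kEmpty = 0, kBusy = 1, kReady = 2 };

// Try to claim an empty slot, as in RunQueue::PushFront: fail fast if the
// observed state is wrong, or if another thread wins the compare-exchange.
bool try_claim(std::atomic<uint8_t>& state) {
  uint8_t s = state.load(std::memory_order_relaxed);
  if (s != kEmpty ||
      !state.compare_exchange_strong(s, kBusy, std::memory_order_acquire)) {
    return false;
  }
  return true;  // we own the slot; publish kReady once writing is done
}

int main() {
  std::atomic<uint8_t> slot{kEmpty};
  assert(try_claim(slot));   // first claim succeeds
  assert(!try_claim(slot));  // slot is now kBusy, second claim fails
  return 0;
}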
#include "paddle/fluid/framework/new_executor/workqueue/workqueue.h" + #include "paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h" #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" #include "paddle/fluid/platform/enforce.h" @@ -64,11 +65,8 @@ class WorkQueueImpl : public WorkQueue { platform::TracerEventType::UserDefined, 10 /*level*/); if (tracker_ != nullptr) { - fn = [ - task = std::move(fn), raii = CounterGuard(tracker_) - ]() mutable { - task(); - }; + fn = [task = std::move(fn), + raii = CounterGuard(tracker_)]() mutable { task(); }; } queue_->AddTask(std::move(fn)); } @@ -158,11 +156,8 @@ void WorkQueueGroupImpl::AddTask(size_t queue_idx, std::function fn) { 10 /*level*/); assert(queue_idx < queues_.size()); if (queues_options_.at(queue_idx).track_task) { - fn = [ - task = std::move(fn), raii = CounterGuard(tracker_) - ]() mutable { - task(); - }; + fn = [task = std::move(fn), + raii = CounterGuard(tracker_)]() mutable { task(); }; } queues_[queue_idx]->AddTask(std::move(fn)); } diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue.h b/paddle/fluid/framework/new_executor/workqueue/workqueue.h index 2c2576528fe..1a1900c5687 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue.h +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -118,10 +119,10 @@ class WorkQueue { std::bind(std::forward(f), std::forward(args)...); std::promise prom; std::future res = prom.get_future(); - AddTask([ - t = std::move(task), - p = FakeCopyable>(std::move(prom)) - ]() mutable { p.Get().set_value(t()); }); + AddTask([t = std::move(task), p = FakeCopyable>( + std::move(prom))]() mutable { + p.Get().set_value(t()); + }); return res; } @@ -158,10 +159,9 @@ class WorkQueueGroup { std::bind(std::forward(f), std::forward(args)...); std::promise prom; std::future res = prom.get_future(); - AddTask(queue_idx, [ - t = std::move(task), - p = FakeCopyable>(std::move(prom)) - ]() mutable { p.Get().set_value(t()); }); + AddTask(queue_idx, [t = std::move(task), + p = FakeCopyable>(std::move( + prom))]() mutable { p.Get().set_value(t()); }); return res; } diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue_test.cc b/paddle/fluid/framework/new_executor/workqueue/workqueue_test.cc index 857eaead5b6..3e38d0dbbf9 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue_test.cc +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue_test.cc @@ -13,8 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/framework/new_executor/workqueue/workqueue.h" + #include #include + #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" @@ -37,10 +39,10 @@ TEST(WorkQueueUtils, TestEventsWaiter) { TEST(WorkQueue, TestSingleThreadedWorkQueue) { VLOG(1) << "In Test"; - using paddle::framework::WorkQueueOptions; - using paddle::framework::WorkQueue; using paddle::framework::CreateSingleThreadedWorkQueue; using paddle::framework::EventsWaiter; + using paddle::framework::WorkQueue; + using paddle::framework::WorkQueueOptions; std::atomic finished{false}; std::atomic counter{0}; constexpr unsigned kLoopNum = 1000000; @@ -83,10 +85,10 @@ TEST(WorkQueue, TestSingleThreadedWorkQueue) { TEST(WorkQueue, TestMultiThreadedWorkQueue) { VLOG(1) << "In Test"; - using paddle::framework::WorkQueueOptions; - using paddle::framework::WorkQueue; using paddle::framework::CreateMultiThreadedWorkQueue; using paddle::framework::EventsWaiter; + using paddle::framework::WorkQueue; + using paddle::framework::WorkQueueOptions; std::atomic finished{false}; std::atomic counter{0}; constexpr unsigned kExternalLoopNum = 100; @@ -136,10 +138,10 @@ TEST(WorkQueue, TestMultiThreadedWorkQueue) { } TEST(WorkQueue, TestWorkQueueGroup) { - using paddle::framework::WorkQueueOptions; - using paddle::framework::WorkQueueGroup; using paddle::framework::CreateWorkQueueGroup; using paddle::framework::EventsWaiter; + using paddle::framework::WorkQueueGroup; + using paddle::framework::WorkQueueOptions; std::atomic finished{false}; std::atomic counter{0}; constexpr unsigned kExternalLoopNum = 100; diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc b/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc index 82dcbbd509d..152f89d9ef0 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" + #include #include diff --git a/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h b/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h index b6e6ede8c33..380746c05d6 100644 --- a/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h +++ b/paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h @@ -21,6 +21,7 @@ #include #include #include + #include "paddle/fluid/framework/new_executor/workqueue/events_waiter.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/no_need_buffer_vars_inference.cc b/paddle/fluid/framework/no_need_buffer_vars_inference.cc index 25f64838c6d..665c9b811fa 100644 --- a/paddle/fluid/framework/no_need_buffer_vars_inference.cc +++ b/paddle/fluid/framework/no_need_buffer_vars_inference.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" + #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/imperative/saved_variable_wrapper_list.h" diff --git a/paddle/fluid/framework/no_need_buffer_vars_inference_test.cc b/paddle/fluid/framework/no_need_buffer_vars_inference_test.cc index a92d52fd2e9..a2c7df763a7 100644 --- a/paddle/fluid/framework/no_need_buffer_vars_inference_test.cc +++ b/paddle/fluid/framework/no_need_buffer_vars_inference_test.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" + #include "gtest/gtest.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/imperative/layer.h" diff --git a/paddle/fluid/framework/op_def_api.cc b/paddle/fluid/framework/op_def_api.cc index 73f1409ae69..b62f17987e6 100644 --- a/paddle/fluid/framework/op_def_api.cc +++ b/paddle/fluid/framework/op_def_api.cc @@ -17,6 +17,7 @@ #define _LINUX #endif #include "paddle/fluid/framework/op_def_api.h" + #include #include #include @@ -28,6 +29,7 @@ #endif #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/op_def.pb.h" diff --git a/paddle/fluid/framework/op_def_api.h b/paddle/fluid/framework/op_def_api.h index 1ef2254d0da..754b76663df 100644 --- a/paddle/fluid/framework/op_def_api.h +++ b/paddle/fluid/framework/op_def_api.h @@ -21,5 +21,5 @@ namespace framework { const proto::OpDef& GetOpDef(const std::string& op_name); bool HasOpDef(const std::string& op_name); -} -} +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index 87d3a048d0b..db2a411da00 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -495,8 +495,9 @@ bool OpDesc::HasProtoAttr(const std::string &name) const { proto::AttrType OpDesc::GetAttrType(const std::string &name) const { auto it = attrs_.find(name); - PADDLE_ENFORCE_NE(it, attrs_.end(), platform::errors::NotFound( - "Attribute %s is not found.", name)); + PADDLE_ENFORCE_NE( + it, attrs_.end(), + platform::errors::NotFound("Attribute %s is not found.", name)); return static_cast(it->second.which() - 1); } @@ -599,8 +600,9 @@ void OpDesc::SetAttrMap( Attribute OpDesc::GetAttr(const std::string &name) const { auto it = attrs_.find(name); - PADDLE_ENFORCE_NE(it, attrs_.end(), platform::errors::NotFound( - "Attribute %s is not found.", name)); + PADDLE_ENFORCE_NE( + it, attrs_.end(), + platform::errors::NotFound("Attribute %s is not found.", name)); return it->second; } @@ -854,10 +856,11 @@ bool CompileTimeInferShapeContext::HasInput(const std::string &name) const { if (length == 0) { return false; } - PADDLE_ENFORCE_EQ(length, 1UL, platform::errors::InvalidArgument( - "Input(%s) should have only one value, " - "but it has %d values now.", - name, length)); + PADDLE_ENFORCE_EQ( + length, 1UL, + platform::errors::InvalidArgument("Input(%s) should have only one value, " + "but it has %d values now.", + name, length)); return block_.HasVarRecursive(input_names[0]); } @@ -870,10 +873,11 @@ bool CompileTimeInferShapeContext::HasOutput(const std::string &name) const { if (length == 0) { return false; } - PADDLE_ENFORCE_EQ(length, 1UL, platform::errors::InvalidArgument( - "Output(%s) should have only one value, " - "but it has %d values now.", - name, length)); + PADDLE_ENFORCE_EQ(length, 1UL, + platform::errors::InvalidArgument( + "Output(%s) should have only one value, " + "but it has %d values now.", + name, length)); return block_.HasVarRecursive(output_names[0]); } diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index 903ee73b2c0..51aeed2e5d7 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once #include + #include "glog/logging.h" #include "paddle/fluid/framework/attribute.h" namespace paddle { diff --git a/paddle/fluid/framework/op_registry_test.cc b/paddle/fluid/framework/op_registry_test.cc index 889b6b0c86b..8b77b1d260c 100644 --- a/paddle/fluid/framework/op_registry_test.cc +++ b/paddle/fluid/framework/op_registry_test.cc @@ -12,11 +12,11 @@ See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/op_registry.h" + #include #include -#include "paddle/fluid/framework/op_registry.h" - namespace pd = paddle::framework; namespace paddle { @@ -58,8 +58,9 @@ class MyTestOpProtoAndCheckerMaker : public OpProtoAndCheckerMaker { AddInput("input", "input of cosine op").AsDuplicable(); AddOutput("output", "output of cosine op").AsIntermediate(); auto my_checker = [](int i) { - PADDLE_ENFORCE_EQ(i % 2, 0, platform::errors::InvalidArgument( - "'test_attr' must be even!")); + PADDLE_ENFORCE_EQ( + i % 2, 0, + platform::errors::InvalidArgument("'test_attr' must be even!")); }; AddAttr("test_attr", "a simple test attribute") .AddCustomChecker(my_checker); diff --git a/paddle/fluid/framework/op_version_proto.h b/paddle/fluid/framework/op_version_proto.h index 9b70bb93bb9..022531d53de 100644 --- a/paddle/fluid/framework/op_version_proto.h +++ b/paddle/fluid/framework/op_version_proto.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include "paddle/fluid/framework/framework.pb.h" diff --git a/paddle/fluid/framework/op_version_registry_test.cc b/paddle/fluid/framework/op_version_registry_test.cc index e66d0dc5a1f..8f83631c272 100644 --- a/paddle/fluid/framework/op_version_registry_test.cc +++ b/paddle/fluid/framework/op_version_registry_test.cc @@ -12,10 +12,10 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "paddle/fluid/framework/op_version_registry.h" +#include + namespace paddle { namespace framework { namespace compatible { diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 69f14d7903c..7395a8e0da8 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -12,6 +12,7 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include + #include #include @@ -1205,10 +1206,11 @@ bool OperatorWithKernel::SupportsMKLDNN( const proto::VarType::Type data_type) const { auto op_kernel_iter = OperatorWithKernel::AllOpKernels().find(type_); if (op_kernel_iter == OperatorWithKernel::AllOpKernels().end()) { - VLOG(6) << "Warning: " << type_ << " don't find its MKLDNN Kernel in Fluid " - "Registered Kernels. And We don't " - "search its kernels in phi lib, " - "SupportsMKLDNN() return false."; + VLOG(6) << "Warning: " << type_ + << " don't find its MKLDNN Kernel in Fluid " + "Registered Kernels. 
And We don't " + "search its kernels in phi lib, " + "SupportsMKLDNN() return false."; return false; } auto& op_kernels = op_kernel_iter->second; @@ -1440,7 +1442,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, #if defined(PADDLE_WITH_XPU_KP) && (!is_xpu_unsupport || use_phi_xpu_kp) #endif - ) { + ) { run_phi_kernel_ = true; } else { auto& all_op_kernels = AllOpKernels(); @@ -1464,7 +1466,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope, #if defined(PADDLE_WITH_XPU_KP) || (is_xpu_unsupport && !is_xpu_kp_support) #endif - ) { + ) { auto pt_cpu_kernel_key = FallBackToCpu(*kernel_type_.get(), pt_kernel_key, *this); pt_kernel_.reset( @@ -2238,8 +2240,9 @@ phi::KernelSignature OperatorWithKernel::GetExpectedPhiKernelArgs( if (arg_map_fn) { arg_map_fn_.reset(new phi::ArgumentMappingFn(*arg_map_fn)); } else { - auto func = [this]( - const phi::ArgumentMappingContext& ctx) -> phi::KernelSignature { + auto func = + [this]( + const phi::ArgumentMappingContext& ctx) -> phi::KernelSignature { return phi::DefaultKernelSignatureMap::Instance().Get(type_); }; arg_map_fn_.reset(new phi::ArgumentMappingFn(func)); diff --git a/paddle/fluid/framework/operator.h b/paddle/fluid/framework/operator.h index 2efa2e4bd8a..dc13287b5aa 100644 --- a/paddle/fluid/framework/operator.h +++ b/paddle/fluid/framework/operator.h @@ -27,6 +27,7 @@ limitations under the License. */ #include "glog/logging.h" // For VLOG #include "paddle/fluid/framework/attribute.h" #include "paddle/fluid/framework/block_desc.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_kernel_type.h" @@ -38,12 +39,10 @@ limitations under the License. */ #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/variant.h" -#include "paddle/utils/flat_hash_map.h" - -#include "paddle/fluid/framework/convert_utils.h" #include "paddle/phi/core/compat/arg_map_context.h" #include "paddle/phi/core/compat/op_utils.h" #include "paddle/phi/core/kernel_factory.h" +#include "paddle/utils/flat_hash_map.h" namespace paddle { namespace framework { @@ -610,12 +609,12 @@ class OperatorWithKernel : public OperatorBase { /* member functions for adapting to phi lib */ /** In the Tensor calculation library, the new Kernel adopts a clearer and - * more streamlined design. The arguments of the Kernel and the input and - * output arguments registered in the original OpMaker do not match in some - * cases, so we use map to record the arguments required by the kernel. - * When selecting Kernel during Op execution, select the arguments of the - * original Op according to the GetExpectedPhiKernelArgs returned arguments. - */ + * more streamlined design. The arguments of the Kernel and the input and + * output arguments registered in the original OpMaker do not match in some + * cases, so we use map to record the arguments required by the kernel. + * When selecting Kernel during Op execution, select the arguments of the + * original Op according to the GetExpectedPhiKernelArgs returned arguments. 
+ */ phi::KernelSignature GetExpectedPhiKernelArgs( const ExecutionContext& ctx) const; diff --git a/paddle/fluid/framework/operator_exception_test.cc b/paddle/fluid/framework/operator_exception_test.cc index 7b513996fb4..0f635d170de 100644 --- a/paddle/fluid/framework/operator_exception_test.cc +++ b/paddle/fluid/framework/operator_exception_test.cc @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/framework/operator.h" #include #include #include #include + #include "gtest/gtest.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/framework/operator_kernel_configs.h b/paddle/fluid/framework/operator_kernel_configs.h index ab812a30981..57d377f1389 100644 --- a/paddle/fluid/framework/operator_kernel_configs.h +++ b/paddle/fluid/framework/operator_kernel_configs.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "glog/logging.h" namespace paddle { diff --git a/paddle/fluid/framework/operator_test.cc b/paddle/fluid/framework/operator_test.cc index 24e09bcd463..3dda60de12a 100644 --- a/paddle/fluid/framework/operator_test.cc +++ b/paddle/fluid/framework/operator_test.cc @@ -11,11 +11,11 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "gtest/gtest.h" +#include "paddle/fluid/framework/operator.h" +#include "gtest/gtest.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/init.h" diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc index 295510cdb1c..a2bdd2bc4c1 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass.cc @@ -334,7 +334,7 @@ std::unique_ptr CreateNewSubGraph(const GraphNodeSet& cluster, } GraphNodeSet need_feed_vars; - std::unordered_set param_vars, output_vars; + std::unordered_set param_vars, output_vars; // the subgraph is independently, so here we only need link // to the node in new subgraph, and discard the link to // out-graph. @@ -386,18 +386,18 @@ std::unique_ptr CreateNewSubGraph(const GraphNodeSet& cluster, subgraph.get()); // Save lists of input variables, internal variables and output variables // of the cluster as attributes of the subgraph for convenience. 
- auto collect_names_fn = []( - const GraphNodeSet& nodes, - const std::unordered_set& ignore_names) { - auto result = std::make_unique>(); - for (auto* node : nodes) { - if (!node->Var() || ignore_names.count(node->Name())) { - continue; - } - result->emplace_back(node->Name()); - } - return result; - }; + auto collect_names_fn = + [](const GraphNodeSet& nodes, + const std::unordered_set& ignore_names) { + auto result = std::make_unique>(); + for (auto* node : nodes) { + if (!node->Var() || ignore_names.count(node->Name())) { + continue; + } + result->emplace_back(node->Name()); + } + return result; + }; subgraph->Set>( kInternalVars, collect_names_fn(cluster_internals, {}).release()); subgraph->Set>( diff --git a/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc b/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc index d593aadc02c..e9c517af2c3 100644 --- a/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc +++ b/paddle/fluid/framework/paddle2cinn/build_cinn_pass_test.cc @@ -19,7 +19,6 @@ limitations under the License. */ #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/details/build_strategy.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/node.h" diff --git a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc index 9b5ce876c25..585f9edce86 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_cache_key.cc @@ -100,7 +100,7 @@ size_t CinnCacheKeyByStructure::HashGraph(const ir::Graph& graph) { // graph.Nodes() return unordered_set, here using set to avoid the same graph // may return different result - std::set node_set(compare), + std::set node_set(compare), output_set(compare); node_set.insert(graph.Nodes().begin(), graph.Nodes().end()); diff --git a/paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc index 1ebeecbff95..24e65599018 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_cache_key_test.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+// clang-format off #include #include @@ -21,6 +22,7 @@ #include "paddle/fluid/framework/paddle2cinn/cinn_cache_key.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/phi/core/ddim.h" +// clang-format on namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 12f60354206..2a6a51d73f2 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -51,14 +51,14 @@ namespace paddle { namespace framework { namespace paddle2cinn { -using ir::Graph; -using ir::Node; -using inference::analysis::Dot; using ::cinn::auto_schedule::AutoTuner; using ::cinn::common::Target; using ::cinn::frontend::Optimize; using ::cinn::hlir::framework::BuildScope; using ::cinn::hlir::framework::GraphCompiler; +using inference::analysis::Dot; +using ir::Graph; +using ir::Node; CinnCompiler* CinnCompiler::GetInstance() { static CinnCompiler* instance = new CinnCompiler(); diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h index a38e8b4c5f6..91c55976764 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.h +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/paddle2cinn/cinn_cache_key.h" diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc index 255e318c9fa..5a84a97ee8d 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler_test.cc @@ -44,8 +44,8 @@ DECLARE_string(deny_cinn_ops); namespace paddle { namespace framework { namespace paddle2cinn { -using ir::Graph; using ::cinn::common::Target; +using ir::Graph; namespace { template > diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc index 31bf8d9b726..4e362057c91 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +// clang-format off #include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h" #include @@ -30,6 +31,7 @@ limitations under the License. */ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" +// clang-format on namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h index 526eb65a56e..4155147da4b 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once +// clang-format off #include #include #include @@ -26,6 +27,7 @@ limitations under the License. 
*/ #include "cinn/frontend/net_builder.h" #include "cinn/frontend/op_mapper_registry.h" +// clang-format on namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc index c0e1ca8f0d1..8a6f92a6f45 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization_test.cc @@ -12,18 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +// clang-format off #include "gtest/gtest.h" #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/paddle2cinn/cinn_graph_symbolization.h" +// clang-format on namespace paddle { namespace framework { namespace paddle2cinn { +using ::cinn::frontend::NetBuilder; using ir::Graph; using ir::Node; -using ::cinn::frontend::NetBuilder; using CinnTensor = ::cinn::hlir::framework::Tensor; using OpMapperContext = CinnGraphSymbolization::OpMapperContext; using CinnOpDesc = CinnGraphSymbolization::CinnOpDesc; diff --git a/paddle/fluid/framework/paddle2cinn/transform_desc.h b/paddle/fluid/framework/paddle2cinn/transform_desc.h index 76a4f812730..6f0931b6d03 100644 --- a/paddle/fluid/framework/paddle2cinn/transform_desc.h +++ b/paddle/fluid/framework/paddle2cinn/transform_desc.h @@ -14,6 +14,8 @@ #pragma once +// The headers cant be sorted by clang-format or compilint error occurs. +// clang-format off #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" @@ -24,6 +26,7 @@ #include "cinn/frontend/paddle/cpp/op_desc.h" #include "cinn/frontend/paddle/cpp/program_desc.h" #include "cinn/frontend/paddle/cpp/var_desc.h" +// clang-format on namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/paddle2cinn/transform_desc_test.cc b/paddle/fluid/framework/paddle2cinn/transform_desc_test.cc index ba324295cad..ae9f51c3f67 100644 --- a/paddle/fluid/framework/paddle2cinn/transform_desc_test.cc +++ b/paddle/fluid/framework/paddle2cinn/transform_desc_test.cc @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +// clang-format off #include #include "gtest/gtest.h" #include "paddle/fluid/framework/paddle2cinn/transform_desc.h" +// clang-format on namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/paddle2cinn/transform_type.cc b/paddle/fluid/framework/paddle2cinn/transform_type.cc index 0e348084d25..60502edd99a 100644 --- a/paddle/fluid/framework/paddle2cinn/transform_type.cc +++ b/paddle/fluid/framework/paddle2cinn/transform_type.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/paddle2cinn/transform_type.h" + #include "cinn/common/type.h" #include "cinn/runtime/cinn_runtime.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/framework/paddle2cinn/transform_type.h b/paddle/fluid/framework/paddle2cinn/transform_type.h index e44960abbd9..f0b08ba1e00 100644 --- a/paddle/fluid/framework/paddle2cinn/transform_type.h +++ b/paddle/fluid/framework/paddle2cinn/transform_type.h @@ -19,7 +19,7 @@ struct cinn_type_t; namespace cinn::common { struct Type; -} // ::cinn::common +} // namespace cinn::common namespace paddle::framework::paddle2cinn { diff --git a/paddle/fluid/framework/paddle2cinn/transform_type_test.cc b/paddle/fluid/framework/paddle2cinn/transform_type_test.cc index 6c5d360d34c..4456642b3e9 100644 --- a/paddle/fluid/framework/paddle2cinn/transform_type_test.cc +++ b/paddle/fluid/framework/paddle2cinn/transform_type_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/paddle2cinn/transform_type.h" + #include "cinn/common/type.h" #include "cinn/runtime/cinn_runtime.h" #include "gtest/gtest.h" diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index b088a535a12..00d48098a13 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -666,8 +666,9 @@ ParallelExecutor::ParallelExecutor(const std::vector &places, ir::Graph *graph) : member_(new ParallelExecutorPrivate(places, scope)) { PADDLE_ENFORCE_EQ(places.size() > 0 && !platform::is_npu_place(places[0]), - true, platform::errors::Unavailable( - "NPU is not supported in ParallelExecutor.")); + true, + platform::errors::Unavailable( + "NPU is not supported in ParallelExecutor.")); InitP2P(places); ir::InitReaderQueueDeviceCount(graph, *(member_->global_scope_), member_->places_.size()); diff --git a/paddle/fluid/framework/parallel_executor.h b/paddle/fluid/framework/parallel_executor.h index 18d0ee78ffb..3dc9fbcfbf3 100644 --- a/paddle/fluid/framework/parallel_executor.h +++ b/paddle/fluid/framework/parallel_executor.h @@ -42,9 +42,9 @@ namespace framework { class ParallelExecutorPrivate; -using details::VariableInfo; using details::BuildStrategy; using details::ExecutionStrategy; +using details::VariableInfo; namespace p = paddle::platform; using DeviceType = paddle::platform::DeviceType; diff --git a/paddle/fluid/framework/phi_utils.cc b/paddle/fluid/framework/phi_utils.cc index 3eda00006f9..19f7b024b27 100644 --- a/paddle/fluid/framework/phi_utils.cc +++ b/paddle/fluid/framework/phi_utils.cc @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/phi_utils.h" + #include #include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/framework/phi_utils.h" - #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/selected_rows_utils.h" diff --git a/paddle/fluid/framework/phi_utils.h b/paddle/fluid/framework/phi_utils.h index 785ede5c601..535672f2e12 100644 --- a/paddle/fluid/framework/phi_utils.h +++ b/paddle/fluid/framework/phi_utils.h @@ -21,11 +21,10 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/framework/op_kernel_type.h" +#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/place.h" - -#include "paddle/fluid/framework/operator.h" #include "paddle/phi/api/lib/utils/tensor_utils.h" #include "paddle/phi/common/backend.h" #include "paddle/phi/core/compat/arg_map_context.h" diff --git a/paddle/fluid/framework/phi_utils_test.cc b/paddle/fluid/framework/phi_utils_test.cc index cbcdf24c9f3..02eb23f8ac1 100644 --- a/paddle/fluid/framework/phi_utils_test.cc +++ b/paddle/fluid/framework/phi_utils_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/phi_utils.h" + #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows_utils.h" diff --git a/paddle/fluid/framework/program_desc.cc b/paddle/fluid/framework/program_desc.cc index 4a31adcca65..88738255af7 100644 --- a/paddle/fluid/framework/program_desc.cc +++ b/paddle/fluid/framework/program_desc.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/program_desc.h" + #include "paddle/fluid/framework/feed_fetch_type.h" #include "paddle/fluid/framework/version.h" diff --git a/paddle/fluid/framework/program_desc.h b/paddle/fluid/framework/program_desc.h index 4ceb0c5c824..7e1c12f4ac5 100644 --- a/paddle/fluid/framework/program_desc.h +++ b/paddle/fluid/framework/program_desc.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/framework/program_processing.cc b/paddle/fluid/framework/program_processing.cc index 3bcf6f8f385..95b28b79dcf 100644 --- a/paddle/fluid/framework/program_processing.cc +++ b/paddle/fluid/framework/program_processing.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/program_processing.h" + #include "paddle/fluid/framework/block_desc.h" namespace paddle { diff --git a/paddle/fluid/framework/prune.cc b/paddle/fluid/framework/prune.cc index 4c95f01ae56..fbeedcc311a 100644 --- a/paddle/fluid/framework/prune.cc +++ b/paddle/fluid/framework/prune.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_proto_maker.h" namespace paddle { diff --git a/paddle/fluid/framework/prune_test.cc b/paddle/fluid/framework/prune_test.cc index 64b30878150..5fbfda716b4 100644 --- a/paddle/fluid/framework/prune_test.cc +++ b/paddle/fluid/framework/prune_test.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/prune.h" #include + #include #include "paddle/fluid/framework/block_desc.h" diff --git a/paddle/fluid/framework/ps_gpu_trainer.cc b/paddle/fluid/framework/ps_gpu_trainer.cc index aec40a5a7eb..c86bfbc43bf 100644 --- a/paddle/fluid/framework/ps_gpu_trainer.cc +++ b/paddle/fluid/framework/ps_gpu_trainer.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include #include #include diff --git a/paddle/fluid/framework/pull_dense_worker.cc b/paddle/fluid/framework/pull_dense_worker.cc index a12079a135d..7a0fe65182d 100644 --- a/paddle/fluid/framework/pull_dense_worker.cc +++ b/paddle/fluid/framework/pull_dense_worker.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/device_worker.h" namespace phi { diff --git a/paddle/fluid/framework/reader.cc b/paddle/fluid/framework/reader.cc index b418339bf32..27940f726dc 100644 --- a/paddle/fluid/framework/reader.cc +++ b/paddle/fluid/framework/reader.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/reader.h" + #include namespace paddle { diff --git a/paddle/fluid/framework/save_load_util.cc b/paddle/fluid/framework/save_load_util.cc index 44488fca01c..284965fdfe9 100644 --- a/paddle/fluid/framework/save_load_util.cc +++ b/paddle/fluid/framework/save_load_util.cc @@ -342,8 +342,9 @@ bool LoadTensorFromDisk( uint32_t version; fin.read(reinterpret_cast(&version), sizeof(version)); CheckInStreamState(fin, sizeof(version)); - PADDLE_ENFORCE_EQ(version, 0U, platform::errors::InvalidArgument( - "Only version 0 tensor is supported.")); + PADDLE_ENFORCE_EQ(version, 0U, + platform::errors::InvalidArgument( + "Only version 0 tensor is supported.")); proto::VarType::TensorDesc desc; { // int32_t size diff --git a/paddle/fluid/framework/save_load_util_test.cc b/paddle/fluid/framework/save_load_util_test.cc index 10a34d7ce91..623f0f27bda 100644 --- a/paddle/fluid/framework/save_load_util_test.cc +++ b/paddle/fluid/framework/save_load_util_test.cc @@ -11,11 +11,12 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/save_load_util.h" + #include #include #include "gtest/gtest.h" -#include "paddle/fluid/framework/save_load_util.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/scope_guard.h b/paddle/fluid/framework/scope_guard.h index 83387842e94..9c741f7bfc5 100644 --- a/paddle/fluid/framework/scope_guard.h +++ b/paddle/fluid/framework/scope_guard.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/platform/macros.h" namespace paddle { @@ -41,12 +42,12 @@ class ScopeGuard { #define _PADDLE_CONCAT_TOKEN(x, y) x##y #define PADDLE_CONCAT_TOKEN(x, y) _PADDLE_CONCAT_TOKEN(x, y) -#define DEFINE_PADDLE_SCOPE_GUARD(...) \ - auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__; \ - ::paddle::framework::ScopeGuard::type> \ - PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)( \ - PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__)) +#define DEFINE_PADDLE_SCOPE_GUARD(...) \ + auto PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__) = __VA_ARGS__; \ + ::paddle::framework::ScopeGuard::type> \ + PADDLE_CONCAT_TOKEN(__scope_guard, __LINE__)( \ + PADDLE_CONCAT_TOKEN(__scope_guard_func, __LINE__)) } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/scope_guard_test.cc b/paddle/fluid/framework/scope_guard_test.cc index d7a7a6168a3..793b3a1652a 100644 --- a/paddle/fluid/framework/scope_guard_test.cc +++ b/paddle/fluid/framework/scope_guard_test.cc @@ -13,6 +13,7 @@ // limitations under the License. 
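The scope_guard.h hunk above illustrates another purely mechanical change: the continuation backslashes of multi-line macros are re-padded so they line up in a single column. A small self-contained sketch of the style (the macro itself is illustrative):

  #include <iostream>

  #define WARN_IF_NULL(ptr)                \
    do {                                   \
      if ((ptr) == nullptr) {              \
        std::cerr << #ptr << " is null\n"; \
      }                                    \
    } while (0)
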
#include "paddle/fluid/framework/scope_guard.h" + #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/framework/section_worker.cc b/paddle/fluid/framework/section_worker.cc index 1f821720d64..7bb8550926d 100644 --- a/paddle/fluid/framework/section_worker.cc +++ b/paddle/fluid/framework/section_worker.cc @@ -12,6 +12,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ defined(PADDLE_WITH_ASCEND_CL) #include + #include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/executor_gc_helper.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/framework/selected_rows_utils.h b/paddle/fluid/framework/selected_rows_utils.h index 8606295c451..9ecff5719fb 100644 --- a/paddle/fluid/framework/selected_rows_utils.h +++ b/paddle/fluid/framework/selected_rows_utils.h @@ -21,10 +21,9 @@ limitations under the License. */ #include #include -#include "paddle/phi/core/selected_rows.h" - #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/core/selected_rows.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/selected_rows_utils_test.cc b/paddle/fluid/framework/selected_rows_utils_test.cc index f23510c721e..db2c6c1f991 100644 --- a/paddle/fluid/framework/selected_rows_utils_test.cc +++ b/paddle/fluid/framework/selected_rows_utils_test.cc @@ -9,11 +9,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/selected_rows_utils.h" + #include + #include // NOLINT #include "gtest/gtest.h" -#include "paddle/fluid/framework/selected_rows_utils.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/string_array.cc b/paddle/fluid/framework/string_array.cc old mode 100755 new mode 100644 index 3071e6bf4cf..f6aee9b82f2 --- a/paddle/fluid/framework/string_array.cc +++ b/paddle/fluid/framework/string_array.cc @@ -12,12 +12,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/string_array.h" + #include #include #include "glog/logging.h" -#include "paddle/fluid/framework/string_array.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/tensor.h b/paddle/fluid/framework/tensor.h index 57eddf782f0..7ad9839d79d 100644 --- a/paddle/fluid/framework/tensor.h +++ b/paddle/fluid/framework/tensor.h @@ -23,15 +23,14 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/framework.pb.h" +#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "paddle/phi/core/ddim.h" -#include "paddle/phi/core/stream.h" - -#include "paddle/fluid/framework/mixed_vector.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/core/stream.h" namespace paddle { diff --git a/paddle/fluid/framework/tensor_impl.h b/paddle/fluid/framework/tensor_impl.h index f5e230773fb..946b119ecb3 100644 --- a/paddle/fluid/framework/tensor_impl.h +++ b/paddle/fluid/framework/tensor_impl.h @@ -24,12 +24,13 @@ namespace framework { inline Tensor ReshapeToMatrix(const Tensor& src, int num_col_dims) { int rank = src.dims().size(); PADDLE_ENFORCE_GE( - rank, 2, platform::errors::InvalidArgument( - "'ReshapeToMatrix()' is only used for flatten high rank " - "tensors to matrixs. The dimensions of Tensor must be " - "greater or equal than 2. " - "But received dimensions of Tensor is %d", - rank)); + rank, 2, + platform::errors::InvalidArgument( + "'ReshapeToMatrix()' is only used for flatten high rank " + "tensors to matrixs. The dimensions of Tensor must be " + "greater or equal than 2. " + "But received dimensions of Tensor is %d", + rank)); if (rank == 2) { return src; } diff --git a/paddle/fluid/framework/tensor_test.cc b/paddle/fluid/framework/tensor_test.cc index 3e104807535..05dd41eb6ff 100644 --- a/paddle/fluid/framework/tensor_test.cc +++ b/paddle/fluid/framework/tensor_test.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/tensor.h" #include + #include namespace framework = paddle::framework; diff --git a/paddle/fluid/framework/tensor_util.cc b/paddle/fluid/framework/tensor_util.cc index 1159280762f..1e25acb2c4e 100644 --- a/paddle/fluid/framework/tensor_util.cc +++ b/paddle/fluid/framework/tensor_util.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/tensor_util.h" + #include #include #include @@ -21,10 +23,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/profiler/event_tracing.h" - #include "paddle/phi/core/dense_tensor.h" #ifdef PADDLE_WITH_MKLDNN @@ -1249,10 +1249,12 @@ void TensorFromStream(std::istream& is, Tensor* tensor, // proto buffer int32_t size = -1; is.read(reinterpret_cast(&size), sizeof(size)); - PADDLE_ENFORCE_EQ(is.good(), true, platform::errors::Unavailable( - "Cannot read tensor desc size")); - PADDLE_ENFORCE_GE(size, 0, platform::errors::InvalidArgument( - "Tensor desc size should >= 0")); + PADDLE_ENFORCE_EQ( + is.good(), true, + platform::errors::Unavailable("Cannot read tensor desc size")); + PADDLE_ENFORCE_GE( + size, 0, + platform::errors::InvalidArgument("Tensor desc size should >= 0")); std::unique_ptr buf(new char[size]); is.read(reinterpret_cast(buf.get()), size); PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/framework/tensor_util_test.cc b/paddle/fluid/framework/tensor_util_test.cc index 5e6e1227b1a..2511fdf27ce 100644 --- a/paddle/fluid/framework/tensor_util_test.cc +++ b/paddle/fluid/framework/tensor_util_test.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/framework/tensor_util.h" #include + #include namespace paddle { @@ -254,64 +255,61 @@ TEST(TensorToVector, Tensor) { #endif } -TEST(TensorToVector, Tensor_bool) { - { - paddle::framework::Tensor src; - bool* src_ptr = - src.mutable_data({3, 3}, paddle::platform::CPUPlace()); - for (int i = 0; i < 3 * 3; ++i) { - src_ptr[i] = static_cast(i % 2); - } +TEST(TensorToVector, Tensor_bool){{paddle::framework::Tensor src; +bool* src_ptr = src.mutable_data({3, 3}, paddle::platform::CPUPlace()); +for (int i = 0; i < 3 * 3; ++i) { + src_ptr[i] = static_cast(i % 2); +} - paddle::platform::CPUPlace place; - std::vector dst; - paddle::framework::TensorToVector(src, &dst); +paddle::platform::CPUPlace place; +std::vector dst; +paddle::framework::TensorToVector(src, &dst); - for (int i = 0; i < 3 * 3; ++i) { - EXPECT_EQ(src_ptr[i], dst[i]); - } - } +for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_ptr[i], dst[i]); +} +} // namespace framework #ifdef PADDLE_WITH_CUDA - { - std::vector src_vec = { - false, true, false, true, false, true, false, true, false, - }; - paddle::framework::Tensor gpu_tensor; - paddle::platform::CUDAPlace place; - paddle::platform::CUDADeviceContext gpu_ctx(place); - gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() - .GetAllocator(place, gpu_ctx.stream()) - .get()); - gpu_ctx.PartialInitWithAllocator(); - paddle::framework::TensorFromVector(src_vec, gpu_ctx, &gpu_tensor); - - std::vector dst; - paddle::framework::TensorToVector(gpu_tensor, gpu_ctx, &dst); - - for (int i = 0; i < 3 * 3; ++i) { - EXPECT_EQ(src_vec[i], dst[i]); - } +{ + std::vector src_vec = { + false, true, false, true, false, true, false, true, false, + }; + paddle::framework::Tensor gpu_tensor; + paddle::platform::CUDAPlace place; + paddle::platform::CUDADeviceContext gpu_ctx(place); + gpu_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(place, gpu_ctx.stream()) + .get()); + gpu_ctx.PartialInitWithAllocator(); + paddle::framework::TensorFromVector(src_vec, gpu_ctx, &gpu_tensor); + + std::vector dst; + paddle::framework::TensorToVector(gpu_tensor, gpu_ctx, &dst); + + for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_vec[i], dst[i]); } +} #endif #ifdef PADDLE_WITH_ASCEND_CL - { - std::vector src_vec = { 
- false, true, false, true, false, true, false, true, false, - }; - paddle::framework::Tensor npu_tensor; - paddle::platform::NPUPlace place(0); - paddle::platform::NPUDeviceContext npu_ctx(place); - paddle::framework::TensorFromVector(src_vec, npu_ctx, &npu_tensor); - - std::vector dst; - paddle::framework::TensorToVector(npu_tensor, npu_ctx, &dst); - - for (int i = 0; i < 3 * 3; ++i) { - EXPECT_EQ(src_vec[i], dst[i]); - } +{ + std::vector src_vec = { + false, true, false, true, false, true, false, true, false, + }; + paddle::framework::Tensor npu_tensor; + paddle::platform::NPUPlace place(0); + paddle::platform::NPUDeviceContext npu_ctx(place); + paddle::framework::TensorFromVector(src_vec, npu_ctx, &npu_tensor); + + std::vector dst; + paddle::framework::TensorToVector(npu_tensor, npu_ctx, &dst); + + for (int i = 0; i < 3 * 3; ++i) { + EXPECT_EQ(src_vec[i], dst[i]); } -#endif } +#endif +} // namespace paddle TEST(TensorFromDLPack, Tensor) { { diff --git a/paddle/fluid/framework/threadpool.cc b/paddle/fluid/framework/threadpool.cc index 33533b1d10f..b704ac4329d 100644 --- a/paddle/fluid/framework/threadpool.cc +++ b/paddle/fluid/framework/threadpool.cc @@ -43,8 +43,9 @@ void ThreadPool::Init() { num_threads = FLAGS_dist_threadpool_size; VLOG(1) << "set dist_threadpool_size to " << num_threads; } - PADDLE_ENFORCE_GT(num_threads, 0, platform::errors::InvalidArgument( - "The number of threads is 0.")); + PADDLE_ENFORCE_GT( + num_threads, 0, + platform::errors::InvalidArgument("The number of threads is 0.")); threadpool_.reset(new ThreadPool(num_threads)); } } diff --git a/paddle/fluid/framework/threadpool_test.cc b/paddle/fluid/framework/threadpool_test.cc index 1278a0f0643..0b6e12967fe 100644 --- a/paddle/fluid/framework/threadpool_test.cc +++ b/paddle/fluid/framework/threadpool_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/threadpool.h" + #include + #include namespace framework = paddle::framework; diff --git a/paddle/fluid/framework/trainer.cc b/paddle/fluid/framework/trainer.cc index b033f9a99d6..dc48a8f8d8f 100644 --- a/paddle/fluid/framework/trainer.cc +++ b/paddle/fluid/framework/trainer.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/trainer.h" + #include "io/fs.h" namespace paddle { diff --git a/paddle/fluid/framework/trainer_factory.cc b/paddle/fluid/framework/trainer_factory.cc index 1f1122d32f5..48ea9143d62 100644 --- a/paddle/fluid/framework/trainer_factory.cc +++ b/paddle/fluid/framework/trainer_factory.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/trainer_factory.h" #include + #include #include diff --git a/paddle/fluid/framework/trainer_test.cc b/paddle/fluid/framework/trainer_test.cc index f689679d486..1f4a162f906 100644 --- a/paddle/fluid/framework/trainer_test.cc +++ b/paddle/fluid/framework/trainer_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/trainer.h" + #include namespace paddle { @@ -23,5 +24,5 @@ TEST() { // create dataset // train for a while } -} -} +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index 0937d96ad4c..5feedb2c3d6 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -21,6 +21,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/platform/variant.h" #include "paddle/utils/small_vector.h" diff --git a/paddle/fluid/framework/unused_var_check.cc b/paddle/fluid/framework/unused_var_check.cc index 2f03dc41ce0..43c44ff525f 100644 --- a/paddle/fluid/framework/unused_var_check.cc +++ b/paddle/fluid/framework/unused_var_check.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/framework/unused_var_check.h" #include + #include #include "gflags/gflags.h" diff --git a/paddle/fluid/framework/unused_var_check.h b/paddle/fluid/framework/unused_var_check.h index 95f6917fbcd..cc4977e439c 100644 --- a/paddle/fluid/framework/unused_var_check.h +++ b/paddle/fluid/framework/unused_var_check.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include diff --git a/paddle/fluid/framework/var_desc.cc b/paddle/fluid/framework/var_desc.cc index 0a24efd003b..3a3edc9b4c6 100644 --- a/paddle/fluid/framework/var_desc.cc +++ b/paddle/fluid/framework/var_desc.cc @@ -318,18 +318,20 @@ void VarDesc::SetAttr(const std::string &name, const Attribute &v) { bool valid = attr_type == proto::AttrType::INT || attr_type == proto::AttrType::STRING || attr_type == proto::AttrType::INTS; - PADDLE_ENFORCE_EQ(valid, true, platform::errors::InvalidArgument( - "The value for attr (%s) must be " - "one of list or int or string.", - name)); + PADDLE_ENFORCE_EQ( + valid, true, + platform::errors::InvalidArgument("The value for attr (%s) must be " + "one of list or int or string.", + name)); this->attrs_[name] = v; } Attribute VarDesc::GetAttr(const std::string &name) const { auto it = attrs_.find(name); - PADDLE_ENFORCE_NE(it, attrs_.end(), platform::errors::NotFound( - "Attribute %s is not found.", name)); + PADDLE_ENFORCE_NE( + it, attrs_.end(), + platform::errors::NotFound("Attribute %s is not found.", name)); return it->second; } diff --git a/paddle/fluid/framework/var_type_inference_test.cc b/paddle/fluid/framework/var_type_inference_test.cc index 5483ef01c08..ce489a57a01 100644 --- a/paddle/fluid/framework/var_type_inference_test.cc +++ b/paddle/fluid/framework/var_type_inference_test.cc @@ -245,9 +245,12 @@ TEST(InferVarType, multiple_api) { ASSERT_ANY_THROW(infer.SetDataTypes(&ctx, "test2_a_out", {})); ASSERT_EQ(0u, infer.GetShape(&ctx, "test2_a_out").size()); - infer.SetShape(&ctx, "test2_a_out", { - 1, 3, 3, - }); + infer.SetShape(&ctx, "test2_a_out", + { + 1, + 3, + 3, + }); ASSERT_EQ(3u, infer.GetShape(&ctx, "test2_a_out").size()); ASSERT_EQ(0, infer.GetLoDLevel(&ctx, "test2_a_out")); diff --git a/paddle/fluid/framework/var_type_traits.cc b/paddle/fluid/framework/var_type_traits.cc index ec664b4513f..345928666bd 100644 --- a/paddle/fluid/framework/var_type_traits.cc +++ b/paddle/fluid/framework/var_type_traits.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/framework/var_type_traits.h" + #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/scope.h" @@ -25,6 +26,7 @@ #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #endif #include + #include "paddle/fluid/operators/conv_cudnn_op_cache.h" #include "paddle/fluid/operators/cudnn_rnn_cache.h" #endif diff --git a/paddle/fluid/framework/var_type_traits_test.cc b/paddle/fluid/framework/var_type_traits_test.cc index 00ae5154f83..4a81f66948d 100644 --- a/paddle/fluid/framework/var_type_traits_test.cc +++ b/paddle/fluid/framework/var_type_traits_test.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/var_type_traits.h" + #include #include "paddle/fluid/framework/lod_rank_table.h" #include "paddle/fluid/framework/reader.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows_utils.h" -#include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h" #ifdef PADDLE_WITH_CUDA #if defined(PADDLE_WITH_NCCL) diff --git a/paddle/fluid/framework/version.cc b/paddle/fluid/framework/version.cc index 92042e47259..c01bef79cdc 100644 --- a/paddle/fluid/framework/version.cc +++ b/paddle/fluid/framework/version.cc @@ -24,7 +24,7 @@ bool IsProgramVersionSupported(int64_t version) { * new version. The compatibility judgment cannot be made only * by the version number. Please do not use this interface, * it may be discarded because backward compatibility. - */ + */ return true; } @@ -33,7 +33,7 @@ bool IsTensorVersionSupported(uint32_t version) { * new version. The compatibility judgment cannot be made only * by the version number. Please do not use this interface, * it may be discarded because backward compatibility. - */ + */ return true; } diff --git a/paddle/fluid/framework/version_test.cc b/paddle/fluid/framework/version_test.cc index ec5a340ee6e..7c52209981f 100644 --- a/paddle/fluid/framework/version_test.cc +++ b/paddle/fluid/framework/version_test.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/version.h" + #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/imperative/all_reduce.cc b/paddle/fluid/imperative/all_reduce.cc index 436e22f00c3..f6484d5cdda 100644 --- a/paddle/fluid/imperative/all_reduce.cc +++ b/paddle/fluid/imperative/all_reduce.cc @@ -15,6 +15,7 @@ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/imperative/all_reduce.h" + #include "paddle/fluid/framework/convert_utils.h" #ifdef PADDLE_WITH_NCCL diff --git a/paddle/fluid/imperative/amp_auto_cast.cc b/paddle/fluid/imperative/amp_auto_cast.cc index 3f6863d642c..ff6e297ba80 100644 --- a/paddle/fluid/imperative/amp_auto_cast.cc +++ b/paddle/fluid/imperative/amp_auto_cast.cc @@ -13,8 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/imperative/amp_auto_cast.h" + #include #include + #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/type_defs.h" @@ -302,9 +304,8 @@ static inline framework::proto::VarType::Type GetPromoteType( // dtype of input(X) if (op_type == "moving_average_abs_max_scale") { for (const auto& pair : ins) { - if (pair.first == "X" && - GetDataType(pair.second.front()) == - framework::proto::VarType::FP16) { + if (pair.first == "X" && GetDataType(pair.second.front()) == + framework::proto::VarType::FP16) { dst_type = framework::proto::VarType::FP16; } } diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h index 49761a8df0b..fcc30b2590a 100644 --- a/paddle/fluid/imperative/basic_engine.h +++ b/paddle/fluid/imperative/basic_engine.h @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/fluid/imperative/engine.h" #include "paddle/fluid/imperative/gradient_accumulator.h" diff --git a/paddle/fluid/imperative/bkcl_context.cc b/paddle/fluid/imperative/bkcl_context.cc index 11abbfe7cf6..9990fde95ce 100644 --- a/paddle/fluid/imperative/bkcl_context.cc +++ b/paddle/fluid/imperative/bkcl_context.cc @@ -14,13 +14,14 @@ #if defined(PADDLE_WITH_XPU_BKCL) +#include "paddle/fluid/imperative/bkcl_context.h" + #include #include #include #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/imperative/bkcl_context.h" #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/xpu/bkcl_helper.h" #include "paddle/fluid/platform/device_context.h" @@ -46,10 +47,11 @@ static void AllReduce(const framework::Tensor &src, framework::Tensor *dst, auto bkcl_dtype = platform::ToBKCLDataType(framework::TransToProtoVarType(src.dtype())); - PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(), - bkcl_dtype, BKCL_ADD, stream), - BKCL_SUCCESS, platform::errors::PreconditionNotMet( - "BKCL all reduce failed")); + PADDLE_ENFORCE_EQ( + bkcl_all_reduce(comm->comm(), src_ptr, dst_ptr, src.numel(), bkcl_dtype, + BKCL_ADD, stream), + BKCL_SUCCESS, + platform::errors::PreconditionNotMet("BKCL all reduce failed")); } /* Baidu Kunlun Communication Library(BKCL) is designed for multi Baidu Kunlun diff --git a/paddle/fluid/imperative/cncl_context.cc b/paddle/fluid/imperative/cncl_context.cc index 779b748c2d2..19f22e74029 100644 --- a/paddle/fluid/imperative/cncl_context.cc +++ b/paddle/fluid/imperative/cncl_context.cc @@ -18,14 +18,12 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" - -#include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/gen_comm_id_helper.h" -#include "paddle/fluid/platform/place.h" - #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/mlu/cncl_helper.h" #include "paddle/fluid/platform/device/mlu/mlu_info.h" +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/gen_comm_id_helper.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace framework { @@ -184,8 +182,9 @@ paddle::platform::DeviceContext *CNCLParallelContext::GetDeviceContext( } void CNCLParallelContext::WaitCompute(int ring_id) { - PADDLE_ENFORCE_GE(ring_id, 0, platform::errors::OutOfRange( - "ring id must >= 0, but got %d", ring_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::OutOfRange("ring id must >= 0, but got %d", ring_id)); PADDLE_ENFORCE_LT(ring_id, compute_events_.size(), platform::errors::OutOfRange( "ring id must < compute events size," @@ -205,8 +204,9 @@ void CNCLParallelContext::WaitCompute(int ring_id) { } void CNCLParallelContext::WaitComm(int ring_id) { - PADDLE_ENFORCE_GE(ring_id, 0, platform::errors::OutOfRange( - "ring id must >= 0, but got %d", ring_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::OutOfRange("ring id must >= 0, but got %d", ring_id)); PADDLE_ENFORCE_LT(ring_id, comm_events_.size(), platform::errors::OutOfRange( "ring id must < comm events size," diff --git a/paddle/fluid/imperative/data_loader.cc b/paddle/fluid/imperative/data_loader.cc index c43149c9b56..66eed298106 100644 --- a/paddle/fluid/imperative/data_loader.cc +++ b/paddle/fluid/imperative/data_loader.cc @@ -19,6 +19,7 @@ #include #include #include + #include #include "glog/logging.h" diff --git a/paddle/fluid/imperative/data_loader.h b/paddle/fluid/imperative/data_loader.h index fdfa117eafe..e66a3b9edc3 100644 --- a/paddle/fluid/imperative/data_loader.h +++ b/paddle/fluid/imperative/data_loader.h @@ -17,6 +17,7 @@ #ifndef _WIN32 #include + #include #include diff --git a/paddle/fluid/imperative/execution_context.h b/paddle/fluid/imperative/execution_context.h index 124c31df733..fe426a76b32 100644 --- a/paddle/fluid/imperative/execution_context.h +++ b/paddle/fluid/imperative/execution_context.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/variable.h" diff --git a/paddle/fluid/imperative/flags.cc b/paddle/fluid/imperative/flags.cc index c2d668eccda..df424b32fca 100644 --- a/paddle/fluid/imperative/flags.cc +++ b/paddle/fluid/imperative/flags.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/imperative/flags.h" + #include "paddle/fluid/platform/flags.h" PADDLE_DEFINE_EXPORTED_uint64(dygraph_debug, 0, diff --git a/paddle/fluid/imperative/gloo_context.cc b/paddle/fluid/imperative/gloo_context.cc index dd34b8b619f..c5bcab4daa9 100644 --- a/paddle/fluid/imperative/gloo_context.cc +++ b/paddle/fluid/imperative/gloo_context.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/imperative/gloo_context.h" + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #include "paddle/fluid/framework/tensor_util.h" diff --git a/paddle/fluid/imperative/gloo_context.h b/paddle/fluid/imperative/gloo_context.h index 23e4e02945b..5e0973e7e99 100644 --- a/paddle/fluid/imperative/gloo_context.h +++ b/paddle/fluid/imperative/gloo_context.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/framework/variable.h" diff --git a/paddle/fluid/imperative/gradient_accumulator.cc b/paddle/fluid/imperative/gradient_accumulator.cc index 499cf4d8ad6..36e6f551dc6 100644 --- a/paddle/fluid/imperative/gradient_accumulator.cc +++ b/paddle/fluid/imperative/gradient_accumulator.cc @@ -874,8 +874,9 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr var, } PADDLE_ENFORCE_EQ(var_info.var->Var().IsType(), - true, platform::errors::PermissionDenied( - "Gradient var must be LoDTensor")); + true, + platform::errors::PermissionDenied( + "Gradient var must be LoDTensor")); if (CurCnt() == 0) { MoveOrCopyVar(dst_var->MutableVar(), var_info.var->MutableVar(), var_info.unchange_input); @@ -896,9 +897,10 @@ void SortedGradientAccumulator::SumGrad(std::shared_ptr var, PADDLE_ENFORCE_EQ( var_info.var->Var().IsType() || var_info.var->Var().IsType(), - true, platform::errors::PermissionDenied("The type of Gradient " - "var must be LoDTensor " - "or SelectedRows")); + true, + platform::errors::PermissionDenied("The type of Gradient " + "var must be LoDTensor " + "or SelectedRows")); if (CurCnt() == 0) { MoveOrCopyVar(dst_var->MutableVar(), var_info.var->MutableVar(), var_info.unchange_input); diff --git a/paddle/fluid/imperative/gradient_accumulator.h b/paddle/fluid/imperative/gradient_accumulator.h index 03f6775defc..382623b6276 100644 --- a/paddle/fluid/imperative/gradient_accumulator.h +++ b/paddle/fluid/imperative/gradient_accumulator.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/imperative/hooks.h" #include "paddle/fluid/imperative/layer.h" diff --git a/paddle/fluid/imperative/hccl_context.cc b/paddle/fluid/imperative/hccl_context.cc index 31d988753f2..8fb434cbc2a 100644 --- a/paddle/fluid/imperative/hccl_context.cc +++ b/paddle/fluid/imperative/hccl_context.cc @@ -13,18 +13,16 @@ // limitations under the License. 
#include "paddle/fluid/imperative/hccl_context.h" -#include "paddle/fluid/framework/convert_utils.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/variable.h" - +#include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/hccl_helper.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" #include "paddle/fluid/platform/place.h" -#include "paddle/fluid/platform/collective_helper.h" -#include "paddle/fluid/platform/device/npu/hccl_helper.h" - namespace paddle { namespace framework { class Variable; @@ -193,8 +191,9 @@ paddle::platform::DeviceContext *HCCLParallelContext::GetDeviceContext( } void HCCLParallelContext::WaitCompute(int ring_id) { - PADDLE_ENFORCE_GE(ring_id, 0, platform::errors::OutOfRange( - "ring id must >= 0, but got %d", ring_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::OutOfRange("ring id must >= 0, but got %d", ring_id)); PADDLE_ENFORCE_LT(ring_id, compute_events_.size(), platform::errors::OutOfRange( "ring id must < compute events size," @@ -214,8 +213,9 @@ void HCCLParallelContext::WaitCompute(int ring_id) { } void HCCLParallelContext::WaitComm(int ring_id) { - PADDLE_ENFORCE_GE(ring_id, 0, platform::errors::OutOfRange( - "ring id must >= 0, but got %d", ring_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::OutOfRange("ring id must >= 0, but got %d", ring_id)); PADDLE_ENFORCE_LT(ring_id, comm_events_.size(), platform::errors::OutOfRange( "ring id must < comm events size," diff --git a/paddle/fluid/imperative/infer_var_type_context.h b/paddle/fluid/imperative/infer_var_type_context.h index 297ec840db4..079e180c2a7 100644 --- a/paddle/fluid/imperative/infer_var_type_context.h +++ b/paddle/fluid/imperative/infer_var_type_context.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/framework/var_type_inference.h" #include "paddle/fluid/imperative/type_defs.h" diff --git a/paddle/fluid/imperative/jit/program_desc_tracer.cc b/paddle/fluid/imperative/jit/program_desc_tracer.cc index 35ff262fe3d..e0f52beb6e5 100644 --- a/paddle/fluid/imperative/jit/program_desc_tracer.cc +++ b/paddle/fluid/imperative/jit/program_desc_tracer.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/imperative/jit/program_desc_tracer.h" + #include "paddle/fluid/framework/convert_utils.h" namespace paddle { diff --git a/paddle/fluid/imperative/layer.cc b/paddle/fluid/imperative/layer.cc index 76f64ab73a6..7357db4e200 100644 --- a/paddle/fluid/imperative/layer.cc +++ b/paddle/fluid/imperative/layer.cc @@ -16,7 +16,6 @@ #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/framework/convert_utils.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/imperative/infer_var_type_context.h" #include "paddle/fluid/imperative/op_base.h" @@ -284,9 +283,10 @@ std::shared_ptr VarBase::NewVarBase(const platform::Place& dst_place, PADDLE_ENFORCE_EQ( Var().IsInitialized() && (Var().IsType() || Var().IsType()), - true, platform::errors::InvalidArgument( - "Variable is not initialized or Variable's type is not " - "LoDTensor or SelectedRows when getting numpy tensor")); + true, + platform::errors::InvalidArgument( + "Variable is not initialized or Variable's type is not " + "LoDTensor or SelectedRows when getting numpy tensor")); if (Var().IsType()) { auto& src_tensor = Var().Get(); diff --git a/paddle/fluid/imperative/layout_autotune.cc b/paddle/fluid/imperative/layout_autotune.cc index ed0526eaad3..e936505b2ae 100644 --- a/paddle/fluid/imperative/layout_autotune.cc +++ b/paddle/fluid/imperative/layout_autotune.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/imperative/layout_autotune.h" + #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/imperative/layout_transformer.h" #include "paddle/phi/backends/gpu/gpu_info.h" @@ -119,8 +120,9 @@ paddle::imperative::NameVarMap AutoTuneLayout( LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NHWC); VLOG(3) << "Tune the layout from " << BOOST_GET_CONST(std::string, (*attrs)["data_format"]) - << " to " << paddle::framework::DataLayoutToString( - LayoutAutoTune::Instance().GetDesiredLayout()); + << " to " + << paddle::framework::DataLayoutToString( + LayoutAutoTune::Instance().GetDesiredLayout()); } else { LayoutAutoTune::Instance().DisableLayoutAutoTune(); return ins; diff --git a/paddle/fluid/imperative/layout_autotune.h b/paddle/fluid/imperative/layout_autotune.h index df3772b826d..2da368910e6 100644 --- a/paddle/fluid/imperative/layout_autotune.h +++ b/paddle/fluid/imperative/layout_autotune.h @@ -14,8 +14,10 @@ #pragma once #include + #include #include + #include "paddle/fluid/framework/type_defs.h" #include "paddle/phi/common/layout.h" diff --git a/paddle/fluid/imperative/nccl_context.cc b/paddle/fluid/imperative/nccl_context.cc index e9d987cc704..4a0dcb1b3bb 100644 --- a/paddle/fluid/imperative/nccl_context.cc +++ b/paddle/fluid/imperative/nccl_context.cc @@ -22,6 +22,7 @@ #ifdef PADDLE_WITH_NCCL #include + #include "paddle/fluid/platform/dynload/nccl.h" #endif @@ -159,8 +160,9 @@ paddle::platform::DeviceContext *NCCLParallelContext::GetDeviceContext( } void NCCLParallelContext::WaitCompute(int ring_id) { - PADDLE_ENFORCE_GE(ring_id, 0, platform::errors::OutOfRange( - "ring id must >= 0, but got %d", ring_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::OutOfRange("ring id must >= 0, but got %d", ring_id)); PADDLE_ENFORCE_LT(ring_id, compute_events_.size(), platform::errors::OutOfRange( "ring id must < compute events size," @@ -185,8 +187,9 @@ void NCCLParallelContext::WaitCompute(int ring_id) { } void NCCLParallelContext::WaitComm(int ring_id) { - PADDLE_ENFORCE_GE(ring_id, 0, platform::errors::OutOfRange( - "ring id must >= 0, but got 
%d", ring_id)); + PADDLE_ENFORCE_GE( + ring_id, 0, + platform::errors::OutOfRange("ring id must >= 0, but got %d", ring_id)); PADDLE_ENFORCE_LT(ring_id, comm_events_.size(), platform::errors::OutOfRange( "ring id must < comm events size," diff --git a/paddle/fluid/imperative/op_base.h b/paddle/fluid/imperative/op_base.h index b8a616ae67d..ba0221a1729 100644 --- a/paddle/fluid/imperative/op_base.h +++ b/paddle/fluid/imperative/op_base.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/framework/type_defs.h" #include "paddle/fluid/imperative/saved_variable_wrapper_list.h" #include "paddle/fluid/imperative/type_defs.h" diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc index f2f64d92a23..a4baca6f257 100644 --- a/paddle/fluid/imperative/partial_grad_engine.cc +++ b/paddle/fluid/imperative/partial_grad_engine.cc @@ -24,6 +24,7 @@ #include #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/imperative/gradient_accumulator.h" #include "paddle/fluid/imperative/layer.h" diff --git a/paddle/fluid/imperative/partial_grad_engine.h b/paddle/fluid/imperative/partial_grad_engine.h index b5da39f8d42..4ec6cdb3fcd 100644 --- a/paddle/fluid/imperative/partial_grad_engine.h +++ b/paddle/fluid/imperative/partial_grad_engine.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/imperative/engine.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index cfd3813d60d..ac997557863 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -258,7 +258,7 @@ PreparedOp PrepareImpl( #if defined(PADDLE_WITH_XPU) && !defined(PADDLE_WITH_XPU_KP) && !is_xpu_unsupport #endif - ) { + ) { VLOG(6) << "Dynamic mode PrepareImpl - kernel name: " << pt_kernel_name << " | kernel key: " << pt_kernel_key << " | kernel: " << phi_kernel; @@ -306,7 +306,7 @@ PreparedOp PrepareImpl( #if defined(PADDLE_WITH_XPU_KP) || (is_xpu_unsupport && !is_xpu_kp_support) #endif - ) { + ) { if (has_phi_kernel) { auto pt_cpu_kernel_key = FallBackToCpu(expected_kernel_key, pt_kernel_key, op); diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index ccc8d64517f..0c2d70dfe3c 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -19,6 +19,7 @@ #include #include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/op_kernel_type.h" #include "paddle/fluid/framework/operator.h" @@ -28,8 +29,6 @@ #include "paddle/fluid/imperative/layer.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/imperative/var_helper.h" - -#include "paddle/fluid/framework/convert_utils.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_context.h" #include "paddle/phi/core/selected_rows.h" diff --git a/paddle/fluid/imperative/profiler.cc b/paddle/fluid/imperative/profiler.cc index 48af63056c5..097f62fe422 100644 --- a/paddle/fluid/imperative/profiler.cc +++ b/paddle/fluid/imperative/profiler.cc @@ -18,7 +18,9 @@ #include "gperftools/profiler.h" #endif #include + #include // NOLINT + #include "paddle/fluid/platform/flags.h" PADDLE_DEFINE_EXPORTED_string( diff --git a/paddle/fluid/imperative/py_layer_fwd.h b/paddle/fluid/imperative/py_layer_fwd.h 
index 2d7d3192038..f5951a52d71 100644 --- a/paddle/fluid/imperative/py_layer_fwd.h +++ b/paddle/fluid/imperative/py_layer_fwd.h @@ -16,12 +16,12 @@ #include #include -#include "paddle/fluid/imperative/layer.h" -#include "paddle/fluid/imperative/prepared_operator.h" -#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/type_defs.h" +#include "paddle/fluid/imperative/layer.h" +#include "paddle/fluid/imperative/prepared_operator.h" +#include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/operators/py_layer_op.h" namespace paddle { diff --git a/paddle/fluid/imperative/reducer.cc b/paddle/fluid/imperative/reducer.cc index c7fd2215eb4..47d7b6366f7 100644 --- a/paddle/fluid/imperative/reducer.cc +++ b/paddle/fluid/imperative/reducer.cc @@ -18,13 +18,10 @@ #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/imperative/layer.h" -#include "paddle/fluid/string/string_helper.h" - +#include "paddle/fluid/imperative/parallel_context.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/strided_memcpy.h" - -#include "paddle/fluid/imperative/parallel_context.h" - +#include "paddle/fluid/string/string_helper.h" #include "paddle/phi/core/dense_tensor.h" namespace paddle { namespace imperative { @@ -452,8 +449,9 @@ void Reducer::InitializeDenseGroups( "Tensor %s is not initialized.", var_name)); const auto size = lod_tensor->numel(); PADDLE_ENFORCE_GT( - size, 0, platform::errors::PreconditionNotMet( - "The number of tensor %s's elements is 0.", var_name)); + size, 0, + platform::errors::PreconditionNotMet( + "The number of tensor %s's elements is 0.", var_name)); all_length += size; p_group->length_.push_back(size); diff --git a/paddle/fluid/imperative/reducer.h b/paddle/fluid/imperative/reducer.h index 9fac4b41cbd..852d8cf076a 100644 --- a/paddle/fluid/imperative/reducer.h +++ b/paddle/fluid/imperative/reducer.h @@ -14,6 +14,7 @@ #pragma once #include + #include #include #include diff --git a/paddle/fluid/imperative/tests/bkcl_context_test.cc b/paddle/fluid/imperative/tests/bkcl_context_test.cc index 580d86b1696..b4d299ba829 100644 --- a/paddle/fluid/imperative/tests/bkcl_context_test.cc +++ b/paddle/fluid/imperative/tests/bkcl_context_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include // NOLINT - #include "paddle/fluid/imperative/bkcl_context.h" +#include // NOLINT + #include "gtest/gtest.h" namespace imperative = paddle::imperative; diff --git a/paddle/fluid/imperative/tests/cncl_context_test.cc b/paddle/fluid/imperative/tests/cncl_context_test.cc index 1d5ee8e7fc8..1019d4eacdc 100644 --- a/paddle/fluid/imperative/tests/cncl_context_test.cc +++ b/paddle/fluid/imperative/tests/cncl_context_test.cc @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/imperative/cncl_context.h" + #include // NOLINT +#include "gtest/gtest.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/imperative/cncl_context.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" -#include "gtest/gtest.h" - namespace imperative = paddle::imperative; namespace platform = paddle::platform; namespace framework = paddle::framework; diff --git a/paddle/fluid/imperative/tests/heter_ccl_context_test.cc b/paddle/fluid/imperative/tests/heter_ccl_context_test.cc index 91f38f82ed0..67059916d03 100644 --- a/paddle/fluid/imperative/tests/heter_ccl_context_test.cc +++ b/paddle/fluid/imperative/tests/heter_ccl_context_test.cc @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/imperative/heter_ccl_context.h" + #include #include // NOLINT +#include "gtest/gtest.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/imperative/heter_ccl_context.h" - -#include "gtest/gtest.h" namespace imperative = paddle::imperative; namespace platform = paddle::platform; diff --git a/paddle/fluid/imperative/tests/nccl_context_test.cc b/paddle/fluid/imperative/tests/nccl_context_test.cc index 9ee083626c5..48479e1412b 100644 --- a/paddle/fluid/imperative/tests/nccl_context_test.cc +++ b/paddle/fluid/imperative/tests/nccl_context_test.cc @@ -12,15 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/imperative/nccl_context.h" + #include // NOLINT +#include "gtest/gtest.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/framework/variable.h" -#include "paddle/fluid/imperative/nccl_context.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" -#include "gtest/gtest.h" - namespace imperative = paddle::imperative; namespace platform = paddle::platform; namespace framework = paddle::framework; diff --git a/paddle/fluid/imperative/tests/test_eager.cc b/paddle/fluid/imperative/tests/test_eager.cc index 3def103ae9a..1d6ec733075 100644 --- a/paddle/fluid/imperative/tests/test_eager.cc +++ b/paddle/fluid/imperative/tests/test_eager.cc @@ -88,8 +88,9 @@ TEST(test_var_helper, eager_var_helper) { egr_tensor, framework::OpKernelType(framework::proto::VarType::FP32, platform::CPUPlace())); SetCachedValue( - egr_tensor, framework::OpKernelType(framework::proto::VarType::FP32, - platform::CPUPlace()), + egr_tensor, + framework::OpKernelType(framework::proto::VarType::FP32, + platform::CPUPlace()), egr_tensor2); ASSERT_ANY_THROW(GetPlace(egr_tensor2)); ASSERT_ANY_THROW(SetType( diff --git a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc index 88b18a4c176..d2e768d6ef1 100644 --- a/paddle/fluid/imperative/tests/test_gradient_accmulator.cc +++ b/paddle/fluid/imperative/tests/test_gradient_accmulator.cc @@ -384,7 +384,7 @@ static void TestGradientAccumulatorTestUnchangeInput( for (auto use_tensor2 : use_tensors) { /** g_accum1 && g_accum2: has not been initialized * test accumulate on this graph - */ + */ auto g_var1 = std::make_shared("g_var1"); g_var1->SetOverridedStopGradient(false); auto g_accum1 = CreateAccumulator(g_var1, sort_gradient); @@ -437,7 +437,7 @@ static void TestGradientAccumulatorTestUnchangeInput( /** g_accum3 && g_accum4: has been initialized * test accumulate on 
previous graph - */ + */ auto var3 = create_var(use_tensor1); auto var_wrapper3_3 = std::make_shared("tmp1_3"); auto var_wrapper4_3 = std::make_shared("tmp2_3"); diff --git a/paddle/fluid/imperative/tests/test_group.cc b/paddle/fluid/imperative/tests/test_group.cc index 5e674af1a08..0025103c531 100644 --- a/paddle/fluid/imperative/tests/test_group.cc +++ b/paddle/fluid/imperative/tests/test_group.cc @@ -14,8 +14,8 @@ #include #include -#include "gtest/gtest.h" +#include "gtest/gtest.h" #include "paddle/fluid/imperative/reducer.h" namespace paddle { diff --git a/paddle/fluid/imperative/tests/test_prepare_op.cc b/paddle/fluid/imperative/tests/test_prepare_op.cc index 4cda3f32fdf..cfda7a0cac4 100644 --- a/paddle/fluid/imperative/tests/test_prepare_op.cc +++ b/paddle/fluid/imperative/tests/test_prepare_op.cc @@ -17,9 +17,11 @@ // #include + #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/imperative/prepared_operator.h" diff --git a/paddle/fluid/imperative/tracer.cc b/paddle/fluid/imperative/tracer.cc index 350263bc545..2295ea4bf67 100644 --- a/paddle/fluid/imperative/tracer.cc +++ b/paddle/fluid/imperative/tracer.cc @@ -12,10 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. #include "paddle/fluid/imperative/tracer.h" + #include #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/imperative/amp_auto_cast.h" #include "paddle/fluid/imperative/execution_context.h" diff --git a/paddle/fluid/imperative/tracer.h b/paddle/fluid/imperative/tracer.h index 4e671d52457..b9048c48470 100644 --- a/paddle/fluid/imperative/tracer.h +++ b/paddle/fluid/imperative/tracer.h @@ -21,6 +21,7 @@ #include #include #include + #include "ThreadPool.h" #include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/imperative/amp_auto_cast.h" diff --git a/paddle/fluid/imperative/var_helper.h b/paddle/fluid/imperative/var_helper.h index 9ce456b1103..91788e73fa5 100644 --- a/paddle/fluid/imperative/var_helper.h +++ b/paddle/fluid/imperative/var_helper.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/variable.h" namespace egr { diff --git a/paddle/fluid/inference/analysis/analysis_pass.h b/paddle/fluid/inference/analysis/analysis_pass.h index 14a1c3eea34..a95498d82d0 100644 --- a/paddle/fluid/inference/analysis/analysis_pass.h +++ b/paddle/fluid/inference/analysis/analysis_pass.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include diff --git a/paddle/fluid/inference/analysis/analyzer.cc b/paddle/fluid/inference/analysis/analyzer.cc index be7d6ab8680..2b56f8e00d6 100644 --- a/paddle/fluid/inference/analysis/analyzer.cc +++ b/paddle/fluid/inference/analysis/analyzer.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/inference/analysis/analyzer.h" + #include #include + #include "paddle/fluid/inference/analysis/passes/passes.h" #include "paddle/fluid/string/pretty_log.h" diff --git a/paddle/fluid/inference/analysis/analyzer.h b/paddle/fluid/inference/analysis/analyzer.h index 4db54706285..95a985158e6 100644 --- a/paddle/fluid/inference/analysis/analyzer.h +++ b/paddle/fluid/inference/analysis/analyzer.h @@ -37,6 +37,7 @@ limitations under the License. 
*/ #include #include + #include "gflags/gflags.h" #include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/inference/analysis/flags.h" diff --git a/paddle/fluid/inference/analysis/analyzer_tester.cc b/paddle/fluid/inference/analysis/analyzer_tester.cc index 3f96fd69e4e..84fcd4e3c39 100644 --- a/paddle/fluid/inference/analysis/analyzer_tester.cc +++ b/paddle/fluid/inference/analysis/analyzer_tester.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/analyzer.h" - #include #include + +#include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/analysis/ut_helper.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/api/paddle_inference_pass.h" diff --git a/paddle/fluid/inference/analysis/dot.h b/paddle/fluid/inference/analysis/dot.h index 6d883f55870..619e3461d3e 100644 --- a/paddle/fluid/inference/analysis/dot.h +++ b/paddle/fluid/inference/analysis/dot.h @@ -20,6 +20,7 @@ #pragma once #include + #include #include #include diff --git a/paddle/fluid/inference/analysis/dot_tester.cc b/paddle/fluid/inference/analysis/dot_tester.cc index c785a312bf9..0b669093a1f 100644 --- a/paddle/fluid/inference/analysis/dot_tester.cc +++ b/paddle/fluid/inference/analysis/dot_tester.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/dot.h" - #include + #include +#include "paddle/fluid/inference/analysis/dot.h" + namespace paddle { namespace inference { namespace analysis { diff --git a/paddle/fluid/inference/analysis/helper.h b/paddle/fluid/inference/analysis/helper.h index 88ae61ff1fc..f9520165161 100644 --- a/paddle/fluid/inference/analysis/helper.h +++ b/paddle/fluid/inference/analysis/helper.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include @@ -72,8 +73,9 @@ struct DataTypeNamer { template const std::string &repr() const { auto x = std::type_index(typeid(T)); - PADDLE_ENFORCE_GT(dic_.count(x), 0, platform::errors::PreconditionNotMet( - "unknown type for representation")); + PADDLE_ENFORCE_GT(dic_.count(x), 0, + platform::errors::PreconditionNotMet( + "unknown type for representation")); return dic_.at(x); } diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc index c5c60564b0f..6c74d7b738c 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.cc +++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/inference/analysis/ir_pass_manager.h" + #include #include #include @@ -20,6 +21,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/scope.h" @@ -29,8 +31,8 @@ namespace paddle { namespace inference { namespace analysis { -using string::PrettyLogEndl; using string::PrettyLog; +using string::PrettyLogEndl; using string::Style; IRPassManager::IRPassManager(Argument *argument) { diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.h b/paddle/fluid/inference/analysis/ir_pass_manager.h index 823dc8907ea..9f9a5fc3471 100644 --- a/paddle/fluid/inference/analysis/ir_pass_manager.h +++ b/paddle/fluid/inference/analysis/ir_pass_manager.h @@ -27,6 +27,7 @@ #include #include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/inference/analysis/ir_passes/dlnne_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/dlnne_subgraph_pass.cc index 8f789139af9..b2a07722829 100644 --- a/paddle/fluid/inference/analysis/ir_passes/dlnne_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/dlnne_subgraph_pass.cc @@ -11,19 +11,19 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include +#include "paddle/fluid/inference/analysis/ir_passes/dlnne_subgraph_pass.h" +#include #include #include +#include +#include #include "paddle/fluid/framework/ir/graph_pattern_detector.h" #include "paddle/fluid/framework/ir/subgraph_detector.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/inference/analysis/helper.h" #include "paddle/fluid/inference/analysis/ir_passes/dlnne_reg_py.h" -#include "paddle/fluid/inference/analysis/ir_passes/dlnne_subgraph_pass.h" #include "paddle/fluid/string/pretty_log.h" namespace paddle { @@ -52,18 +52,39 @@ using framework::ir::Node; void analysis::DlnneSubgraphPass::ApplyImpl(framework::ir::Graph *graph) const { static std::unordered_set teller_set{ - "mul", "matmul", "conv2d", "pool2d", "relu", "softmax", "sigmoid", - "hard_swish", "depthwise_conv2d", "batch_norm", "concat", "tanh", "pad", - "elementwise_add", "elementwise_mul", "dropout", "prelu", - "conv2d_transpose", "leaky_relu", + "mul", + "matmul", + "conv2d", + "pool2d", + "relu", + "softmax", + "sigmoid", + "hard_swish", + "depthwise_conv2d", + "batch_norm", + "concat", + "tanh", + "pad", + "elementwise_add", + "elementwise_mul", + "dropout", + "prelu", + "conv2d_transpose", + "leaky_relu", // "fc", - "shuffle_channel", "swish", "split", + "shuffle_channel", + "swish", + "split", // "instance_norm", "gelu", // "layer_norm", // "scale", // "stack", - "relu6", "reshape2", "transpose2", "concat", "slice", + "relu6", + "reshape2", + "transpose2", + "concat", + "slice", }; framework::ir::FusePassBase::Init("dlnne_subgraph_pass", graph); diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc index 083fc899119..b5ddacd440e 100644 --- a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc +++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc @@ -12,7 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h" + #include +#include +#include #include #include #include @@ -21,28 +25,22 @@ #include #include -#include -#include - +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/ir/subgraph_detector.h" #include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/inference/lite/op_teller.h" #include "paddle/fluid/inference/utils/singleton.h" - -#include "paddle/fluid/framework/ir/graph_pattern_detector.h" -#include "paddle/fluid/framework/ir/subgraph_detector.h" -#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h" #include "paddle/fluid/string/pretty_log.h" -#include "paddle/fluid/inference/lite/engine.h" - namespace paddle { namespace inference { namespace analysis { -using framework::ir::Node; using framework::ir::Agent; -using framework::ir::SubGraphFuser; using framework::ir::Graph; +using framework::ir::Node; +using framework::ir::SubGraphFuser; namespace lite { diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h index e79a64f0f72..198a86c185b 100644 --- a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h +++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h @@ -14,10 +14,12 @@ #pragma once #include + #include #include #include #include + #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/inference/analysis/ir_passes/subgraph_util.h" diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc index 90ad7ec0b44..8c88e2869cc 100644 --- a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc +++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass_tester.cc @@ -12,8 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h" #include + +#include "paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.h" #include "paddle/fluid/inference/io.h" #include "paddle/fluid/inference/lite/op_teller.h" @@ -29,7 +30,7 @@ void AppendLiteSubBlocks(const std::vector& subgraph_ops, framework::ProgramDesc* engine_program, framework::ProgramDesc* host_program, const int32_t host_sub_id); -} +} // namespace lite TEST(LiteSubgraphPass, basic) { framework::ProgramDesc host_program; diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc index 34192965297..05bda4e75c9 100644 --- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc @@ -13,8 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" + #include #include + #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/inference/analysis/ir_pass_manager.h" diff --git a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h index 2c2113c06d9..fca431b5d77 100644 --- a/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_analysis_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc index 321716b1c8a..fca5e256342 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h" + #include #include + #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/inference/io.h" diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h index adbde0433fa..e7ef23e791e 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/analysis/analysis_pass.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc index 0f3633ca6fa..999fb4ad8d7 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h" + #include "paddle/fluid/framework/ir/graph_to_program_pass.h" #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h index 613eb04497e..5b20667d62a 100644 --- a/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h +++ b/paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { diff --git a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc index 614eea24a0e..a0c7a94cd1b 100644 --- a/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc +++ b/paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h" + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc index 3fa417c2ea6..70620e8692c 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc @@ -61,7 +61,8 @@ void MemoryOptimizePass::CollectLifeCycle( auto reads = op_node->inputs; auto writes = op_node->outputs; - std::vector requires(reads.begin(), reads.end()); + std::vector + requires(reads.begin(), reads.end()); requires.insert(requires.end(), writes.begin(), writes.end()); // Disable reuse of feed variables. diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h index 8ca5ffa2581..5dcd8b1059e 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.h @@ -35,16 +35,15 @@ namespace inference { namespace analysis { /* Memory optimization. -* We will perform the following operation: -* 1. Collect all var's lifetime. -* 2. Make reuse plan: the vars can be reused if there is no overlap(on lifetime) -* between -* them. -* The final plan is a mapping table in which the key represents the original -* name of var and the value in the table represents the current name of var. -* 3. Perform reuse plan: Replace all var's name in the model according to the -* mapping table. -*/ + * We will perform the following operation: + * 1. Collect all var's lifetime. + * 2. Make reuse plan: the vars can be reused if there is no overlap(on + * lifetime) between them. The final plan is a mapping table in which the key + * represents the original name of var and the value in the table represents the + * current name of var. + * 3. Perform reuse plan: Replace all var's name in the model according to the + * mapping table. + */ class MemoryOptimizePass : public AnalysisPass { public: using space_table_t = std::unordered_map; diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/passes.cc index ca0b25c29d4..19aab1a948d 100644 --- a/paddle/fluid/inference/analysis/passes/passes.cc +++ b/paddle/fluid/inference/analysis/passes/passes.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/analysis/passes/passes.h" + #include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h" #include "paddle/fluid/inference/analysis/passes/inference_op_replace_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" diff --git a/paddle/fluid/inference/analysis/passes/passes.h b/paddle/fluid/inference/analysis/passes/passes.h index 8a13091d083..b3b240c280c 100644 --- a/paddle/fluid/inference/analysis/passes/passes.h +++ b/paddle/fluid/inference/analysis/passes/passes.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/inference/analysis/analysis_pass.h" namespace paddle { diff --git a/paddle/fluid/inference/analysis/ut_helper.h b/paddle/fluid/inference/analysis/ut_helper.h index 56565c8f3f7..6c7690a4779 100644 --- a/paddle/fluid/inference/analysis/ut_helper.h +++ b/paddle/fluid/inference/analysis/ut_helper.h @@ -14,8 +14,10 @@ limitations under the License. 
*/ #pragma once #include + #include #include + #include "gflags/gflags.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/inference/analysis/helper.h" diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc index 5bb26d8f080..c23397a0828 100644 --- a/paddle/fluid/inference/api/analysis_config.cc +++ b/paddle/fluid/inference/api/analysis_config.cc @@ -15,6 +15,7 @@ #include #include #include + #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/api/paddle_pass_builder.h" #include "paddle/fluid/inference/utils/table_printer.h" @@ -1105,8 +1106,9 @@ LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheBuffers( platform::errors::InvalidArgument( "model_cache_buffer should not be empty.")); PADDLE_ENFORCE_EQ(nnadapter_model_cache_buffers.count(model_cache_token), - false, platform::errors::InvalidArgument( - "model_cache_token has already been set.")); + false, + platform::errors::InvalidArgument( + "model_cache_token has already been set.")); nnadapter_model_cache_buffers[model_cache_token] = model_cache_buffer; return *this; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index b40377855bd..5f9051ff2fd 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -83,9 +83,9 @@ namespace paddle { using inference::Singleton; #if PADDLE_WITH_TENSORRT -using inference::tensorrt::TRTInt8Calibrator; using inference::tensorrt::TRTCalibratorEngine; using inference::tensorrt::TRTCalibratorEngineManager; +using inference::tensorrt::TRTInt8Calibrator; #endif int AnalysisPredictor::clone_num_ = 1; @@ -1027,8 +1027,9 @@ void AnalysisPredictor::OptimizeInferenceProgram() { } template <> -std::unique_ptr CreatePaddlePredictor< - AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) { +std::unique_ptr +CreatePaddlePredictor( + const AnalysisConfig &config) { // TODO(NHZlX): Should add the link to the doc of // paddle_infer::CreatePredictor if (config.glog_info_disabled()) { diff --git a/paddle/fluid/inference/api/analysis_predictor.h b/paddle/fluid/inference/api/analysis_predictor.h index e96526730fd..1cfdaf1a558 100644 --- a/paddle/fluid/inference/api/analysis_predictor.h +++ b/paddle/fluid/inference/api/analysis_predictor.h @@ -41,7 +41,7 @@ using float16 = paddle::platform::float16; namespace experimental { class InternalUtils; }; -} +} // namespace paddle_infer /// /// \file analysis_predictor.h /// @@ -55,10 +55,10 @@ class InternalUtils; namespace paddle { -using inference::analysis::Argument; -using inference::analysis::Analyzer; -using framework::proto::ProgramDesc; using framework::NaiveExecutor; +using framework::proto::ProgramDesc; +using inference::analysis::Analyzer; +using inference::analysis::Argument; /// /// \class AnalysisPredictor diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index e8a1384166a..f16054565a7 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -18,7 +18,9 @@ #endif #include #include + #include // NOLINT + #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/api/helper.h" diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index e2befadf0a8..9e4633774a2 100644 --- 
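analysis_predictor.h also picks up the namespace-trailer convention: a bare } that closes a namespace gains a // namespace <name> comment, which makes long files easier to scan. In sketch form (the patch adds only the outer trailer here; the inner one is shown for symmetry):

namespace paddle_infer {
namespace experimental {
class InternalUtils;
}  // namespace experimental
}  // namespace paddle_infer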
a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "gflags/gflags.h" #include "paddle/fluid/framework/commit.h" #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 1c4369af646..38960aecb70 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -12,13 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/inference/api/api_impl.h" + #include + #include #include #include #include "paddle/fluid/framework/feed_fetch_method.h" -#include "paddle/fluid/inference/api/api_impl.h" #include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/platform/cpu_helper.h" #include "paddle/fluid/platform/place.h" @@ -348,8 +350,9 @@ bool NativePaddlePredictor::GetFetch(std::vector *outputs, } template <> -std::unique_ptr CreatePaddlePredictor< - NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) { +std::unique_ptr +CreatePaddlePredictor( + const NativeConfig &config) { // TODO(NHZlX): Should add the link to the doc of // paddle_infer::CreatePredictor VLOG(3) << "create NativePaddlePredictor"; diff --git a/paddle/fluid/inference/api/api_impl.h b/paddle/fluid/inference/api/api_impl.h index b91eff4573e..d503d258139 100644 --- a/paddle/fluid/inference/api/api_impl.h +++ b/paddle/fluid/inference/api/api_impl.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/inference/api/api_tester.cc b/paddle/fluid/inference/api/api_tester.cc index 46724fa6b1a..1faf46fad2b 100644 --- a/paddle/fluid/inference/api/api_tester.cc +++ b/paddle/fluid/inference/api/api_tester.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include #include diff --git a/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc index ef5c08cd041..f9ac07a8304 100644 --- a/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/onnxruntime_mobilenet_demo.cc @@ -17,7 +17,9 @@ limitations under the License. */ */ #include // use glog instead of CHECK to avoid importing other paddle header files. + #include + #include "gflags/gflags.h" #include "utils.h" // NOLINT diff --git a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc index 9edb4ecbfd2..551b66fcaf7 100644 --- a/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/trt_mobilenet_demo.cc @@ -17,6 +17,7 @@ limitations under the License. */ */ #include // use glog instead of CHECK to avoid importing other paddle header files. 
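The api_impl.cc hunk above shows the new break strategy for over-long declarations: rather than splitting the template argument list, the formatter puts the return type on its own line and keeps the full template-id together. Written out in full (the angle-bracketed arguments are reconstructed from the surrounding CreatePaddlePredictor declarations, not spelled out in the hunk):

template <>
std::unique_ptr<PaddlePredictor>
CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(
    const NativeConfig &config) {
  // function body unchanged by the patch
}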
+ #include "gflags/gflags.h" #include "utils.h" // NOLINT diff --git a/paddle/fluid/inference/api/demo_ci/utils.h b/paddle/fluid/inference/api/demo_ci/utils.h index b4f40194aa9..dfba4b8ebf6 100644 --- a/paddle/fluid/inference/api/demo_ci/utils.h +++ b/paddle/fluid/inference/api/demo_ci/utils.h @@ -14,11 +14,13 @@ #pragma once #include + #include #include #include #include #include + #include "paddle/include/paddle_inference_api.h" namespace paddle { diff --git a/paddle/fluid/inference/api/demo_ci/vis_demo.cc b/paddle/fluid/inference/api/demo_ci/vis_demo.cc index 818444fbcb6..352efc1e63d 100644 --- a/paddle/fluid/inference/api/demo_ci/vis_demo.cc +++ b/paddle/fluid/inference/api/demo_ci/vis_demo.cc @@ -17,6 +17,7 @@ limitations under the License. */ */ #include + #include "gflags/gflags.h" #include "utils.h" // NOLINT diff --git a/paddle/fluid/inference/api/demo_ci/windows_mobilenet.cc b/paddle/fluid/inference/api/demo_ci/windows_mobilenet.cc index 8d0538f8fa5..b1f770066e7 100644 --- a/paddle/fluid/inference/api/demo_ci/windows_mobilenet.cc +++ b/paddle/fluid/inference/api/demo_ci/windows_mobilenet.cc @@ -13,14 +13,15 @@ // limitations under the License. #include + #include #include #include #include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/include/paddle_inference_api.h" DEFINE_string(modeldir, "", "Directory of the inference model."); diff --git a/paddle/fluid/inference/api/details/zero_copy_tensor.cc b/paddle/fluid/inference/api/details/zero_copy_tensor.cc index bb966dc5c6c..661d9def406 100644 --- a/paddle/fluid/inference/api/details/zero_copy_tensor.cc +++ b/paddle/fluid/inference/api/details/zero_copy_tensor.cc @@ -340,8 +340,9 @@ void Tensor::CopyToCpuImpl(T *data, void *exec_stream, CallbackFunc cb, #ifdef PADDLE_WITH_MKLDNN if (tensor->layout() == paddle::framework::DataLayout::kMKLDNN) paddle::framework::innerTransDataLayoutFromMKLDNN( - tensor->layout(), paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + tensor->layout(), + paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout(), *tensor, &out, paddle::platform::CPUPlace(), true); else std::memcpy(static_cast(data), t_data, ele_num * sizeof(T)); @@ -852,8 +853,9 @@ void InternalUtils::CopyToCpuWithIoStream(paddle_infer::Tensor *t, T *data, #ifdef PADDLE_WITH_MKLDNN if (tensor->layout() == paddle::framework::DataLayout::kMKLDNN) paddle::framework::innerTransDataLayoutFromMKLDNN( - tensor->layout(), paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + tensor->layout(), + paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout(), *tensor, &out, paddle::platform::CPUPlace(), true); else std::memcpy(static_cast(data), t_data, ele_num * sizeof(T)); diff --git a/paddle/fluid/inference/api/helper.cc b/paddle/fluid/inference/api/helper.cc index b9e0e90a403..3454c5c8fd1 100644 --- a/paddle/fluid/inference/api/helper.cc +++ b/paddle/fluid/inference/api/helper.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/inference/api/helper.h" + #include "paddle/fluid/framework/custom_operator.h" #include "paddle/fluid/framework/operator.h" #include "paddle/phi/api/ext/op_meta_info.h" diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h index acc52ac0468..1c58b004e6d 100644 --- a/paddle/fluid/inference/api/helper.h +++ b/paddle/fluid/inference/api/helper.h @@ -15,6 +15,7 @@ #pragma once #include + #include #if !defined(_WIN32) #include @@ -377,8 +378,9 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid, double batch_latency, int epoch = 1, const framework::proto::VarType::Type data_type = framework::proto::VarType::FP32) { - PADDLE_ENFORCE_GT(batch_size, 0, platform::errors::InvalidArgument( - "Non-positive batch size.")); + PADDLE_ENFORCE_GT( + batch_size, 0, + platform::errors::InvalidArgument("Non-positive batch size.")); double sample_latency = batch_latency / batch_size; LOG(INFO) << "====== threads: " << num_threads << ", thread id: " << tid << " ======"; diff --git a/paddle/fluid/inference/api/infer_context.h b/paddle/fluid/inference/api/infer_context.h index b7a8bf637d8..c2a23a7ca2c 100644 --- a/paddle/fluid/inference/api/infer_context.h +++ b/paddle/fluid/inference/api/infer_context.h @@ -25,21 +25,21 @@ class InferCPUContext : public phi::CPUContext { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) class InferGPUContext : public phi::GPUContext { public: - using phi::GPUContext::SetStream; - using phi::GPUContext::SetEigenDevice; using phi::GPUContext::SetBlasHandle; using phi::GPUContext::SetBlasTensorCoreHandle; using phi::GPUContext::SetBlasTF32Handle; using phi::GPUContext::SetDnnHandle; + using phi::GPUContext::SetEigenDevice; using phi::GPUContext::SetSolverHandle; using phi::GPUContext::SetSparseHandle; + using phi::GPUContext::SetStream; // using phi::GPUContext::SetDnnWorkspaceHandle; using phi::GPUContext::SetComputeCapability; + using phi::GPUContext::SetDriverVersion; + using phi::GPUContext::SetMaxGridDimSize; + using phi::GPUContext::SetMaxThreadsPerBlock; using phi::GPUContext::SetMaxThreadsPerMultiProcessor; using phi::GPUContext::SetMultiProcessors; - using phi::GPUContext::SetMaxThreadsPerBlock; - using phi::GPUContext::SetMaxGridDimSize; - using phi::GPUContext::SetDriverVersion; using phi::GPUContext::SetRuntimeVersion; }; #endif diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.cc b/paddle/fluid/inference/api/mkldnn_quantizer.cc index 4dc80a1d753..73096973c38 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer.cc @@ -13,12 +13,14 @@ // limitations under the License. 
#include "paddle/fluid/inference/api/mkldnn_quantizer.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/ir/fuse_pass_base.h" #include "paddle/fluid/framework/ir/graph.h" @@ -33,10 +35,10 @@ namespace paddle { -using platform::CPUPlace; using framework::LoDTensor; using framework::Variable; using framework::ir::Graph; +using platform::CPUPlace; using ConstEigenVectorArrayMap = Eigen::Map>; using EigenMatrixDoubleArray = @@ -57,8 +59,9 @@ static void check_var(const Variable* var, const std::string& var_name) { } static void check_tensor(const LoDTensor& tensor) { - PADDLE_ENFORCE_GT(tensor.dims().size(), 0, platform::errors::InvalidArgument( - "Tensor dimension is empty.")); + PADDLE_ENFORCE_GT( + tensor.dims().size(), 0, + platform::errors::InvalidArgument("Tensor dimension is empty.")); } void AnalysisPredictor::MkldnnQuantizer::CalculateScalesForRNNWeights( @@ -531,8 +534,9 @@ AnalysisPredictor::MkldnnQuantizer::Histogram( PADDLE_ENFORCE_GE(max_val, min_val, platform::errors::InvalidArgument( "MkldnnQuantizer: To calculate Histogram, max_val (" + - std::to_string(max_val) + ") must be greater or equal" - "to min_val (" + + std::to_string(max_val) + + ") must be greater or equal" + "to min_val (" + std::to_string(min_val) + ").")); ConstEigenVectorArrayMap eigen_tensor{var_tensor.data(), var_tensor.numel(), 1}; @@ -570,7 +574,8 @@ void AnalysisPredictor::MkldnnQuantizer::PrepareArgument() const { auto* builder = predictor_.config_.pass_builder(); builder->SetPasses({ - "cpu_quantize_pass", "cpu_quantize_squash_pass", + "cpu_quantize_pass", + "cpu_quantize_squash_pass", "int8_scale_calculation_mkldnn_pass", }); if (predictor_.config_.ir_debug_) builder->TurnOnDebug(); diff --git a/paddle/fluid/inference/api/mkldnn_quantizer.h b/paddle/fluid/inference/api/mkldnn_quantizer.h index 5e7aa39de52..811f2941a7d 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer.h +++ b/paddle/fluid/inference/api/mkldnn_quantizer.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/inference/analysis/analyzer.h" #include "paddle/fluid/inference/api/analysis_predictor.h" diff --git a/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc b/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc index 2bee4763d4f..05077f8ba34 100644 --- a/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc +++ b/paddle/fluid/inference/api/mkldnn_quantizer_tester.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/inference/api/mkldnn_quantizer.h" #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/api/analysis_predictor.h" +#include "paddle/fluid/inference/api/mkldnn_quantizer.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" DEFINE_string(dirname, "", "dirname to tests."); diff --git a/paddle/fluid/inference/api/onnxruntime_predictor.h b/paddle/fluid/inference/api/onnxruntime_predictor.h index d01756e4b96..294a83a4335 100644 --- a/paddle/fluid/inference/api/onnxruntime_predictor.h +++ b/paddle/fluid/inference/api/onnxruntime_predictor.h @@ -18,6 +18,9 @@ #include #include #include + +#include "onnxruntime_c_api.h" // NOLINT +#include "onnxruntime_cxx_api.h" // NOLINT #include "paddle/fluid/framework/naive_executor.h" #include "paddle/fluid/framework/op_compatible_info.h" #include "paddle/fluid/inference/analysis/analyzer.h" @@ -27,9 +30,6 @@ #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/platform/device/gpu/gpu_types.h" #include "paddle/fluid/string/printf.h" - -#include "onnxruntime_c_api.h" // NOLINT -#include "onnxruntime_cxx_api.h" // NOLINT #include "paddle2onnx/converter.h" #ifdef PADDLE_WITH_TESTING diff --git a/paddle/fluid/inference/api/onnxruntime_predictor_tester.cc b/paddle/fluid/inference/api/onnxruntime_predictor_tester.cc index 4a702edacc9..ff8528c0850 100644 --- a/paddle/fluid/inference/api/onnxruntime_predictor_tester.cc +++ b/paddle/fluid/inference/api/onnxruntime_predictor_tester.cc @@ -12,16 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/api/onnxruntime_predictor.h" - #include #include + #include #include // NOLINT #include + #include "paddle/fluid/framework/ir/pass.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/api/helper.h" +#include "paddle/fluid/inference/api/onnxruntime_predictor.h" #include "paddle/fluid/inference/api/paddle_api.h" #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index ab2265bff24..489c32bc59d 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -912,11 +912,18 @@ struct PD_INFER_DECL AnalysisConfig { bool thread_local_stream_{false}; bool use_gpu_fp16_{false}; std::unordered_set gpu_fp16_disabled_op_types_{ - "conv2d_fusion", "conv2d", "roll", "strided_slice", "depthwise_conv2d", - "unfold", "generate_proposals_v2", "nearest_interp_v2", + "conv2d_fusion", + "conv2d", + "roll", + "strided_slice", + "depthwise_conv2d", + "unfold", + "generate_proposals_v2", + "nearest_interp_v2", "bilinear_interp_v2" "yolo_box", - "multiclass_nms3", "matrix_nms"}; + "multiclass_nms3", + "matrix_nms"}; bool use_cudnn_{false}; diff --git a/paddle/fluid/inference/api/paddle_api.h b/paddle/fluid/inference/api/paddle_api.h index 711998e9956..78af756c24b 100644 --- a/paddle/fluid/inference/api/paddle_api.h +++ b/paddle/fluid/inference/api/paddle_api.h @@ -27,6 +27,7 @@ #include #include #include + #include "crypto/cipher.h" #include "paddle_infer_declare.h" // NOLINT #include "paddle_tensor.h" // NOLINT @@ -391,12 +392,14 @@ PD_INFER_DECL std::unique_ptr CreatePaddlePredictor( const ConfigT& config); template <> -PD_INFER_DECL std::unique_ptr CreatePaddlePredictor< - NativeConfig, 
PaddleEngineKind::kNative>(const NativeConfig& config); +PD_INFER_DECL std::unique_ptr +CreatePaddlePredictor( + const NativeConfig& config); template <> -PD_INFER_DECL std::unique_ptr CreatePaddlePredictor< - AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig& config); +PD_INFER_DECL std::unique_ptr +CreatePaddlePredictor( + const AnalysisConfig& config); template <> PD_INFER_DECL std::unique_ptr diff --git a/paddle/fluid/inference/api/paddle_infer_contrib.cc b/paddle/fluid/inference/api/paddle_infer_contrib.cc index d27f20a93b3..e785e91a671 100644 --- a/paddle/fluid/inference/api/paddle_infer_contrib.cc +++ b/paddle/fluid/inference/api/paddle_infer_contrib.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/api/paddle_infer_contrib.h" + #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 04e77faf2e3..9e5b76db4ac 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -20,6 +20,7 @@ #include #endif #include + #include #include diff --git a/paddle/fluid/inference/api/resource_manager.h b/paddle/fluid/inference/api/resource_manager.h index c41968dc585..24e76598e40 100644 --- a/paddle/fluid/inference/api/resource_manager.h +++ b/paddle/fluid/inference/api/resource_manager.h @@ -15,6 +15,7 @@ #include #include + #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/backends/cpu/forwards.h" diff --git a/paddle/fluid/inference/capi/c_api.cc b/paddle/fluid/inference/capi/c_api.cc index 07493c742c4..f2a9838f4bc 100644 --- a/paddle/fluid/inference/capi/c_api.cc +++ b/paddle/fluid/inference/capi/c_api.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/capi/c_api_internal.h b/paddle/fluid/inference/capi/c_api_internal.h index 7e69b721076..11728fb9878 100644 --- a/paddle/fluid/inference/capi/c_api_internal.h +++ b/paddle/fluid/inference/capi/c_api_internal.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/api/paddle_api.h" #include "paddle/fluid/inference/capi/paddle_c_api.h" diff --git a/paddle/fluid/inference/capi/pd_config.cc b/paddle/fluid/inference/capi/pd_config.cc index 9bb52ba5780..2bacc94c0d1 100644 --- a/paddle/fluid/inference/capi/pd_config.cc +++ b/paddle/fluid/inference/capi/pd_config.cc @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/capi/pd_predictor.cc b/paddle/fluid/inference/capi/pd_predictor.cc index 12d7f78e169..e88fbfc5a86 100644 --- a/paddle/fluid/inference/capi/pd_predictor.cc +++ b/paddle/fluid/inference/capi/pd_predictor.cc @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/fluid/inference/api/paddle_api.h" #include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/paddle_c_api.h" diff --git a/paddle/fluid/inference/capi/pd_tensor.cc b/paddle/fluid/inference/capi/pd_tensor.cc index 9b1eedd7c5a..199db92d1b0 100644 --- a/paddle/fluid/inference/capi/pd_tensor.cc 
+++ b/paddle/fluid/inference/capi/pd_tensor.cc @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/inference/capi/c_api_internal.h" #include "paddle/fluid/inference/capi/paddle_c_api.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/capi_exp/lod_demo.cc b/paddle/fluid/inference/capi_exp/lod_demo.cc index 2b049e992e7..c67d6f870bd 100644 --- a/paddle/fluid/inference/capi_exp/lod_demo.cc +++ b/paddle/fluid/inference/capi_exp/lod_demo.cc @@ -27,8 +27,10 @@ #include #include #include + #include #include + #include "paddle/fluid/inference/capi_exp/pd_inference_api.h" int main(int argc, char *argv[]) { diff --git a/paddle/fluid/inference/capi_exp/pd_config.cc b/paddle/fluid/inference/capi_exp/pd_config.cc index d290f44d2ee..4e1c5a2a0dd 100644 --- a/paddle/fluid/inference/capi_exp/pd_config.cc +++ b/paddle/fluid/inference/capi_exp/pd_config.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/capi_exp/pd_config.h" + #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/capi_exp/pd_types.h" #include "paddle/fluid/inference/capi_exp/utils_internal.h" diff --git a/paddle/fluid/inference/capi_exp/pd_predictor.cc b/paddle/fluid/inference/capi_exp/pd_predictor.cc index 5ca58b0e413..c85dfdf522e 100644 --- a/paddle/fluid/inference/capi_exp/pd_predictor.cc +++ b/paddle/fluid/inference/capi_exp/pd_predictor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/capi_exp/pd_predictor.h" + #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/capi_exp/pd_types.h" #include "paddle/fluid/inference/capi_exp/pd_utils.h" diff --git a/paddle/fluid/inference/capi_exp/pd_tensor.cc b/paddle/fluid/inference/capi_exp/pd_tensor.cc index 9c661dea6f2..520cfa813f4 100644 --- a/paddle/fluid/inference/capi_exp/pd_tensor.cc +++ b/paddle/fluid/inference/capi_exp/pd_tensor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/inference/capi_exp/pd_tensor.h" + #include "paddle/fluid/inference/api/paddle_inference_api.h" #include "paddle/fluid/inference/capi_exp/pd_types.h" #include "paddle/fluid/inference/capi_exp/pd_utils.h" diff --git a/paddle/fluid/inference/capi_exp/pd_utils.cc b/paddle/fluid/inference/capi_exp/pd_utils.cc index efca350fbaf..7942a860c4e 100644 --- a/paddle/fluid/inference/capi_exp/pd_utils.cc +++ b/paddle/fluid/inference/capi_exp/pd_utils.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/inference/capi_exp/pd_utils.h" + #include #include "paddle/fluid/inference/api/paddle_inference_api.h" -#include "paddle/fluid/inference/capi_exp/pd_utils.h" #include "paddle/fluid/inference/capi_exp/utils_internal.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp index 593ba3cb51d..efea093fa24 100644 --- a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp +++ b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Config.cpp @@ -13,9 +13,10 @@ // limitations under the License. 
#include "com_baidu_paddle_inference_Config.h" + #include -#include "jni_convert_util.h" // NOLINT +#include "jni_convert_util.h" // NOLINT #include "pd_inference_api.h" // NOLINT JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Config_cppConfigDestroy( diff --git a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Predictor.cpp b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Predictor.cpp index 7eff03690ae..0912c2ad57a 100644 --- a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Predictor.cpp +++ b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Predictor.cpp @@ -13,7 +13,9 @@ // limitations under the License. #include "com_baidu_paddle_inference_Predictor.h" + #include + #include "jni_convert_util.h" // NOLINT #include "pd_inference_api.h" // NOLINT diff --git a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Tensor.cpp b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Tensor.cpp index b9be4a73ac2..a90ae165ebd 100644 --- a/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Tensor.cpp +++ b/paddle/fluid/inference/experimental/javaapi/native/com_baidu_paddle_inference_Tensor.cpp @@ -13,7 +13,9 @@ // limitations under the License. #include "com_baidu_paddle_inference_Tensor.h" + #include + #include "pd_inference_api.h" // NOLINT JNIEXPORT void JNICALL Java_com_baidu_paddle_inference_Tensor_cppTensorDestroy( diff --git a/paddle/fluid/inference/experimental/javaapi/native/jni_convert_util.h b/paddle/fluid/inference/experimental/javaapi/native/jni_convert_util.h index 0026ec2f410..c363559298f 100644 --- a/paddle/fluid/inference/experimental/javaapi/native/jni_convert_util.h +++ b/paddle/fluid/inference/experimental/javaapi/native/jni_convert_util.h @@ -17,6 +17,7 @@ #include #include + #include #include @@ -54,8 +55,8 @@ inline jstring cpp_string_to_jstring(JNIEnv *env, std::string str) { reinterpret_cast(data)); jstring encoding = env->NewStringUTF("UTF-8"); - jstring res = (jstring)( - env->NewObject(strClass, strClassInitMethodID, bytes, encoding)); + jstring res = (jstring)(env->NewObject(strClass, strClassInitMethodID, bytes, + encoding)); env->DeleteLocalRef(strClass); env->DeleteLocalRef(encoding); diff --git a/paddle/fluid/inference/io.h b/paddle/fluid/inference/io.h index 317ef9d93ac..1106ad261ec 100644 --- a/paddle/fluid/inference/io.h +++ b/paddle/fluid/inference/io.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" diff --git a/paddle/fluid/inference/lite/engine.cc b/paddle/fluid/inference/lite/engine.cc index cd78cfecd86..8f8f68b170b 100644 --- a/paddle/fluid/inference/lite/engine.cc +++ b/paddle/fluid/inference/lite/engine.cc @@ -25,6 +25,7 @@ #endif #include "paddle/fluid/inference/lite/engine.h" + #include namespace paddle { diff --git a/paddle/fluid/inference/lite/op_teller.cc b/paddle/fluid/inference/lite/op_teller.cc index 3a162c3fde1..3d2ed0a5c98 100644 --- a/paddle/fluid/inference/lite/op_teller.cc +++ b/paddle/fluid/inference/lite/op_teller.cc @@ -12,12 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/inference/lite/op_teller.h" + #include #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/inference/lite/engine.h" -#include "paddle/fluid/inference/lite/op_teller.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/lite/op_teller.h b/paddle/fluid/inference/lite/op_teller.h index b9391a98a2e..1a969f1293d 100644 --- a/paddle/fluid/inference/lite/op_teller.h +++ b/paddle/fluid/inference/lite/op_teller.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/op_desc.h" namespace paddle { diff --git a/paddle/fluid/inference/lite/tensor_utils.cc b/paddle/fluid/inference/lite/tensor_utils.cc index eeaa1282903..f70455f18eb 100644 --- a/paddle/fluid/inference/lite/tensor_utils.cc +++ b/paddle/fluid/inference/lite/tensor_utils.cc @@ -13,9 +13,11 @@ // limitations under the License. #include "paddle/fluid/inference/lite/tensor_utils.h" + #include #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/inference/lite/engine.h" @@ -26,9 +28,9 @@ namespace inference { namespace lite { namespace utils { -using paddle::lite_api::TargetType; -using paddle::lite_api::PrecisionType; using paddle::lite_api::DataLayoutType; +using paddle::lite_api::PrecisionType; +using paddle::lite_api::TargetType; template void SetLoD(DstLoD* dst, const SrcLoD& src) { diff --git a/paddle/fluid/inference/lite/test_engine_lite.cc b/paddle/fluid/inference/lite/test_engine_lite.cc index 85f7d3ee363..dee83f70ba2 100644 --- a/paddle/fluid/inference/lite/test_engine_lite.cc +++ b/paddle/fluid/inference/lite/test_engine_lite.cc @@ -14,14 +14,12 @@ #include -#include "paddle/fluid/inference/utils/singleton.h" - #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" - #include "paddle/fluid/inference/lite/engine.h" +#include "paddle/fluid/inference/utils/singleton.h" #include "paddle/fluid/operators/lite/ut_helper.h" namespace paddle { @@ -29,9 +27,9 @@ namespace inference { namespace lite { using inference::lite::AddTensorToBlockDesc; -using paddle::inference::lite::AddFetchListToBlockDesc; using inference::lite::CreateTensor; using inference::lite::serialize_params; +using paddle::inference::lite::AddFetchListToBlockDesc; void make_fake_model(std::string* model, std::string* param) { framework::ProgramDesc program; diff --git a/paddle/fluid/inference/lite/test_tensor_utils.cc b/paddle/fluid/inference/lite/test_tensor_utils.cc index b0c7c7448a5..09a6cda62b3 100644 --- a/paddle/fluid/inference/lite/test_tensor_utils.cc +++ b/paddle/fluid/inference/lite/test_tensor_utils.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/inference/lite/tensor_utils.h" @@ -21,9 +22,9 @@ namespace inference { namespace lite { namespace utils { -using paddle::lite_api::TargetType; -using paddle::lite_api::PrecisionType; using paddle::lite_api::DataLayoutType; +using paddle::lite_api::PrecisionType; +using paddle::lite_api::TargetType; TEST(LiteEngineOp, GetNativePlace) { ::testing::FLAGS_gtest_death_test_style = "threadsafe"; diff --git a/paddle/fluid/inference/tensorrt/convert/activation_op.cc b/paddle/fluid/inference/tensorrt/convert/activation_op.cc index b86351e394b..2ef8ec16c76 100644 --- a/paddle/fluid/inference/tensorrt/convert/activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/activation_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include "glog/logging.h" diff --git a/paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc b/paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc index 2bbe6ea3d2f..df6c601500c 100644 --- a/paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/deformable_conv_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.h" diff --git a/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc b/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc index e08f50833ed..c293282b761 100644 --- a/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/flatten_contiguous_range_op.cc @@ -50,10 +50,11 @@ class FlattenContiguousRangeOpConverter : public OpConverter { for (int i = 0, j = 0; i < dims; ++i) { if (start_axis <= i + 1 && i + 1 <= stop_axis) { int dim_i = input_dim.d[i]; - PADDLE_ENFORCE_GT(dim_i, 0, platform::errors::InvalidArgument( - "flatten_contiguous_range input dim " - "should be > 0, but got %d.", - dim_i)); + PADDLE_ENFORCE_GT(dim_i, 0, + platform::errors::InvalidArgument( + "flatten_contiguous_range input dim " + "should be > 0, but got %d.", + dim_i)); dim_prod *= dim_i; if (i + 1 == stop_axis) { flatten_dim.d[j++] = dim_prod; diff --git a/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc index 910a807d362..2a62f9009e2 100644 --- a/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.cc b/paddle/fluid/inference/tensorrt/convert/io_converter.cc index b468518fa5a..02e9610ea1e 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.cc +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.cc @@ -13,15 +13,17 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/inference/tensorrt/convert/io_converter.h" + #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { namespace inference { namespace tensorrt { -using platform::is_gpu_place; using platform::is_cpu_place; +using platform::is_gpu_place; class DefaultIOConverter : public EngineIOConverter { public: @@ -49,8 +51,9 @@ class DefaultIOConverter : public EngineIOConverter { out, in.data(), size, cudaMemcpyHostToDevice, *stream_)); } else if (is_gpu_place(place)) { PADDLE_ENFORCE_EQ( - 0, cudaMemcpyAsync(out, in.data(), size, - cudaMemcpyDeviceToDevice, *stream_), + 0, + cudaMemcpyAsync(out, in.data(), size, cudaMemcpyDeviceToDevice, + *stream_), platform::errors::External( "cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error.")); } else { @@ -78,14 +81,16 @@ class DefaultIOConverter : public EngineIOConverter { "But out's memory_size = %u, max_size = %u.", size, max_size)); if (is_cpu_place(place)) { - PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(out->data(), in, size, - cudaMemcpyDeviceToHost, *stream_), + PADDLE_ENFORCE_EQ(0, + cudaMemcpyAsync(out->data(), in, size, + cudaMemcpyDeviceToHost, *stream_), platform::errors::External( "cudaMemcpyAsync(cudaMemcpyDeviceToHost) error.")); } else if (is_gpu_place(place)) { PADDLE_ENFORCE_EQ( - 0, cudaMemcpyAsync(out->data(), in, size, - cudaMemcpyDeviceToDevice, *stream_), + 0, + cudaMemcpyAsync(out->data(), in, size, + cudaMemcpyDeviceToDevice, *stream_), platform::errors::External( "cudaMemcpyAsync(cudaMemcpyDeviceToDevice) error.")); } else { diff --git a/paddle/fluid/inference/tensorrt/convert/io_converter.h b/paddle/fluid/inference/tensorrt/convert/io_converter.h index 58c178028b8..3ff78a6dc7a 100644 --- a/paddle/fluid/inference/tensorrt/convert/io_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/io_converter.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/inference/utils/singleton.h" diff --git a/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc b/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc index a968ea2a2c4..ae392675339 100644 --- a/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multiclass_nms3_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc b/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc index b0d67a5bf90..d630f7e9967 100644 --- a/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/multiclass_nms_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/convert/op_converter.h b/paddle/fluid/inference/tensorrt/convert/op_converter.h index 0a99b12edc2..077ba32ba89 100644 --- a/paddle/fluid/inference/tensorrt/convert/op_converter.h +++ b/paddle/fluid/inference/tensorrt/convert/op_converter.h @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/scope.h" @@ -268,14 +269,16 @@ class OpConverter { } } engine->DeclareInput( - input, FluidDataType2TRT( - var->Proto()->type().lod_tensor().tensor().data_type()), + input, + FluidDataType2TRT( + var->Proto()->type().lod_tensor().tensor().data_type()), Vec2TRT_Dims(input_shape, input, true)); #endif } else { engine->DeclareInput( - input, FluidDataType2TRT( - var->Proto()->type().lod_tensor().tensor().data_type()), + input, + FluidDataType2TRT( + var->Proto()->type().lod_tensor().tensor().data_type()), Vec2TRT_Dims(var_shape, input)); } } diff --git a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc index 46e6c18bfb8..66acee964cd 100644 --- a/paddle/fluid/inference/tensorrt/convert/softmax_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/softmax_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc index 1ad82df4173..7a034f2c166 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_activation_op.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" diff --git a/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc index 92e34e48bdb..caa9e9ee289 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_batch_norm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" diff --git a/paddle/fluid/inference/tensorrt/convert/test_concat_op.cc b/paddle/fluid/inference/tensorrt/convert/test_concat_op.cc index 6c876964297..b1319312adf 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_concat_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_concat_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/inference/tensorrt/convert/op_converter.h" #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h" diff --git a/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc index a856d141444..0b9f4a5fd84 100644 --- a/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
diff --git a/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc b/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc
index a856d141444..0b9f4a5fd84 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_conv2d_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_dropout_op.cc b/paddle/fluid/inference/tensorrt/convert/test_dropout_op.cc
index cf377396087..2d77b9b32db 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_dropout_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_dropout_op.cc
@@ -12,6 +12,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
index 9c6ea51fe5a..5221843db19 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_elementwise_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
index 8134d389469..4647521dd32 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
index 8f91309a0a0..a2fe32b75f3 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_io_converter.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/inference/tensorrt/convert/io_converter.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_leaky_relu_op.cc b/paddle/fluid/inference/tensorrt/convert/test_leaky_relu_op.cc
index f17e00de0ee..f7984dd0ab7 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_leaky_relu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_leaky_relu_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_mish_op.cc b/paddle/fluid/inference/tensorrt/convert/test_mish_op.cc
index c84c30255fa..d2dbb7fb592 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_mish_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_mish_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
index 86cb7543d42..35b8fe1ee6a 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_nearest_interp_v2_op.cc b/paddle/fluid/inference/tensorrt/convert/test_nearest_interp_v2_op.cc
index f5ab6a99249..96b14c4e40c 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_nearest_interp_v2_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_nearest_interp_v2_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
index 9bfae64fe80..9a4d4db3435 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_op_converter.cc
@@ -12,11 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
-
 #include  // NOLINT

 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"

 namespace paddle {
 namespace inference {
diff --git a/paddle/fluid/inference/tensorrt/convert/test_pad_op.cc b/paddle/fluid/inference/tensorrt/convert/test_pad_op.cc
index ba35d7ddbb2..a8e36f827d8 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_pad_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_pad_op.cc
@@ -13,6 +13,7 @@ limitations under the License. */
 #include
+
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
index 36f13262a73..b917aa865d2 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_pool2d_op.cc
@@ -12,7 +12,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include
+
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_prelu_op.cc b/paddle/fluid/inference/tensorrt/convert/test_prelu_op.cc
index f2541ff7c0b..d71cf051972 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_prelu_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_prelu_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_shuffle_channel_op.cc b/paddle/fluid/inference/tensorrt/convert/test_shuffle_channel_op.cc
index 3ebb51afdf4..b5e640ea244 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_shuffle_channel_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_shuffle_channel_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_softmax_op.cc b/paddle/fluid/inference/tensorrt/convert/test_softmax_op.cc
index 9cd5e811415..babe682ab4e 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_softmax_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_softmax_op.cc
@@ -12,6 +12,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_split_op.cc b/paddle/fluid/inference/tensorrt/convert/test_split_op.cc
index 3b6a4a80044..1d23aeedc5a 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_split_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_split_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/test_swish_op.cc b/paddle/fluid/inference/tensorrt/convert/test_swish_op.cc
index 7a5a886affe..94ca6f0ed46 100644
--- a/paddle/fluid/inference/tensorrt/convert/test_swish_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/test_swish_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/convert/ut_helper.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/unary_op.cc b/paddle/fluid/inference/tensorrt/convert/unary_op.cc
index aa3d38ebe20..72d5cb2aeb4 100644
--- a/paddle/fluid/inference/tensorrt/convert/unary_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/unary_op.cc
@@ -13,7 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include
+
 #include "glog/logging.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
diff --git a/paddle/fluid/inference/tensorrt/convert/yolo_box_op.cc b/paddle/fluid/inference/tensorrt/convert/yolo_box_op.cc
index 17d217dff43..f5ab63daa88 100644
--- a/paddle/fluid/inference/tensorrt/convert/yolo_box_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/yolo_box_op.cc
@@ -10,6 +10,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/plugin/yolo_box_op_plugin.h"
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index 00a6b2ffbf9..7f308fd3a04 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include
 #include
+
 #include
 #include "cuda_runtime_api.h"  // NOLINT
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index 598d751ad5f..b28fe827156 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -15,6 +15,7 @@ limitations under the License. */
 #pragma once

 #include
+
 #include
 #include
 #include  // NOLINT
@@ -151,7 +152,7 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector& shape, std::string input,
     return dims;
   }
 }
-}  // NOLINT
+}  // namespace

 class TRTInt8Calibrator;
diff --git a/paddle/fluid/inference/tensorrt/helper.h b/paddle/fluid/inference/tensorrt/helper.h
index b8051d86104..e283000cdac 100644
--- a/paddle/fluid/inference/tensorrt/helper.h
+++ b/paddle/fluid/inference/tensorrt/helper.h
@@ -17,9 +17,11 @@
 #include
 #include
 #include
+
 #include
 #include
 #include
+
 #include "paddle/fluid/platform/dynload/tensorrt.h"
 #include "paddle/fluid/platform/enforce.h"
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 79a5e7d7a6a..dc7c77bc66a 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -13,7 +13,9 @@
 // limitations under the License.

 #include "paddle/fluid/inference/tensorrt/op_teller.h"
+
 #include
+
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/data_layout.h"
diff --git a/paddle/fluid/inference/tensorrt/op_teller.h b/paddle/fluid/inference/tensorrt/op_teller.h
index 0a0cbeae51b..40f1a0055c7 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.h
+++ b/paddle/fluid/inference/tensorrt/op_teller.h
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu
index e5584f26580..a339f880ac3 100644
--- a/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/anchor_generator_op_plugin.cu
@@ -14,6 +14,7 @@
 #include
 #include
+
 #include
 #include
diff --git a/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu
index 6128f8f0e41..7ea664ded66 100644
--- a/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/deformable_conv_op_plugin.cu
@@ -14,6 +14,7 @@ limitations under the License. */
 #include
 #include
+
 #include
 #include
@@ -88,9 +89,10 @@ DeformableConvPlugin::DeformableConvPlugin(
   dilations_.insert(dilations_.end(), dilations.cbegin(), dilations.cend());
   PADDLE_ENFORCE_EQ(data_type_ == nvinfer1::DataType::kFLOAT ||
                         data_type_ == nvinfer1::DataType::kHALF,
-                    true, platform::errors::InvalidArgument(
-                              "The DeformableConv TRT Plugin's input type "
-                              "should be float or half."));
+                    true,
+                    platform::errors::InvalidArgument(
+                        "The DeformableConv TRT Plugin's input type "
+                        "should be float or half."));
   PADDLE_ENFORCE_EQ(
       paddings_.size(), strides_.size(),
       platform::errors::InvalidArgument(
@@ -124,9 +126,10 @@ DeformableConvPlugin::DeformableConvPlugin(
   output_dim_.insert(output_dim_.end(), output_dim.cbegin(), output_dim.cend());
   PADDLE_ENFORCE_EQ(data_type_ == nvinfer1::DataType::kFLOAT ||
                         data_type_ == nvinfer1::DataType::kHALF,
-                    true, platform::errors::InvalidArgument(
-                              "The DeformableConv TRT Plugin's input type "
-                              "should be float or half."));
+                    true,
+                    platform::errors::InvalidArgument(
+                        "The DeformableConv TRT Plugin's input type "
+                        "should be float or half."));
   PADDLE_ENFORCE_EQ(
       paddings_.size(), strides_.size(),
       platform::errors::InvalidArgument(
@@ -363,13 +366,11 @@ __global__ void ModulatedDeformableIm2colGpuKernel(
     const float* data_im_ptr =
         data_im + (b_col * num_channels + c_im) * height * width;
     const float* data_offset_ptr =
-        data_offset +
-        (b_col * deformable_group + deformable_group_index) * 2 * kernel_h *
-            kernel_w * height_col * width_col;
+        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
+                          kernel_h * kernel_w * height_col * width_col;
     const float* data_mask_ptr =
-        data_mask +
-        (b_col * deformable_group + deformable_group_index) * kernel_h *
-            kernel_w * height_col * width_col;
+        data_mask + (b_col * deformable_group + deformable_group_index) *
+                        kernel_h * kernel_w * height_col * width_col;

     for (int i = 0; i < kernel_h; ++i) {
       for (int j = 0; j < kernel_w; ++j) {
@@ -432,13 +433,11 @@ __global__ void ModulatedDeformableIm2colGpuKernel(
     const half* data_im_ptr =
         data_im + (b_col * num_channels + c_im) * height * width;
     const half* data_offset_ptr =
-        data_offset +
-        (b_col * deformable_group + deformable_group_index) * 2 * kernel_h *
-            kernel_w * height_col * width_col;
+        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
+                          kernel_h * kernel_w * height_col * width_col;
     const half* data_mask_ptr =
-        data_mask +
-        (b_col * deformable_group + deformable_group_index) * kernel_h *
-            kernel_w * height_col * width_col;
+        data_mask + (b_col * deformable_group + deformable_group_index) *
+                        kernel_h * kernel_w * height_col * width_col;

     for (int i = 0; i < kernel_h; ++i) {
       for (int j = 0; j < kernel_w; ++j) {
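[Editor's note] The pointer-offset hunks above only move line breaks inside one long expression; the parenthesization, and therefore the computed address, is unchanged. A small self-contained check in plain C++ (sizes and names are illustrative, not the kernel's real ones):

    #include <cassert>

    int main() {
      static float data_offset[4096];  // stand-in buffer, large enough below
      const int b_col = 1, deformable_group = 2, deformable_group_index = 1;
      const int kernel_h = 3, kernel_w = 3, height_col = 4, width_col = 4;
      // Pre-format break positions:
      const float* before =
          data_offset +
          (b_col * deformable_group + deformable_group_index) * 2 * kernel_h *
              kernel_w * height_col * width_col;
      // Post-format break positions:
      const float* after = data_offset + (b_col * deformable_group +
                                          deformable_group_index) *
                                             2 * kernel_h * kernel_w *
                                             height_col * width_col;
      assert(before == after);  // same address: the reformat is whitespace-only
      return 0;
    }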
diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
index 1070a88cee7..5f4abee2838 100644
--- a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.cu
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include "paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h"

 namespace paddle {
@@ -67,14 +68,16 @@ __global__ void elementwise_kernel(const size_t total, const T *x_data,
 nvinfer1::Dims ElementWisePlugin::getOutputDimensions(
     int index, const nvinfer1::Dims *input_dims, int num_inputs) TRT_NOEXCEPT {
-  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
-                                  "There is only one output in TRT elementwise "
-                                  "op plugin, but got output index: %d.",
-                                  index));
-  PADDLE_ENFORCE_EQ(num_inputs, 2, platform::errors::InvalidArgument(
-                                       "There are 2 inputs in TRT elementwise "
-                                       "op plugin, but got input number: %d.",
-                                       num_inputs));
+  PADDLE_ENFORCE_EQ(index, 0,
+                    platform::errors::InvalidArgument(
+                        "There is only one output in TRT elementwise "
+                        "op plugin, but got output index: %d.",
+                        index));
+  PADDLE_ENFORCE_EQ(
+      num_inputs, 2,
+      platform::errors::InvalidArgument("There are 2 inputs in TRT elementwise "
+                                        "op plugin, but got input number: %d.",
+                                        num_inputs));
   PADDLE_ENFORCE_NOT_NULL(
       input_dims,
       platform::errors::InvalidArgument(
diff --git a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h
index aa1ab5389a5..51fc1bebd90 100644
--- a/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/elementwise_op_plugin.h
@@ -16,6 +16,7 @@ limitations under the License. */
 #include
 #include
+
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"

 namespace paddle {
diff --git a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu
index 82f4420a2a0..6c7530cdc1f 100644
--- a/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/emb_eltwise_layernorm_plugin.cu
@@ -13,9 +13,11 @@
 // limitations under the License.
 #include
+
 #include
 #include  // NOLINT
 #include
+
 #include "glog/logging.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
@@ -253,10 +255,11 @@ nvinfer1::DataType EmbEltwiseLayernormPluginDynamic::getOutputDataType(
     int index, const nvinfer1::DataType *input_types,
     int nb_inputs) const TRT_NOEXCEPT {
   PADDLE_ENFORCE_EQ(
-      index, 0, platform::errors::InvalidArgument(
-                    "The EmbEltwiseLayernorm Plugin only has one input, so the "
-                    "index value should be 0, but get %d.",
-                    index));
+      index, 0,
+      platform::errors::InvalidArgument(
+          "The EmbEltwiseLayernorm Plugin only has one input, so the "
+          "index value should be 0, but get %d.",
+          index));
   if (with_fp16_)
     return nvinfer1::DataType::kHALF;
   else
diff --git a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
index 841fb2f6fe3..f27b66b03f5 100644
--- a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
@@ -15,9 +15,11 @@
 #pragma once

 #include
+
 #include
 #include
 #include
+
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu
index 08b259e0f95..cba1bb04c36 100644
--- a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.cu
@@ -15,6 +15,7 @@
 #include
 #include
 #include
+
 #include "paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h"
 #include "paddle/fluid/platform/float16.h"
@@ -112,15 +113,15 @@ int GeluPlugin::enqueue(int batch_size, const void* const* inputs,
     VLOG(1) << "TRT Plugin DataType selected. Gelu-->fp32";
     const float* input = static_cast(inputs[0]);
     float* output = static_cast(outputs[0]);
-    gelu_kernel<<>>(
-        kA, num, input, output);
+    gelu_kernel
+        <<>>(kA, num, input, output);
   } else if (type == nvinfer1::DataType::kHALF) {
     VLOG(1) << "TRT Plugin DataType selected. Gelu-->fp16";
     const half* input = static_cast(inputs[0]);
     half* output = static_cast(outputs[0]);
-    no_exact_gelu_kernel<<>>(
-        kAT, kBT, kCT, num, input, output);
+    no_exact_gelu_kernel
+        <<>>(kAT, kBT, kCT, num, input,
+                output);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
         "The Gelu TRT Plugin's input type should be float or half."));
@@ -170,10 +171,11 @@ bool GeluPluginDynamic::supportsFormatCombination(
 nvinfer1::DataType GeluPluginDynamic::getOutputDataType(
     int index, const nvinfer1::DataType* input_types,
     int nb_inputs) const TRT_NOEXCEPT {
-  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
-                                  "The Gelu Plugin only has one input, so the "
-                                  "index value should be 0, but get %d.",
-                                  index));
+  PADDLE_ENFORCE_EQ(index, 0,
+                    platform::errors::InvalidArgument(
+                        "The Gelu Plugin only has one input, so the "
+                        "index value should be 0, but get %d.",
+                        index));
   return input_types[0];
 }
@@ -192,15 +194,15 @@ int GeluPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc,
     VLOG(1) << "TRT Plugin DataType selected. Gelu-->fp32";
     const float* input = static_cast(inputs[0]);
     float* output = static_cast(outputs[0]);
-    gelu_kernel<<>>(
-        kA, num, input, output);
+    gelu_kernel
+        <<>>(kA, num, input, output);
   } else if (input_type == nvinfer1::DataType::kHALF) {
     VLOG(1) << "TRT Plugin DataType selected. Gelu-->fp16";
Gelu-->fp16"; const half* input = static_cast(inputs[0]); half* output = static_cast(outputs[0]); - no_exact_gelu_kernel<<>>( - kAT, kBT, kCT, num, input, output); + no_exact_gelu_kernel + <<>>(kAT, kBT, kCT, num, input, + output); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The Gelu TRT Plugin's input type should be float or half.")); diff --git a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h index 7efdd2798b2..8436ccad78a 100644 --- a/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/gelu_op_plugin.h @@ -14,9 +14,11 @@ #pragma once #include + #include #include #include + #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu index 9872b1ff8d9..05ed76bd3c9 100644 --- a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.cu @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h index 475c908c13b..b1e693799bd 100644 --- a/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/hard_swish_op_plugin.h @@ -14,9 +14,11 @@ #pragma once #include + #include #include #include + #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" diff --git a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu index 03686aefc13..9acd688f707 100644 --- a/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.cu @@ -13,8 +13,10 @@ // limitations under the License. #include + #include #include + #include "glog/logging.h" #include "paddle/fluid/inference/tensorrt/plugin/instance_norm_op_plugin.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu index 67d44184a76..16e2a284d4b 100644 --- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.cu @@ -13,8 +13,10 @@ // limitations under the License. 
 #include
+
 #include
 #include
+
 #include "glog/logging.h"
 #include "paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h"
 #include "paddle/phi/kernels/layer_norm_kernel.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h
index 9e8ce302833..42dfa2b8aa0 100644
--- a/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/layer_norm_op_plugin.h
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h b/paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h
index be8f1c418fc..9ca6ff29240 100644
--- a/paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/matmul_op_int8_plugin.h
@@ -14,9 +14,9 @@ limitations under the License. */
 #pragma once

 #include
-
 #include
 #include
+
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
 #include "paddle/fluid/platform/dynload/cublasLt.h"
 #include "paddle/fluid/platform/enforce.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.cu
index 6e268e7b0b3..f655d23e628 100644
--- a/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.cu
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include
+
 #include "glog/logging.h"
 #include "paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.h"
@@ -38,11 +39,12 @@ bool MishPlugin::supportsFormat(
 nvinfer1::Dims MishPlugin::getOutputDimensions(int index,
                                                const nvinfer1::Dims* in_dims,
                                                int nb_inputs) TRT_NOEXCEPT {
-  PADDLE_ENFORCE_EQ(nb_inputs, 1, platform::errors::InvalidArgument(
-                                      "We expect [number of inputs] == 1"
-                                      "in TRT Mish op plugin, but got "
-                                      "[number of inputs] = %d.",
-                                      nb_inputs));
+  PADDLE_ENFORCE_EQ(
+      nb_inputs, 1,
+      platform::errors::InvalidArgument("We expect [number of inputs] == 1"
+                                        "in TRT Mish op plugin, but got "
+                                        "[number of inputs] = %d.",
+                                        nb_inputs));
   PADDLE_ENFORCE_LT(index, this->getNbOutputs(),
                     platform::errors::InvalidArgument(
                         "We expect [index] < [number of outputs]"
@@ -123,14 +125,14 @@ int MishPlugin::enqueue(int batchSize, const void* const* inputs,
     VLOG(1) << "TRT Plugin DataType selected. Mish-->fp32";
     const float* input = static_cast(inputs[0]);
     float* output = static_cast(outputs[0]);
-    mish_kernel<<>>(threshold_, num,
-                       input, output);
+    mish_kernel
+        <<>>(threshold_, num, input, output);
   } else if (type == nvinfer1::DataType::kHALF) {
     VLOG(1) << "TRT Plugin DataType selected. Mish-->fp16";
Mish-->fp16"; const half* input = static_cast(inputs[0]); half* output = static_cast(outputs[0]); - mish_kernel<<>>(threshold_, num, - input, output); + mish_kernel + <<>>(threshold_, num, input, output); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The Mish TRT Plugin's input type should be float or half.")); @@ -192,10 +194,11 @@ bool MishPluginDynamic::supportsFormatCombination( nvinfer1::DataType MishPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType* input_types, int nb_inputs) const TRT_NOEXCEPT { - PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The Mish Plugin only has one input, so the " - "index value should be 0, but get %d.", - index)); + PADDLE_ENFORCE_EQ(index, 0, + platform::errors::InvalidArgument( + "The Mish Plugin only has one input, so the " + "index value should be 0, but get %d.", + index)); return input_types[0]; } @@ -214,14 +217,14 @@ int MishPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc, VLOG(1) << "TRT Plugin DataType selected. Mish-->fp32"; const float* input = static_cast(inputs[0]); float* output = static_cast(outputs[0]); - mish_kernel<<>>(threshold_, num, - input, output); + mish_kernel + <<>>(threshold_, num, input, output); } else if (input_type == nvinfer1::DataType::kHALF) { VLOG(1) << "TRT Plugin DataType selected. Mish-->fp16"; const half* input = static_cast(inputs[0]); half* output = static_cast(outputs[0]); - mish_kernel<<>>(threshold_, num, - input, output); + mish_kernel + <<>>(threshold_, num, input, output); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The Mish TRT Plugin's input type should be float or half.")); diff --git a/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.h index 75390666ea0..fdef7b93f32 100644 --- a/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/mish_op_plugin.h @@ -14,8 +14,10 @@ #pragma once #include + #include #include + #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.cu index 5596a89a083..40cb2b88e71 100644 --- a/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.cu @@ -70,10 +70,11 @@ nvinfer1::Dims Pool3DPlugin::getOutputDimensions( "The Pool3D Plugin only has one input, so the nbInputs " "value should be 1, but get %d.", nbInputs)); - PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The Pool3D Plugin only has one input, so " - "the index value should be 0, but get %d.", - index)); + PADDLE_ENFORCE_EQ(index, 0, + platform::errors::InvalidArgument( + "The Pool3D Plugin only has one input, so " + "the index value should be 0, but get %d.", + index)); PADDLE_ENFORCE_EQ(inputDims[0].nbDims, 4, platform::errors::InvalidArgument( "The Pool3D Plugin only has four Dimensions, so the " diff --git a/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.h index 7c9a8625d70..d54ce067e5e 100644 --- a/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/pool3d_op_plugin.h @@ -14,9 +14,11 @@ #pragma once #include + #include #include #include + #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" 
 namespace paddle {
diff --git a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu
index 9bfe98d759d..80f7e349dac 100644
--- a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu
@@ -240,10 +240,11 @@ bool PoolPluginDynamic::supportsFormatCombination(
 nvinfer1::DataType PoolPluginDynamic::getOutputDataType(
     int index, const nvinfer1::DataType *input_types,
     int nb_inputs) const TRT_NOEXCEPT {
-  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
-                                  "The Pool Plugin only has one input, so the "
-                                  "index value should be 0, but get %d.",
-                                  index));
+  PADDLE_ENFORCE_EQ(index, 0,
+                    platform::errors::InvalidArgument(
+                        "The Pool Plugin only has one input, so the "
+                        "index value should be 0, but get %d.",
+                        index));
   PADDLE_ENFORCE_EQ((input_types[0] == nvinfer1::DataType::kFLOAT), true,
                     platform::errors::InvalidArgument(
                         "The input type should be half or float"));
diff --git a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h
index d1bf2cd02e8..155d69cc457 100644
--- a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.h
@@ -14,9 +14,11 @@
 #pragma once

 #include
+
 #include
 #include
 #include
+
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"

 namespace paddle {
diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
index 1ea2b8b5f6e..72c1d546e9a 100644
--- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.cu
@@ -144,10 +144,11 @@ bool PReluPluginDynamic::supportsFormatCombination(
 nvinfer1::DataType PReluPluginDynamic::getOutputDataType(
     int index, const nvinfer1::DataType *input_types,
     int nb_inputs) const TRT_NOEXCEPT {
-  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
-                                  "The PRelu Plugin only has one input, so the "
-                                  "index value should be 0, but get %d.",
-                                  index));
+  PADDLE_ENFORCE_EQ(index, 0,
+                    platform::errors::InvalidArgument(
+                        "The PRelu Plugin only has one input, so the "
+                        "index value should be 0, but get %d.",
+                        index));
   PADDLE_ENFORCE_EQ((input_types[0] == nvinfer1::DataType::kFLOAT), true,
                     platform::errors::InvalidArgument(
                         "The input type should be half or float"));
diff --git a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h
index e0a77de6f54..0025e1ee5b4 100644
--- a/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/prelu_op_plugin.h
@@ -17,9 +17,9 @@
 #include
 #include
 #include
+
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
-
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
index e2f1aab9b64..d3da5d7225d 100644
--- a/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/qkv_to_context_plugin.cu
@@ -13,9 +13,11 @@
 // limitations under the License.
 #include
+
 #include
 #include  // NOLINT
 #include
+
 #include "glog/logging.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
@@ -103,8 +105,8 @@ inline void TransposeQKV(const int batch, const int seq_len,
                     platform::errors::InvalidArgument(
                         "head_num (%d) * head_size (%d) should <= %d",
                         head_num, head_size, 1024));
-  TransposeQkvKernel<<>>(head_size, input,
-                             output);
+  TransposeQkvKernel
+      <<>>(head_size, input, output);
 }
 }
@@ -142,8 +144,8 @@ inline void TransposeQKV(const int batch, const int seq_len,
                     platform::errors::InvalidArgument(
                         "head_num (%d) * head_size (%d) should <= %d",
                         head_num, head_size, 1024));
-  TransposeQkvKernel<<>>(head_size, input,
-                             output);
+  TransposeQkvKernel
+      <<>>(head_size, input, output);
 }
 }
@@ -218,10 +220,11 @@ nvinfer1::DataType QkvToContextPluginDynamic::getOutputDataType(
     int index, const nvinfer1::DataType *input_types,
     int nb_inputs) const TRT_NOEXCEPT {
   PADDLE_ENFORCE_EQ(
-      index, 0, platform::errors::InvalidArgument(
-                    "The EmbEltwiseLayernorm Plugin only has one input, so the "
-                    "index value should be 0, but get %d.",
-                    index));
+      index, 0,
+      platform::errors::InvalidArgument(
+          "The EmbEltwiseLayernorm Plugin only has one input, so the "
+          "index value should be 0, but get %d.",
+          index));
   return input_types[0];
 }
diff --git a/paddle/fluid/inference/tensorrt/plugin/recover_padding_plugin.h b/paddle/fluid/inference/tensorrt/plugin/recover_padding_plugin.h
index 896cd05eef1..71b576610e2 100644
--- a/paddle/fluid/inference/tensorrt/plugin/recover_padding_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/recover_padding_plugin.h
@@ -14,8 +14,8 @@ limitations under the License. */
 #pragma once

 #include
-
 #include
+
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
 #include "paddle/fluid/platform/enforce.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.h b/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.h
index 6679f2f0819..89fda3dd775 100644
--- a/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/remove_padding_plugin.h
@@ -14,8 +14,8 @@ limitations under the License. */
 #pragma once

 #include
-
 #include
+
 #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h"
 #include "paddle/fluid/platform/enforce.h"
diff --git a/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu
index 7dc31fb4471..7eded9e823e 100644
--- a/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.cu
@@ -14,6 +14,7 @@
 #include
 #include
+
 #include
 #include "paddle/fluid/inference/tensorrt/plugin/roi_align_op_plugin.h"
@@ -281,13 +282,12 @@ int RoiAlignPluginDynamic::enqueue_impl(
         width, pooled_height_, pooled_width_, sampling_ratio_,
         rois_num / batch, aligned_, static_cast(outputs[0]));
   } else {
-    GPUROIAlignOpt<
-        T, OutT,
-        false><<>>(
-        output_size, static_cast(inputs[0]),
-        static_cast(inputs[1]), spatial_scale_, channels, height,
-        width, pooled_height_, pooled_width_, sampling_ratio_, rois_num / batch,
-        aligned_, static_cast(outputs[0]));
+    GPUROIAlignOpt
+        <<>>(
+            output_size, static_cast(inputs[0]),
+            static_cast(inputs[1]), spatial_scale_, channels, height,
+            width, pooled_height_, pooled_width_, sampling_ratio_,
+            rois_num / batch, aligned_, static_cast(outputs[0]));
   }

   return cudaGetLastError() != cudaSuccess;
diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu
index fb14749f3d1..e1527f85088 100644
--- a/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.cu
@@ -14,9 +14,11 @@
 #include
 #include
+
 #include
 #include  // NOLINT
 #include
+
 #include "glog/logging.h"
 #include "paddle/fluid/inference/tensorrt/plugin/skip_layernorm_op_plugin.h"
 #include "paddle/fluid/operators/math/bert_encoder_functor.h"
@@ -105,8 +107,9 @@ nvinfer1::DataType SkipLayerNormPluginDynamic::getOutputDataType(
                         index));
   PADDLE_ENFORCE_EQ((input_types[0] == nvinfer1::DataType::kFLOAT ||
                      input_types[0] == nvinfer1::DataType::kHALF),
-                    true, platform::errors::InvalidArgument(
-                              "The input type should be half or float"));
+                    true,
+                    platform::errors::InvalidArgument(
+                        "The input type should be half or float"));
   return input_types[0];
 }
diff --git a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu
index 0a6d24f9072..ad426204d5a 100644
--- a/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.cu
@@ -14,9 +14,11 @@
 #include
 #include
+
 #include
 #include  // NOLINT
 #include
+
 #include "glog/logging.h"
 #include "paddle/fluid/inference/tensorrt/plugin/slice_op_plugin.h"
@@ -301,14 +303,16 @@ bool SlicePluginDynamic::supportsFormatCombination(
 nvinfer1::DataType SlicePluginDynamic::getOutputDataType(
     int index, const nvinfer1::DataType *input_types,
     int nb_inputs) const TRT_NOEXCEPT {
-  PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument(
-                                  "The Slice Plugin only has one input, so the "
-                                  "index value should be 0, but get %d.",
-                                  index));
+  PADDLE_ENFORCE_EQ(index, 0,
+                    platform::errors::InvalidArgument(
+                        "The Slice Plugin only has one input, so the "
+                        "index value should be 0, but get %d.",
+                        index));
   PADDLE_ENFORCE_EQ((input_types[0] == nvinfer1::DataType::kFLOAT ||
                      input_types[0] == nvinfer1::DataType::kHALF),
-                    true, platform::errors::InvalidArgument(
-                              "The input type should be half or float"));
+                    true,
+                    platform::errors::InvalidArgument(
+                        "The input type should be half or float"));
"The input type should be half or float")); return input_types[0]; } diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu index ec4fcca6d74..1cfc9fade7b 100644 --- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu @@ -13,7 +13,9 @@ // limitations under the License. #include + #include + #include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h index 7a41fe1d1ee..49f028493ee 100644 --- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h @@ -15,9 +15,11 @@ #pragma once #include + #include #include #include + #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu index 74a6c3cdf3e..1c6dae78b38 100644 --- a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu @@ -15,6 +15,7 @@ #include #include #include + #include "paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h" namespace paddle { @@ -128,8 +129,9 @@ bool StackPluginDynamic::supportsFormatCombination( nvinfer1::DataType StackPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType* input_types, int nb_inputs) const TRT_NOEXCEPT { - PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The index should be equal to 0")); + PADDLE_ENFORCE_EQ( + index, 0, + platform::errors::InvalidArgument("The index should be equal to 0")); return input_types[0]; } diff --git a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h index 965c53e2698..12beafdadb3 100644 --- a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h @@ -14,9 +14,11 @@ #pragma once #include + #include #include #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" diff --git a/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu index 2c2fad74b9a..1992dd57d68 100644 --- a/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.cu @@ -13,8 +13,10 @@ // limitations under the License. #include + #include #include + #include "glog/logging.h" #include "paddle/fluid/inference/tensorrt/plugin/swish_op_plugin.h" @@ -181,10 +183,11 @@ bool SwishPluginDynamic::supportsFormatCombination( nvinfer1::DataType SwishPluginDynamic::getOutputDataType( int index, const nvinfer1::DataType *input_types, int nb_inputs) const TRT_NOEXCEPT { - PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The Swish Plugin only has one input, so the " - "index value should be 0, but get %d.", - index)); + PADDLE_ENFORCE_EQ(index, 0, + platform::errors::InvalidArgument( + "The Swish Plugin only has one input, so the " + "index value should be 0, but get %d.", + index)); return input_types[0]; } @@ -203,8 +206,8 @@ int SwishPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc *input_desc, VLOG(1) << "TRT Plugin DataType selected. 
Swish-->fp32"; const float *input = static_cast(inputs[0]); float *output = static_cast(outputs[0]); - swish_kernel<<>>(num, input, output, - beta_); + swish_kernel + <<>>(num, input, output, beta_); } else if (input_type == nvinfer1::DataType::kHALF) { VLOG(1) << "TRT Plugin DataType selected. Swish-->fp16"; const half *input = static_cast(inputs[0]); diff --git a/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc b/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc index 46f585e6557..9cb680da5a9 100644 --- a/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc +++ b/paddle/fluid/inference/tensorrt/plugin/test_split_plugin.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h b/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h index 87dc876fa9c..92aa0c48a49 100644 --- a/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/transformer_input_convert_plugin.h @@ -14,8 +14,8 @@ limitations under the License. */ #pragma once #include - #include + #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h index 9210cd48d07..a1316384cd4 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin.h @@ -15,6 +15,7 @@ #pragma once #include + #include #include #include diff --git a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h index 16751c764bd..cf9c66f0eb3 100644 --- a/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h +++ b/paddle/fluid/inference/tensorrt/plugin/trt_plugin_utils.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/inference/tensorrt/plugin/yolo_box_head_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/yolo_box_head_op_plugin.h index 2094dbfc9db..7116093ae36 100644 --- a/paddle/fluid/inference/tensorrt/plugin/yolo_box_head_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/yolo_box_head_op_plugin.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/inference/tensorrt/engine.h" #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" diff --git a/paddle/fluid/inference/tensorrt/test_tensorrt.cc b/paddle/fluid/inference/tensorrt/test_tensorrt.cc index 2f5b75c1020..70f36ec34b7 100644 --- a/paddle/fluid/inference/tensorrt/test_tensorrt.cc +++ b/paddle/fluid/inference/tensorrt/test_tensorrt.cc @@ -15,6 +15,7 @@ limitations under the License. 
 #include
 #include
 #include
+
 #include "NvInfer.h"
 #include "paddle/fluid/inference/tensorrt/helper.h"
 #include "paddle/fluid/platform/dynload/tensorrt.h"
diff --git a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h
index c84cb45b7ec..35c776b9e53 100644
--- a/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h
+++ b/paddle/fluid/inference/tensorrt/trt_int8_calibrator.h
@@ -16,6 +16,7 @@
 #include
 #include
+
 #include
 #include
 #include  // NOLINT
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc
index d11d09458e4..ae838955adc 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_gpu_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc
index d3a15cb2857..dfcf5fda476 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_int_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_ner_tester.cc
index 4369cd78dfa..db5406b8ef6 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_ner_tester.cc
@@ -15,8 +15,10 @@
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc
index a341ffd7a08..8b094e8a6cb 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_config_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_tensor_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_tensor_tester.cc
index f4017fc5a7f..33685e6a960 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_tensor_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_tensor_tester.cc
@@ -15,11 +15,13 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
+
 #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_threads_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_threads_tester.cc
index 8951c446b1f..f59b337d6af 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_threads_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_pd_threads_tester.cc
@@ -15,11 +15,13 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
+
 #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_exp_xpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_exp_xpu_tester.cc
index a84c19de255..347f0e6e253 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_exp_xpu_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_exp_xpu_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi_exp/pd_inference_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc
index c60e0a25f28..524d39854de 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi/paddle_c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc
index c0c8ff083de..cf8582ee778 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_int_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi/paddle_c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_ner_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_ner_tester.cc
index bf0576f9f93..b74f51af980 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_ner_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_ner_tester.cc
@@ -15,8 +15,10 @@
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi/paddle_c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc
index a9c24c4503f..d0cd55e918e 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_pd_tensor_tester.cc
@@ -15,11 +15,13 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
 #include
 #include
 #include
+
 #include "paddle/fluid/inference/capi/c_api_internal.h"
 #include "paddle/fluid/inference/capi/paddle_c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
@@ -69,8 +71,9 @@ void PD_run() {
   PD_DeletePaddleTensor(input);
   int size;
   const int* out_shape = PD_GetPaddleTensorShape(out_data, &size);
-  PADDLE_ENFORCE_EQ(size, 2, paddle::platform::errors::InvalidArgument(
-                                 "The Output shape's size is NOT match."));
+  PADDLE_ENFORCE_EQ(size, 2,
+                    paddle::platform::errors::InvalidArgument(
+                        "The Output shape's size is NOT match."));
   std::vector ref_outshape_size({9, 6});
   for (int i = 0; i < 2; ++i) {
     PADDLE_ENFORCE_EQ(out_shape[i], ref_outshape_size[i],
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
index 0b2be0076fd..4ff3e27f420 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi/paddle_c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc
index 33a67d81405..e6a6a8c1037 100644
--- a/paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_capi_xpu_tester.cc
@@ -15,8 +15,10 @@ limitations under the License. */
 #include
 #include
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/capi/paddle_c_api.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
index 820bbf07017..e3bdb98ec52 100644
--- a/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include
+
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
index 384bef8a4b4..c21785f7ce7 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_functional_mkldnn_tester.cc
@@ -13,8 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/place.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
index 5333f0052d7..166bdc621c1 100644
--- a/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_detect_tester.cc
@@ -13,8 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include
+
 #include
 #include
+
 #include "paddle/fluid/inference/tests/api/tester_helper.h"

 DEFINE_string(infer_shape, "", "data shape file");
diff --git a/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc
index af0a51e4ddb..cf3380d0406 100644
--- a/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_image_classification_tester.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 #include
 #include
+
 #include "paddle/fluid/inference/tests/api/tester_helper.h"

 DEFINE_bool(disable_mkldnn_fc, false, "Disable usage of MKL-DNN's FC op");
diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
index d11b5f0c218..c6d266ceb21 100644
--- a/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 #include
 #include
+
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
index 57ab1b00908..18990dba314 100644
--- a/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_int8_object_detection_tester.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 #include
 #include
+
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
diff --git a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
index bd3a1d737af..2b69a15e26a 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
@@ -148,8 +148,9 @@ TEST(Analyzer_LAC, profile) {
                         "The size of output should be equal to 1."));
   size_t size = GetSize(output[0]);
   size_t batch1_size = sizeof(lac_ref_data) / sizeof(int64_t);
-  PADDLE_ENFORCE_GE(size, batch1_size, paddle::platform::errors::Fatal(
-                                           "The size of batch is invaild."));
+  PADDLE_ENFORCE_GE(
+      size, batch1_size,
+      paddle::platform::errors::Fatal("The size of batch is invaild."));
   int64_t *pdata = static_cast(output[0].data.data());
   for (size_t i = 0; i < batch1_size; ++i) {
     EXPECT_EQ(pdata[i], lac_ref_data[i]);
diff --git a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
index 141e60513eb..7e754ad93bc 100644
--- a/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_lexical_analysis_gru_tester.cc
@@ -14,6 +14,7 @@ limitations under the License. */
 #include
 #include
+
 #include "paddle/fluid/inference/api/paddle_analysis_config.h"
 #include "paddle/fluid/inference/tests/api/tester_helper.h"
@@ -211,18 +212,15 @@ std::vector Lexical_Test(
     }
   }
   // nums_infer, nums_label, nums_correct
-  auto precision =
-      acc_sum[0]
-          ? static_cast(acc_sum[2]) / static_cast(acc_sum[0])
-          : 0;
-  auto recall =
-      acc_sum[1]
-          ? static_cast(acc_sum[2]) / static_cast(acc_sum[1])
-          : 0;
-  auto f1_score =
-      acc_sum[2]
static_cast(2 * precision * recall) / (precision + recall) - : 0; + auto precision = acc_sum[0] ? static_cast(acc_sum[2]) / + static_cast(acc_sum[0]) + : 0; + auto recall = acc_sum[1] ? static_cast(acc_sum[2]) / + static_cast(acc_sum[1]) + : 0; + auto f1_score = acc_sum[2] ? static_cast(2 * precision * recall) / + (precision + recall) + : 0; LOG(INFO) << "Precision: " << std::fixed << std::setw(6) << std::setprecision(5) << precision; diff --git a/paddle/fluid/inference/tests/api/analyzer_mmp_tester.cc b/paddle/fluid/inference/tests/api/analyzer_mmp_tester.cc index 4a5ec95934a..43fed05db13 100644 --- a/paddle/fluid/inference/tests/api/analyzer_mmp_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_mmp_tester.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include + #include "paddle/fluid/framework/transfer_scope_cache.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" -#include - // Here add missing commands DEFINE_string(infer_model2, "", "model path"); DEFINE_string(infer_model3, "", "model path"); @@ -96,8 +96,9 @@ void compare(bool use_mkldnn = false) { xx_output.begin(), xx_output.end(), xx2_output.begin(), [](const float& l, const float& r) { return fabs(l - r) < 1e-4; }); - PADDLE_ENFORCE_EQ(result, true, paddle::platform::errors::Fatal( - "Results of model run independently " + PADDLE_ENFORCE_EQ( + result, true, + paddle::platform::errors::Fatal("Results of model run independently " "differs from results of the same model " "run as a sequence of models")); } diff --git a/paddle/fluid/inference/tests/api/analyzer_paddle_tensor_tester.cc b/paddle/fluid/inference/tests/api/analyzer_paddle_tensor_tester.cc index 2eb75c4dc53..2c02b87ba2b 100644 --- a/paddle/fluid/inference/tests/api/analyzer_paddle_tensor_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_paddle_tensor_tester.cc @@ -16,9 +16,8 @@ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/inference/utils/singleton.h" - #include "paddle/fluid/inference/tests/api/tester_helper.h" +#include "paddle/fluid/inference/utils/singleton.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tests/api/analyzer_quant_image_classification_tester.cc b/paddle/fluid/inference/tests/api/analyzer_quant_image_classification_tester.cc index 4bb59f3c8df..1618ba575a2 100644 --- a/paddle/fluid/inference/tests/api/analyzer_quant_image_classification_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_quant_image_classification_tester.cc @@ -14,6 +14,7 @@ limitations under the License. 
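The precision/recall/f1 hunk ending above shows how the upgraded formatter wraps long conditional expressions: the condition and the start of the true branch share a line, the spilled operand is indented past the `?`, and `: 0` is aligned beneath the first operand. The `<...>` template arguments of the casts do not survive in this text, so the element type in the sketch below is an assumption, not taken from the patch:

//   auto precision = acc_sum[0] ? static_cast<float>(acc_sum[2]) /
//                                     static_cast<float>(acc_sum[0])
//                               : 0;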
*/ #include #include + #include "paddle/fluid/inference/api/paddle_analysis_config.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc index 978aaf1c6a3..883d946dff5 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -47,8 +47,9 @@ struct DataRecord { num_lines++; std::vector data; split(line, '\t', &data); - PADDLE_ENFORCE_GT(data.size(), 4, paddle::platform::errors::Fatal( - "The size of data is invaild.")); + PADDLE_ENFORCE_GT( + data.size(), 4, + paddle::platform::errors::Fatal("The size of data is invaild.")); // load title1 data std::vector title1_data; split_to_int64(data[0], ' ', &title1_data); diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc index 8f0778b83e5..1ef5e81e18a 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_determine_tester.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc index 099ff1f31a7..5a78d36276c 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_compare_tester.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc index 1fbcbf1a3f4..30cea4f69bd 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_compare_zero_copy_tester.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc index d33b11c389a..15f4b3a3a5b 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_fuse_statis_tester.cc @@ -15,6 +15,7 @@ limitations under the License. 
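Almost every bare `+` in the tester hunks above inserts a blank line rather than code: the new configuration regroups `#include` directives into blocks separated by empty lines. Judging from the hunks in this patch, the blocks come out as (a) the file's own header when one is recognized, (b) `<...>` system and standard headers, (c) the remaining `"..."` third-party and project headers. A hypothetical file laid out under that inferred rule (all names below are illustrative, none come from the patch):

// my_widget.cc, an illustrative example only
#include "myproj/my_widget.h"  // (a) the header this file implements

#include <string>  // (b) angle-bracket system/standard headers
#include <vector>

#include "glog/logging.h"  // (c) quoted third-party and project headers
#include "myproj/util.h"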
*/ #include #include #include + #include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc index 0ccd95f2a17..063d29abee9 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_profile_tester.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h index 5d7f7c290f6..ef00c020973 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h +++ b/paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester_helper.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h b/paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h index e43456ed832..a384c75e0bb 100644 --- a/paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h +++ b/paddle/fluid/inference/tests/api/analyzer_transformer_tester_helper.h @@ -15,6 +15,7 @@ #include #include #include + #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc index faa15fc4f0a..0a43d166e93 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vis_tester.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include + #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc index 029f2f0421d..08f26bae37b 100644 --- a/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_vit_ocr_tester.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/analyzer_zerocopy_tensor_tester.cc b/paddle/fluid/inference/tests/api/analyzer_zerocopy_tensor_tester.cc index e1ee1b196e4..d8ba615c8ed 100644 --- a/paddle/fluid/inference/tests/api/analyzer_zerocopy_tensor_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_zerocopy_tensor_tester.cc @@ -16,9 +16,8 @@ #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" -#include "paddle/fluid/inference/utils/singleton.h" - #include "paddle/fluid/inference/tests/api/tester_helper.h" +#include "paddle/fluid/inference/utils/singleton.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/tests/api/config_printer.h b/paddle/fluid/inference/tests/api/config_printer.h index b952b62f13e..6ef3eb95dd2 100644 --- a/paddle/fluid/inference/tests/api/config_printer.h +++ b/paddle/fluid/inference/tests/api/config_printer.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/inference/api/paddle_inference_api.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/ipu_resnet50_fp16_test.cc b/paddle/fluid/inference/tests/api/ipu_resnet50_fp16_test.cc index 1d69069da07..38cf475d3da 100644 --- a/paddle/fluid/inference/tests/api/ipu_resnet50_fp16_test.cc +++ b/paddle/fluid/inference/tests/api/ipu_resnet50_fp16_test.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include #include "gflags/gflags.h" diff --git a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc index 5fde8e6a5e1..cbfe8229d31 100644 --- a/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc +++ b/paddle/fluid/inference/tests/api/ipu_resnet50_test.cc @@ -11,6 +11,7 @@ limitations under the License. */ #include #include + #include #include "gflags/gflags.h" diff --git a/paddle/fluid/inference/tests/api/ipu_word2vec_sample.cc b/paddle/fluid/inference/tests/api/ipu_word2vec_sample.cc index d38c5c34163..a0e36e9779d 100644 --- a/paddle/fluid/inference/tests/api/ipu_word2vec_sample.cc +++ b/paddle/fluid/inference/tests/api/ipu_word2vec_sample.cc @@ -31,8 +31,8 @@ limitations under the License. */ DEFINE_string(infer_model, "", "Directory of the inference model."); using paddle_infer::Config; -using paddle_infer::Predictor; using paddle_infer::CreatePredictor; +using paddle_infer::Predictor; void inference(std::string model_path, bool use_ipu, std::vector *out_data) { diff --git a/paddle/fluid/inference/tests/api/lite_mul_model_test.cc b/paddle/fluid/inference/tests/api/lite_mul_model_test.cc index 9211ea246a5..1adbf0ec7a5 100644 --- a/paddle/fluid/inference/tests/api/lite_mul_model_test.cc +++ b/paddle/fluid/inference/tests/api/lite_mul_model_test.cc @@ -14,11 +14,12 @@ limitations under the License. */ #include #include + #include #include // NOLINT #include // NOLINT -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/lite_resnet50_test.cc b/paddle/fluid/inference/tests/api/lite_resnet50_test.cc index 59bbaa2b78f..169d0b9987d 100644 --- a/paddle/fluid/inference/tests/api/lite_resnet50_test.cc +++ b/paddle/fluid/inference/tests/api/lite_resnet50_test.cc @@ -14,6 +14,7 @@ limitations under the License. 
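The `ipu_word2vec_sample.cc` hunk above is a different rule from the include shuffling: runs of using-declarations are now sorted, which is consistent with clang-format's `SortUsingDeclarations` option being enabled in the new configuration. The comparison is a plain case-sensitive one over the full qualified name, so `CreatePredictor` lands above `Predictor`:

using paddle_infer::Config;
using paddle_infer::CreatePredictor;  // 'C' sorts before 'P'
using paddle_infer::Predictor;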
*/ #include #include + #include #include "gflags/gflags.h" diff --git a/paddle/fluid/inference/tests/api/mkldnn_quantizer_config_tester.cc b/paddle/fluid/inference/tests/api/mkldnn_quantizer_config_tester.cc index 4a2527a217f..d972945db7d 100644 --- a/paddle/fluid/inference/tests/api/mkldnn_quantizer_config_tester.cc +++ b/paddle/fluid/inference/tests/api/mkldnn_quantizer_config_tester.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/inference/api/paddle_mkldnn_quantizer_config.h" #include "paddle/fluid/inference/tests/api/tester_helper.h" @@ -90,9 +91,10 @@ TEST(Mkldnn_quantizer_config, configuration) { PADDLE_ENFORCE_EQ( cfg.mkldnn_quantizer_config()->scale_algo("conv2d", "Input"), - conv2d_scale_algo, platform::errors::InvalidArgument( - "Scale algorithm got from config differs with the " - "one set previously.")); + conv2d_scale_algo, + platform::errors::InvalidArgument( + "Scale algorithm got from config differs with the " + "one set previously.")); PADDLE_ENFORCE_EQ( cfg.mkldnn_quantizer_config()->scale_algo("unknown", "unknown"), diff --git a/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc b/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc index 2be69781c4e..38bcb7645ab 100644 --- a/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc +++ b/paddle/fluid/inference/tests/api/paddle_infer_api_copy_tensor_tester.cc @@ -14,8 +14,10 @@ limitations under the License. */ #include #include + #include #include + #include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/fluid/inference/api/paddle_infer_contrib.h" diff --git a/paddle/fluid/inference/tests/api/paddle_infer_api_errors_tester.cc b/paddle/fluid/inference/tests/api/paddle_infer_api_errors_tester.cc index c5a0746c4d7..ab82c82b1e3 100644 --- a/paddle/fluid/inference/tests/api/paddle_infer_api_errors_tester.cc +++ b/paddle/fluid/inference/tests/api/paddle_infer_api_errors_tester.cc @@ -15,7 +15,6 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/inference/api/paddle_infer_contrib.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/inference/tests/api/paddle_infer_api_test.cc b/paddle/fluid/inference/tests/api/paddle_infer_api_test.cc index 88ebd85c79a..8cbc410eb5f 100644 --- a/paddle/fluid/inference/tests/api/paddle_infer_api_test.cc +++ b/paddle/fluid/inference/tests/api/paddle_infer_api_test.cc @@ -15,10 +15,11 @@ limitations under the License. 
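In the `mkldnn_quantizer_config_tester.cc` hunk above, the error message is built from adjacent string literals, which the compiler concatenates into one string; the re-wrap therefore moves fragments between lines without changing the text a user ever sees. A reduced, self-contained sketch of that mechanism:

#include <cstdio>

int main() {
  // Two fragments, one literal after concatenation; a formatter is free
  // to re-break at the fragment boundary without altering the message.
  const char* msg =
      "Scale algorithm got from config differs with the "
      "one set previously.";
  std::puts(msg);
  return 0;
}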
*/ #include #include #include + #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle_infer { diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h index f2df018f497..d7784a909af 100644 --- a/paddle/fluid/inference/tests/api/tester_helper.h +++ b/paddle/fluid/inference/tests/api/tester_helper.h @@ -1081,7 +1081,7 @@ static bool CompareTensor(const framework::LoDTensor &a, } void ConvertFP32toFP16(paddle::PaddleTensor &tensor // NOLINT - ) { +) { int num = 1; for (auto dim : tensor.shape) { num *= dim; @@ -1101,7 +1101,7 @@ void ConvertFP32toFP16(paddle::PaddleTensor &tensor // NOLINT } void ConvertFP16toFP32(paddle::PaddleTensor &tensor // NOLINT - ) { +) { int num = 1; for (auto dim : tensor.shape) { num *= dim; diff --git a/paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc b/paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc index a1f31c3108b..ab059496ad8 100644 --- a/paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc +++ b/paddle/fluid/inference/tests/api/trt_cascade_rcnn_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc index 7e9f71c8b3c..b0c4c13dbbc 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_fp16_serialize_deserialize_test.cc @@ -22,8 +22,8 @@ limitations under the License. */ #define GLOG_NO_ABBREVIATED_SEVERITIES #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc index 209dd90c480..f269432d4da 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.cc @@ -22,8 +22,8 @@ limitations under the License. */ #define GLOG_NO_ABBREVIATED_SEVERITIES #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h index 5ae14576dfe..3ca62afba1d 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_serialize_deserialize_test.h @@ -24,8 +24,8 @@ limitations under the License. 
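The two `tester_helper.h` hunks above move only a closing parenthesis: a trailing `// NOLINT` pins the parameter to its own line, so the `)` must start a new line, and the upgraded formatter places it at column zero rather than aligning it under the parameter. Both shapes, with the signature copied from the hunk:

// before:
//   void ConvertFP32toFP16(paddle::PaddleTensor &tensor  // NOLINT
//                          ) {
// after:
//   void ConvertFP32toFP16(paddle::PaddleTensor &tensor  // NOLINT
//   ) {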
*/ #include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc index 262b7269cb3..977c6856f8c 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tensorrt/helper.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" @@ -226,13 +226,78 @@ void run(paddle_infer::Predictor* predictor, std::vector* out_data) { int32_t i1[run_seq_len] = { // sentence 1 - 1, 3558, 4, 75, 491, 89, 340, 313, 93, 4, 255, 10, 75, 321, 4095, 1902, 4, - 134, 49, 75, 311, 14, 44, 178, 543, 15, 12043, 2, 75, 201, 340, 9, 14, 44, - 486, 218, 1140, 279, 12043, 2, + 1, + 3558, + 4, + 75, + 491, + 89, + 340, + 313, + 93, + 4, + 255, + 10, + 75, + 321, + 4095, + 1902, + 4, + 134, + 49, + 75, + 311, + 14, + 44, + 178, + 543, + 15, + 12043, + 2, + 75, + 201, + 340, + 9, + 14, + 44, + 486, + 218, + 1140, + 279, + 12043, + 2, // sentence 2 - 101, 2054, 2234, 2046, 2486, 2044, 1996, 2047, 4552, 2001, 9536, 1029, - 102, 2004, 1997, 2008, 2154, 1010, 1996, 2047, 4552, 9536, 2075, 1996, - 2117, 3072, 2234, 2046, 2486, 1012, 102, + 101, + 2054, + 2234, + 2046, + 2486, + 2044, + 1996, + 2047, + 4552, + 2001, + 9536, + 1029, + 102, + 2004, + 1997, + 2008, + 2154, + 1010, + 1996, + 2047, + 4552, + 9536, + 2075, + 1996, + 2117, + 3072, + 2234, + 2046, + 2486, + 1012, + 102, }; int32_t i2[run_seq_len] = { // sentence 1 diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc index ccdf237ffa5..4b22bba2bcc 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_transformer_prune_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_transformer_prune_test.cc index 2d7aa72a036..a238e62fc7c 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_transformer_prune_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_transformer_prune_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_fc_prelu_test.cc b/paddle/fluid/inference/tests/api/trt_fc_prelu_test.cc index c0be1944931..93d4a88383c 100644 --- a/paddle/fluid/inference/tests/api/trt_fc_prelu_test.cc +++ b/paddle/fluid/inference/tests/api/trt_fc_prelu_test.cc @@ -14,8 +14,8 @@ limitations under the License. 
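The forty-odd `+` lines expanding `i1[run_seq_len]` above change layout, not data: the list ends in a trailing comma, and clang-format's braced-list heuristic formats such a list one element per line instead of bin-packing it (the old binary packed it regardless; the new one honors the comma). Removing the trailing comma is the usual way to keep the compact form:

// Bin-packed: no comma after the final element.
static const int kPacked[] = {1, 3558, 4, 75, 491, 89};

// One per line: the comma after the final element opts in to the
// expanded layout, handy when entries need per-line comments.
static const int kExpanded[] = {
    1,
    3558,
    4,
};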
*/ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_instance_norm_converter_test.cc b/paddle/fluid/inference/tests/api/trt_instance_norm_converter_test.cc index ceb8b99774e..243be1d3319 100644 --- a/paddle/fluid/inference/tests/api/trt_instance_norm_converter_test.cc +++ b/paddle/fluid/inference/tests/api/trt_instance_norm_converter_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc b/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc index a87bf7b085b..bcf8a23b9b9 100644 --- a/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc +++ b/paddle/fluid/inference/tests/api/trt_mobilenet_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc index ca25967b59a..3a884abe888 100644 --- a/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc +++ b/paddle/fluid/inference/tests/api/trt_quant_int8_test.cc @@ -14,9 +14,10 @@ limitations under the License. */ #include #include + #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_quant_int8_yolov3_r50_test.cc b/paddle/fluid/inference/tests/api/trt_quant_int8_yolov3_r50_test.cc index 1fa24dddead..d9e1e3f8c9e 100644 --- a/paddle/fluid/inference/tests/api/trt_quant_int8_yolov3_r50_test.cc +++ b/paddle/fluid/inference/tests/api/trt_quant_int8_yolov3_r50_test.cc @@ -11,9 +11,10 @@ limitations under the License. */ #include #include + #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_resnet50_test.cc b/paddle/fluid/inference/tests/api/trt_resnet50_test.cc index 2975967e0c0..cdc6586f127 100644 --- a/paddle/fluid/inference/tests/api/trt_resnet50_test.cc +++ b/paddle/fluid/inference/tests/api/trt_resnet50_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_resnext_test.cc b/paddle/fluid/inference/tests/api/trt_resnext_test.cc index b525a1b7068..374074957c8 100644 --- a/paddle/fluid/inference/tests/api/trt_resnext_test.cc +++ b/paddle/fluid/inference/tests/api/trt_resnext_test.cc @@ -14,8 +14,8 @@ limitations under the License. 
*/ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc index c00b36b520b..0726db28343 100644 --- a/paddle/fluid/inference/tests/api/trt_split_converter_test.cc +++ b/paddle/fluid/inference/tests/api/trt_split_converter_test.cc @@ -14,8 +14,8 @@ limitations under the License. */ #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/inference/tests/api/trt_test_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/api/trt_test_helper.h b/paddle/fluid/inference/tests/api/trt_test_helper.h index aaa285b2fc2..cadf996e071 100644 --- a/paddle/fluid/inference/tests/api/trt_test_helper.h +++ b/paddle/fluid/inference/tests/api/trt_test_helper.h @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once #include + #include #include #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/fluid/inference/tests/api/tester_helper.h" namespace paddle { diff --git a/paddle/fluid/inference/tests/infer_ut/test_suite.h b/paddle/fluid/inference/tests/infer_ut/test_suite.h index a5c8c524021..8737afa8099 100644 --- a/paddle/fluid/inference/tests/infer_ut/test_suite.h +++ b/paddle/fluid/inference/tests/infer_ut/test_suite.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once #include + #include #include #include @@ -26,7 +27,6 @@ #include "gflags/gflags.h" #include "glog/logging.h" #include "gtest/gtest.h" - #include "paddle/include/paddle_inference_api.h" namespace paddle { @@ -64,7 +64,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor, int repeat_times = 2) { // prepare input tensor auto input_names = predictor->GetInputNames(); - for (const auto & [ key, value ] : *input_data_map) { + for (const auto &[key, value] : *input_data_map) { switch (value.type) { case paddle::PaddleDType::INT64: { std::vector input_value = @@ -150,7 +150,7 @@ void SingleThreadPrediction(paddle_infer::Predictor *predictor, void CompareRecord(std::map *truth_output_data, std::map *infer_output_data, float epislon = 1e-5) { - for (const auto & [ key, value ] : *infer_output_data) { + for (const auto &[key, value] : *infer_output_data) { auto truth_record = (*truth_output_data)[key]; VLOG(1) << "output name: " << key; size_t numel = value.data.size() / sizeof(float); @@ -190,7 +190,7 @@ double SingleThreadProfile(paddle_infer::Predictor *predictor, int repeat_times = 2) { // prepare input tensor auto input_names = predictor->GetInputNames(); - for (const auto & [ key, value ] : *input_data_map) { + for (const auto &[key, value] : *input_data_map) { switch (value.type) { case paddle::PaddleDType::INT64: { std::vector input_value = diff --git a/paddle/fluid/inference/utils/benchmark_tester.cc b/paddle/fluid/inference/utils/benchmark_tester.cc index 0c48c2db9b6..8f7614cb10a 100644 --- a/paddle/fluid/inference/utils/benchmark_tester.cc +++ b/paddle/fluid/inference/utils/benchmark_tester.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
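The `test_suite.h` hunks above correct the spacing of C++17 structured bindings: the old formatter did not recognize the syntax and emitted `const auto & [ key, value ]`, the new one produces the idiomatic `const auto &[key, value]`. A self-contained loop mirroring the hunk (`std::string` stands in for the tensor type to keep the sketch compilable):

#include <map>
#include <string>

void VisitAll(const std::map<std::string, std::string> &input_data_map) {
  for (const auto &[key, value] : input_data_map) {  // new spacing
    // old formatter output: for (const auto & [ key, value ] : ...)
    (void)key;
    (void)value;
  }
}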
-#include "paddle/fluid/inference/utils/benchmark.h" #include #include +#include "paddle/fluid/inference/utils/benchmark.h" + using namespace paddle::inference; // NOLINT TEST(Benchmark, basic) { Benchmark benchmark; diff --git a/paddle/fluid/inference/utils/io_utils.cc b/paddle/fluid/inference/utils/io_utils.cc index 87331e1978f..425c67d2fd2 100644 --- a/paddle/fluid/inference/utils/io_utils.cc +++ b/paddle/fluid/inference/utils/io_utils.cc @@ -158,8 +158,9 @@ void SerializePDTensorsToFile(const std::string &path, void DeserializePDTensorsToFile(const std::string &path, std::vector *tensors) { bool is_present = analysis::FileExists(path); - PADDLE_ENFORCE_EQ(is_present, true, platform::errors::InvalidArgument( - "Cannot open %s to read", path)); + PADDLE_ENFORCE_EQ( + is_present, true, + platform::errors::InvalidArgument("Cannot open %s to read", path)); std::ifstream fin(path, std::ios::binary); DeserializePDTensorsToStream(fin, tensors); fin.close(); diff --git a/paddle/fluid/inference/utils/io_utils_tester.cc b/paddle/fluid/inference/utils/io_utils_tester.cc index ffd97232652..e8ebb72acc3 100644 --- a/paddle/fluid/inference/utils/io_utils_tester.cc +++ b/paddle/fluid/inference/utils/io_utils_tester.cc @@ -12,11 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/utils/io_utils.h" #include #include + #include + #include "paddle/fluid/inference/api/helper.h" +#include "paddle/fluid/inference/utils/io_utils.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/inference/utils/singleton.h b/paddle/fluid/inference/utils/singleton.h index 6828924c300..5fccd3458a1 100644 --- a/paddle/fluid/inference/utils/singleton.h +++ b/paddle/fluid/inference/utils/singleton.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/inference/utils/table_printer_tester.cc b/paddle/fluid/inference/utils/table_printer_tester.cc index 8faac79c517..fc482807b28 100644 --- a/paddle/fluid/inference/utils/table_printer_tester.cc +++ b/paddle/fluid/inference/utils/table_printer_tester.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/inference/utils/table_printer.h" #include #include +#include "paddle/fluid/inference/utils/table_printer.h" + namespace paddle { namespace inference {} // namespace inference } // namespace paddle diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 7cd5fffea2a..d72af70657a 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -28,6 +28,7 @@ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include + #include "paddle/fluid/memory/allocation/cuda_allocator.h" #include "paddle/fluid/memory/allocation/cuda_managed_allocator.h" #include "paddle/fluid/memory/allocation/pinned_allocator.h" diff --git a/paddle/fluid/memory/allocation/allocator_facade.h b/paddle/fluid/memory/allocation/allocator_facade.h index 94b07e3e6c1..a37c11c0c04 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.h +++ b/paddle/fluid/memory/allocation/allocator_facade.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/memory/allocation/allocator.h" #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/memory/allocation/npu_pinned_allocator.h" diff --git a/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc b/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc index fca07ba8e25..d3f16ec6286 100644 --- a/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc +++ b/paddle/fluid/memory/allocation/allocator_facade_abs_flags_test.cc @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/memory/allocation/allocator_facade.h" #include +#include "paddle/fluid/memory/allocation/allocator_facade.h" + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) DECLARE_double(fraction_of_gpu_memory_to_use); DECLARE_double(fraction_of_cuda_pinned_memory_to_use); diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc index 782062283e9..d460480bc73 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc @@ -16,6 +16,7 @@ #include #include // NOLINT + #include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/profiler/event_tracing.h" diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc index 4469673b305..70c43145cc8 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_facade_test.cc @@ -13,10 +13,12 @@ // limitations under the License. 
#include + #include // NOLINT #include // NOLINT #include #include // NOLINT + #include "gflags/gflags.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" diff --git a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc index 8d2f6e07a29..441e80dfa4f 100644 --- a/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc +++ b/paddle/fluid/memory/allocation/auto_growth_best_fit_allocator_test.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - -#include "paddle/fluid/memory/allocation/aligned_allocator.h" #include "paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.h" +#include + #include "gtest/gtest.h" +#include "paddle/fluid/memory/allocation/aligned_allocator.h" DECLARE_bool(free_idle_chunk); DECLARE_bool(free_when_no_cache_hit); diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.cc b/paddle/fluid/memory/allocation/best_fit_allocator.cc index 4cfe3997d89..c93645bf7a0 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/best_fit_allocator.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/best_fit_allocator.h" + #include #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/memory/allocation/best_fit_allocator.h b/paddle/fluid/memory/allocation/best_fit_allocator.h index 69cb7c2708f..64ee632c387 100644 --- a/paddle/fluid/memory/allocation/best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/best_fit_allocator.h @@ -14,6 +14,7 @@ #pragma once #include + #include #include #include diff --git a/paddle/fluid/memory/allocation/cuda_allocator.cc b/paddle/fluid/memory/allocation/cuda_allocator.cc index 62a2dd78128..de6cac63e9d 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_allocator.cc @@ -24,6 +24,7 @@ #endif #include + #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/memory/allocation/cuda_allocator.h b/paddle/fluid/memory/allocation/cuda_allocator.h index 522b1d623e8..f3df3082741 100644 --- a/paddle/fluid/memory/allocation/cuda_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_allocator.h @@ -14,6 +14,7 @@ #pragma once #include // NOLINT + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/allocation/cuda_ipc_allocator.cc b/paddle/fluid/memory/allocation/cuda_ipc_allocator.cc index b2f24d5aed1..dff93736a6e 100644 --- a/paddle/fluid/memory/allocation/cuda_ipc_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_ipc_allocator.cc @@ -15,15 +15,16 @@ #ifndef _WIN32 #include "paddle/fluid/memory/allocation/cuda_ipc_allocator.h" -#include "paddle/fluid/platform/cuda_device_guard.h" #include #include #include + #include #include #include "glog/logging.h" +#include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/memory/allocation/cuda_managed_allocator.cc b/paddle/fluid/memory/allocation/cuda_managed_allocator.cc index 0c83d4d3663..ac62b10c0e0 100644 --- a/paddle/fluid/memory/allocation/cuda_managed_allocator.cc +++ 
b/paddle/fluid/memory/allocation/cuda_managed_allocator.cc @@ -24,6 +24,7 @@ #endif #include + #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc index a235b3871b3..9494141615f 100644 --- a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc +++ b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.cc @@ -18,6 +18,7 @@ #endif #include + #include "paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h index e7b296e6a5a..ff26a96a0e1 100644 --- a/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h +++ b/paddle/fluid/memory/allocation/cuda_virtual_mem_allocator.h @@ -16,10 +16,12 @@ #ifdef PADDLE_WITH_CUDA #include + #include "paddle/fluid/platform/cuda_device_guard.h" #endif #include // NOLINT + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/allocation/custom_allocator.cc b/paddle/fluid/memory/allocation/custom_allocator.cc index e53d7b1cc76..2cd969e2bd1 100644 --- a/paddle/fluid/memory/allocation/custom_allocator.cc +++ b/paddle/fluid/memory/allocation/custom_allocator.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/custom_allocator.h" + #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/memory/allocation/custom_allocator.h b/paddle/fluid/memory/allocation/custom_allocator.h index 0f34bc156c8..b10f840f60d 100644 --- a/paddle/fluid/memory/allocation/custom_allocator.h +++ b/paddle/fluid/memory/allocation/custom_allocator.h @@ -14,6 +14,7 @@ #pragma once #include // NOLINT + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/allocation/mmap_allocator.cc b/paddle/fluid/memory/allocation/mmap_allocator.cc index 25c2235cce8..6fd87fb6a77 100644 --- a/paddle/fluid/memory/allocation/mmap_allocator.cc +++ b/paddle/fluid/memory/allocation/mmap_allocator.cc @@ -19,6 +19,7 @@ #include #include #include + #include #include @@ -217,9 +218,9 @@ std::shared_ptr AllocateMemoryMapWriterAllocation( const std::string &ipc_name = GetIPCName(); int flags = O_RDWR | O_CREAT; int fd = shm_open(ipc_name.c_str(), flags, 0600); - PADDLE_ENFORCE_NE( - fd, -1, platform::errors::Unavailable("File descriptor %s open failed", - ipc_name.c_str())); + PADDLE_ENFORCE_NE(fd, -1, + platform::errors::Unavailable( + "File descriptor %s open failed", ipc_name.c_str())); PADDLE_ENFORCE_EQ(ftruncate(fd, size), 0, platform::errors::Unavailable( "Fruncate a file to a specified length failed!")); @@ -239,9 +240,9 @@ std::shared_ptr RebuildMemoryMapReaderAllocation( flags &= ~O_CREAT; int fd = shm_open(ipc_name.c_str(), flags, 0600); - PADDLE_ENFORCE_NE( - fd, -1, platform::errors::Unavailable("File descriptor %s open failed", - ipc_name.c_str())); + PADDLE_ENFORCE_NE(fd, -1, + platform::errors::Unavailable( + "File descriptor %s open failed", ipc_name.c_str())); void *ptr = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); PADDLE_ENFORCE_NE(ptr, MAP_FAILED, platform::errors::Unavailable( diff --git 
a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index 5efbfce7fed..7cc95de8310 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -24,7 +24,6 @@ #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/profiler.h" - #include "paddle/fluid/string/printf.h" #include "paddle/fluid/string/split.h" #include "paddle/phi/common/place.h" diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h index 05db0d7341a..3d6500d0f56 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.h +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.h @@ -14,6 +14,7 @@ #pragma once #include + #include #include // NOLINT #include diff --git a/paddle/fluid/memory/allocation/npu_allocator.cc b/paddle/fluid/memory/allocation/npu_allocator.cc index d69663f636e..1c277c5db84 100644 --- a/paddle/fluid/memory/allocation/npu_allocator.cc +++ b/paddle/fluid/memory/allocation/npu_allocator.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/npu_allocator.h" + #include + #include "paddle/fluid/platform/device/npu/npu_info.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/memory/allocation/npu_allocator.h b/paddle/fluid/memory/allocation/npu_allocator.h index ff55ba70c52..04832c6fd9b 100644 --- a/paddle/fluid/memory/allocation/npu_allocator.h +++ b/paddle/fluid/memory/allocation/npu_allocator.h @@ -14,6 +14,7 @@ #pragma once #include // NOLINT + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/allocation/pinned_allocator.cc b/paddle/fluid/memory/allocation/pinned_allocator.cc index 5e5aea6dab2..ad11d818752 100644 --- a/paddle/fluid/memory/allocation/pinned_allocator.cc +++ b/paddle/fluid/memory/allocation/pinned_allocator.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/memory/allocation/pinned_allocator.h" + #include "paddle/fluid/memory/stats.h" namespace paddle { namespace memory { diff --git a/paddle/fluid/memory/allocation/retry_allocator.cc b/paddle/fluid/memory/allocation/retry_allocator.cc index d6074975720..2914da4f636 100644 --- a/paddle/fluid/memory/allocation/retry_allocator.cc +++ b/paddle/fluid/memory/allocation/retry_allocator.cc @@ -44,8 +44,9 @@ void RetryAllocator::FreeImpl(phi::Allocation* allocation) { size_t size = allocation->size(); underlying_allocator_->Free(allocation); if (UNLIKELY(waited_allocate_size_)) { - VLOG(10) << "Free " << size << " bytes and notify all waited threads, " - "where waited_allocate_size_ = " + VLOG(10) << "Free " << size + << " bytes and notify all waited threads, " + "where waited_allocate_size_ = " << waited_allocate_size_; cv_.notify_all(); } diff --git a/paddle/fluid/memory/allocation/retry_allocator_test.cc b/paddle/fluid/memory/allocation/retry_allocator_test.cc index cb593f5ab74..e7370036cee 100644 --- a/paddle/fluid/memory/allocation/retry_allocator_test.cc +++ b/paddle/fluid/memory/allocation/retry_allocator_test.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/memory/allocation/retry_allocator.h" #include // NOLINT + #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/best_fit_allocator.h" #include "paddle/fluid/memory/allocation/cpu_allocator.h" diff --git a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc index 80877cb670b..81a87ef07b5 100644 --- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc +++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h" + #include "paddle/fluid/platform/profiler/event_tracing.h" #ifdef PADDLE_WITH_CUDA diff --git a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h index 32d3896e66b..ac4b7c790c9 100644 --- a/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h +++ b/paddle/fluid/memory/allocation/stream_safe_cuda_allocator.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/allocation/spin_lock.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/allocation/thread_local_allocator_test.cc b/paddle/fluid/memory/allocation/thread_local_allocator_test.cc index c5378d9f59c..74c83149b4c 100644 --- a/paddle/fluid/memory/allocation/thread_local_allocator_test.cc +++ b/paddle/fluid/memory/allocation/thread_local_allocator_test.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/memory/allocation/thread_local_allocator.h" + #include // NOLINT #include // NOLINT + #include "gtest/gtest.h" #include "paddle/fluid/memory/malloc.h" diff --git a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc index c8b4e980566..07ad149a307 100644 --- a/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h" + #include #include "paddle/fluid/memory/allocation/aligned_allocator.h" -#include "paddle/fluid/memory/allocation/virtual_memory_auto_growth_best_fit_allocator.h" namespace paddle { namespace memory { diff --git a/paddle/fluid/memory/buffer.h b/paddle/fluid/memory/buffer.h index 99b25ca289c..f42b5262e34 100644 --- a/paddle/fluid/memory/buffer.h +++ b/paddle/fluid/memory/buffer.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/detail/system_allocator.cc b/paddle/fluid/memory/detail/system_allocator.cc index e1077d66c54..244445d59b8 100644 --- a/paddle/fluid/memory/detail/system_allocator.cc +++ b/paddle/fluid/memory/detail/system_allocator.cc @@ -168,8 +168,9 @@ void* GPUAllocator::Alloc(size_t* index, size_t size) { } void GPUAllocator::Free(void* p, size_t size, size_t index) { - PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The index should be 0, index is %d", index)); + PADDLE_ENFORCE_EQ(index, 0, + platform::errors::InvalidArgument( + "The index should be 0, index is %d", index)); PADDLE_ENFORCE_GE(gpu_alloc_size_, size, platform::errors::InvalidArgument( "The size of memory (%d) to free exceeds the size of " @@ -223,8 +224,9 @@ void* CUDAPinnedAllocator::Alloc(size_t* index, size_t size) { void CUDAPinnedAllocator::Free(void* p, size_t size, size_t index) { gpuError_t err; - PADDLE_ENFORCE_EQ(index, 1, platform::errors::InvalidArgument( - "The index should be 1, but got %d", index)); + PADDLE_ENFORCE_EQ(index, 1, + platform::errors::InvalidArgument( + "The index should be 1, but got %d", index)); PADDLE_ENFORCE_GE(cuda_pinnd_alloc_size_, size, platform::errors::InvalidArgument( @@ -310,8 +312,9 @@ void* NPUAllocator::Alloc(size_t* index, size_t size) { void NPUAllocator::Free(void* p, size_t size, size_t index) { VLOG(4) << "Free " << p << " size " << size; - PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The index should be 0, index is %d", index)); + PADDLE_ENFORCE_EQ(index, 0, + platform::errors::InvalidArgument( + "The index should be 0, index is %d", index)); PADDLE_ENFORCE_GE(npu_alloc_size_, size, platform::errors::InvalidArgument( "The size of memory (%d) to free exceeds the size of " @@ -355,8 +358,9 @@ void* NPUPinnedAllocator::Alloc(size_t* index, size_t size) { void NPUPinnedAllocator::Free(void* p, size_t size, size_t index) { aclError err; - PADDLE_ENFORCE_EQ(index, 1, platform::errors::InvalidArgument( - "The index should be 1, but got %d", index)); + PADDLE_ENFORCE_EQ(index, 1, + platform::errors::InvalidArgument( + "The index should be 1, but got %d", index)); PADDLE_ENFORCE_GE(npu_pinnd_alloc_size_, size, platform::errors::InvalidArgument( @@ -425,8 +429,9 @@ void* MLUAllocator::Alloc(size_t* index, size_t size) { } void MLUAllocator::Free(void* p, size_t size, size_t index) { - PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The index should be 0, index is %d", index)); + PADDLE_ENFORCE_EQ(index, 0, + platform::errors::InvalidArgument( + "The index should be 0, index is %d", index)); PADDLE_ENFORCE_GE(mlu_alloc_size_, size, platform::errors::InvalidArgument( "The size of memory (%d) to free exceeds the size of " @@ -469,8 +474,9 @@ void* CustomAllocator::Alloc(size_t* index, size_t size) { void CustomAllocator::Free(void* p, size_t size, size_t index) { VLOG(4) << "CustomAllocator::Free " << p << " size " << size; 
- PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( - "The index should be 0, index is %d", index)); + PADDLE_ENFORCE_EQ(index, 0, + platform::errors::InvalidArgument( + "The index should be 0, index is %d", index)); PADDLE_ENFORCE_GE(plug_alloc_size, size, platform::errors::InvalidArgument( "The size of memory (%d) to free exceeds the size of " diff --git a/paddle/fluid/memory/detail/system_allocator.h b/paddle/fluid/memory/detail/system_allocator.h index f6ff6282a61..18c2e278f99 100644 --- a/paddle/fluid/memory/detail/system_allocator.h +++ b/paddle/fluid/memory/detail/system_allocator.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include // for size_t + #include namespace paddle { diff --git a/paddle/fluid/memory/get_base_ptr_test.cu b/paddle/fluid/memory/get_base_ptr_test.cu index 188d2f5f420..c8928bda0c9 100644 --- a/paddle/fluid/memory/get_base_ptr_test.cu +++ b/paddle/fluid/memory/get_base_ptr_test.cu @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "gtest/gtest.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" diff --git a/paddle/fluid/memory/malloc.h b/paddle/fluid/memory/malloc.h index 796bdcf0ec2..a7d0fa9781f 100644 --- a/paddle/fluid/memory/malloc.h +++ b/paddle/fluid/memory/malloc.h @@ -24,9 +24,9 @@ limitations under the License. */ namespace paddle { namespace memory { -using phi::Allocation; -using allocation::Allocator; using allocation::AllocationPtr; +using allocation::Allocator; +using phi::Allocation; extern std::shared_ptr AllocShared(const platform::Place& place, size_t size); diff --git a/paddle/fluid/memory/memory_stats_test.cc b/paddle/fluid/memory/memory_stats_test.cc index b2fc602e401..081f0d3d78c 100644 --- a/paddle/fluid/memory/memory_stats_test.cc +++ b/paddle/fluid/memory/memory_stats_test.cc @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/memory/memory.h" #include #include + #include "gtest/gtest.h" +#include "paddle/fluid/memory/memory.h" namespace paddle { namespace memory { diff --git a/paddle/fluid/memory/pinned_memory_test.cu b/paddle/fluid/memory/pinned_memory_test.cu index 837c964e2ad..e5958615d01 100644 --- a/paddle/fluid/memory/pinned_memory_test.cu +++ b/paddle/fluid/memory/pinned_memory_test.cu @@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include "paddle/fluid/memory/detail/memory_block.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memory.h" - #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/memory/stats.h b/paddle/fluid/memory/stats.h index bb6a3cca664..a30ee161e1c 100644 --- a/paddle/fluid/memory/stats.h +++ b/paddle/fluid/memory/stats.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" @@ -149,15 +150,16 @@ void HostMemoryStatUpdate(const std::string& stat_type, int dev_id, #define DEVICE_MEMORY_STAT_UPDATE(item, id, increment) \ DEVICE_MEMORY_STAT_FUNC(item, id, Update, increment) -#define HOST_MEMORY_STAT_FUNC(item, id, func, ...) 
\ - [&] { \ - PADDLE_ENFORCE_EQ(id, 0, paddle::platform::errors::OutOfRange( \ - "Only support device id 0 for host memory " \ - "stats, not support device id: %d", \ - id)); \ - return paddle::memory::Stat< \ - paddle::memory::HostMemoryStat##item##0>::GetInstance() \ - ->func(__VA_ARGS__); \ +#define HOST_MEMORY_STAT_FUNC(item, id, func, ...) \ + [&] { \ + PADDLE_ENFORCE_EQ(id, 0, \ + paddle::platform::errors::OutOfRange( \ + "Only support device id 0 for host memory " \ + "stats, not support device id: %d", \ + id)); \ + return paddle::memory::Stat< \ + paddle::memory::HostMemoryStat##item##0>::GetInstance() \ + ->func(__VA_ARGS__); \ }() #define HOST_MEMORY_STAT_CURRENT_VALUE(item, id) \ diff --git a/paddle/fluid/memory/stats_test.cc b/paddle/fluid/memory/stats_test.cc index bcaba8e9108..73a6b921ca8 100644 --- a/paddle/fluid/memory/stats_test.cc +++ b/paddle/fluid/memory/stats_test.cc @@ -13,11 +13,13 @@ // limitations under the License. #include "paddle/fluid/memory/stats.h" + #include #include #include #include #include + #include "gtest/gtest.h" namespace paddle { diff --git a/paddle/fluid/memory/stream_safe_cuda_alloc_test.cu b/paddle/fluid/memory/stream_safe_cuda_alloc_test.cu index 3bf873bcfc2..5b5350c34fb 100644 --- a/paddle/fluid/memory/stream_safe_cuda_alloc_test.cu +++ b/paddle/fluid/memory/stream_safe_cuda_alloc_test.cu @@ -25,6 +25,7 @@ #ifdef PADDLE_WITH_CUDA #include #include + #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" #endif @@ -47,9 +48,9 @@ __global__ void add_kernel(int *x, int *y, int n) { void CheckMemLeak(const platform::CUDAPlace &place) { uint64_t cuda_malloc_size = platform::RecordedGpuMallocSize(place.GetDeviceId()); - ASSERT_EQ(cuda_malloc_size, 0) << "Found " << cuda_malloc_size - << " bytes memory that not released yet," - << " there may be a memory leak problem"; + ASSERT_EQ(cuda_malloc_size, 0) + << "Found " << cuda_malloc_size << " bytes memory that not released yet," + << " there may be a memory leak problem"; } TEST(StreamSafeCUDAAllocInterfaceTest, AllocInterfaceTest) { diff --git a/paddle/fluid/operators/abs_op.cc b/paddle/fluid/operators/abs_op.cc index b9517e1cc86..86b60da341e 100644 --- a/paddle/fluid/operators/abs_op.cc +++ b/paddle/fluid/operators/abs_op.cc @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/activation_cudnn_op.cu.cc b/paddle/fluid/operators/activation_cudnn_op.cu.cc index b4a97e24cf2..b9d5e5fbe5e 100644 --- a/paddle/fluid/operators/activation_cudnn_op.cu.cc +++ b/paddle/fluid/operators/activation_cudnn_op.cu.cc @@ -20,8 +20,8 @@ namespace paddle { namespace operators { using framework::Tensor; using platform::ActivationDescriptor; -using platform::TensorDescriptor; using platform::CUDADeviceContext; +using platform::TensorDescriptor; #ifdef PADDLE_WITH_HIP #define GPUDNN_ACTIVATION_RELU miopenActivationRELU diff --git a/paddle/fluid/operators/activation_op.cc b/paddle/fluid/operators/activation_op.cc index 6905f3d7954..e500992e1b5 100644 --- a/paddle/fluid/operators/activation_op.cc +++ b/paddle/fluid/operators/activation_op.cc @@ -1454,18 +1454,19 @@ namespace plat = paddle::platform; REGISTER_OPERATOR(KERNEL_TYPE##_grad, ops::ActivationOpGrad, \ ops::ActivationGradOpInplaceInferer); -#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, op_name, functor, \ - grad_functor) \ - REGISTER_OP_CPU_KERNEL( \ - act_type, ops::ActivationKernel>, 
\ - ops::ActivationKernel>); \ - REGISTER_OP_CPU_KERNEL( \ - act_type##_grad, \ - ops::ActivationGradKernel>, \ - ops::ActivationGradKernel>, \ + ops::ActivationKernel>); \ + REGISTER_OP_CPU_KERNEL( \ + act_type##_grad, \ + ops::ActivationGradKernel>, \ + ops::ActivationGradKernel>); FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP); @@ -1781,21 +1782,18 @@ REGISTER_OP_VERSION(hard_shrink) "((x < -threshold) + (x > threshold)); after checkpoint: out = " "x * (((x < -threshold) + (x > threshold)) > 0)")); -REGISTER_OP_VERSION(softplus) - .AddCheckpoint( - R"ROC(add new attributes [beta] and [threshold], and the formula is changed to " +REGISTER_OP_VERSION(softplus).AddCheckpoint( + R"ROC(add new attributes [beta] and [threshold], and the formula is changed to " " softplus(x) = \\frac{1}{beta} * \\log(1 + e^{beta * x}) \\\\ \\text{For numerical" " stability, the implementation reverts to the linear function when: beta * x > threshold.})ROC", - paddle::framework::compatible::OpVersionDesc() - .NewAttr("beta", "The beta value of the new formula", 1.0f) - .NewAttr("threshold", "The threshold value of the new formula", - 20.0f)); - -REGISTER_OP_VERSION(mish) - .AddCheckpoint( - R"ROC(add new attributes [use_mkldnn], and when computing softplus the formula is changed as the new veriosn of softplus)ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "use_mkldnn", "(bool, default false) Only used in mkldnn kernel", - false)); + paddle::framework::compatible::OpVersionDesc() + .NewAttr("beta", "The beta value of the new formula", 1.0f) + .NewAttr("threshold", "The threshold value of the new formula", 20.0f)); + +REGISTER_OP_VERSION(mish).AddCheckpoint( + R"ROC(add new attributes [use_mkldnn], and when computing softplus the formula is changed as the new veriosn of softplus)ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "use_mkldnn", "(bool, default false) Only used in mkldnn kernel", + false)); /* ========================================================================== */ diff --git a/paddle/fluid/operators/activation_op.h b/paddle/fluid/operators/activation_op.h index 5f3916a65e7..81f5e24abfe 100644 --- a/paddle/fluid/operators/activation_op.h +++ b/paddle/fluid/operators/activation_op.h @@ -12,19 +12,20 @@ limitations under the License. 
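The `HOST_MEMORY_STAT_FUNC` and `REGISTER_ACTIVATION_CPU_KERNEL` hunks above look noisy but mostly shift the trailing backslashes: once a multi-line macro body is re-wrapped, clang-format re-aligns every escaped newline to one shared column (its `AlignEscapedNewlines` behavior), so each `\` moves even on lines whose code is unchanged. A compact illustration:

// All continuation backslashes sit in one column, recomputed
// whenever the widest line of the macro body changes.
#define CHECK_NONZERO(x)         \
  do {                           \
    if ((x) == 0) {              \
      /* handle the zero case */ \
    }                            \
  } while (0)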
*/ #pragma once #include + #include +#include #include #include #include #include #include - -#include #ifndef _USE_MATH_DEFINES #define _USE_MATH_DEFINES #endif #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" @@ -362,9 +363,8 @@ struct Relu6GradFunctor : public BaseActivationFunctor { typename dX> void operator()(Device d, X x, Out out, dOut dout, dX dx) const { dx.device(d) = - dout * - ((out > static_cast(0)) * (out < static_cast(threshold))) - .template cast(); + dout * ((out > static_cast(0)) * (out < static_cast(threshold))) + .template cast(); } static constexpr ActBwdOpFwdDeps FwdDeps() { diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc index e950f952c24..4127e4b1b10 100644 --- a/paddle/fluid/operators/activation_op_xpu.cc +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -253,8 +253,9 @@ struct XPUHardSwishFunctor : public BaseActivationFunctor { PADDLE_ENFORCE_EQ(threshold, 6.0f, platform::errors::External( "Not support threshold [%f] in XPU", threshold)); - PADDLE_ENFORCE_EQ(scale, 6.0f, platform::errors::External( - "Not support scale [%f] in XPU", scale)); + PADDLE_ENFORCE_EQ( + scale, 6.0f, + platform::errors::External("Not support scale [%f] in XPU", scale)); PADDLE_ENFORCE_EQ( offset, 3.0f, platform::errors::External("Not support offset [%f] in XPU", offset)); @@ -273,8 +274,9 @@ struct XPUHardSwishGradFunctor : public BaseActivationFunctor { PADDLE_ENFORCE_EQ(threshold, 6.0f, platform::errors::External( "Not support threshold [%f] in XPU", threshold)); - PADDLE_ENFORCE_EQ(scale, 6.0f, platform::errors::External( - "Not support scale [%f] in XPU", scale)); + PADDLE_ENFORCE_EQ( + scale, 6.0f, + platform::errors::External("Not support scale [%f] in XPU", scale)); PADDLE_ENFORCE_EQ( offset, 3.0f, platform::errors::External("Not support offset [%f] in XPU", offset)); @@ -377,10 +379,12 @@ struct XPUPowGradFunctor : public BaseActivationFunctor { auto x_dims = phi::vectorize(x->dims()); auto dy_dims = phi::vectorize(dOut->dims()); auto dx_dims = phi::vectorize(dX->dims()); - PADDLE_ENFORCE_EQ(x_dims, dy_dims, platform::errors::PreconditionNotMet( - "x_dims should match dy_dims.")); - PADDLE_ENFORCE_EQ(x_dims, dx_dims, platform::errors::PreconditionNotMet( - "x_dims should match dx_dims.")); + PADDLE_ENFORCE_EQ( + x_dims, dy_dims, + platform::errors::PreconditionNotMet("x_dims should match dy_dims.")); + PADDLE_ENFORCE_EQ( + x_dims, dx_dims, + platform::errors::PreconditionNotMet("x_dims should match dx_dims.")); float pow_factor = ctx.Attr("factor"); auto xpu_context = diff --git a/paddle/fluid/operators/add_position_encoding_op.cc b/paddle/fluid/operators/add_position_encoding_op.cc index e5fcd270eb8..4d2c23e2bb4 100644 --- a/paddle/fluid/operators/add_position_encoding_op.cc +++ b/paddle/fluid/operators/add_position_encoding_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/add_position_encoding_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/addmm_op.cc b/paddle/fluid/operators/addmm_op.cc index 716a2e40179..d0f0a6ae0c6 100644 --- a/paddle/fluid/operators/addmm_op.cc +++ b/paddle/fluid/operators/addmm_op.cc @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/affine_channel_op.cc b/paddle/fluid/operators/affine_channel_op.cc index 1b584fc5578..cd6798be2b2 100644 --- a/paddle/fluid/operators/affine_channel_op.cc +++ b/paddle/fluid/operators/affine_channel_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/affine_channel_op.cu b/paddle/fluid/operators/affine_channel_op.cu index cf4041f721a..87a71130b85 100644 --- a/paddle/fluid/operators/affine_channel_op.cu +++ b/paddle/fluid/operators/affine_channel_op.cu @@ -81,13 +81,13 @@ class AffineChannelCUDAKernel : public framework::OpKernel { int max_threads = dev_ctx.GetMaxPhysicalThreadCount(); grid = std::min(std::max(max_threads / block, 1), grid); if (layout == framework::DataLayout::kNCHW) { - KeAffineChannelCUDA<<>>( - x_d, scale_d, bias_d, C, HxW, num, y_d); + KeAffineChannelCUDA + <<>>(x_d, scale_d, bias_d, C, HxW, + num, y_d); } else { - KeAffineChannelCUDA<<>>( - x_d, scale_d, bias_d, C, HxW, num, y_d); + KeAffineChannelCUDA + <<>>(x_d, scale_d, bias_d, C, HxW, + num, y_d); } } }; @@ -169,29 +169,29 @@ class AffineChannelGradCUDAKernel : public framework::OpKernel { if (layout == framework::DataLayout::kNCHW) { if (dscale && dbias) { const T* x_d = x->data(); - AffineChannelScaleBiasGradientCUDAKernel< - T, block, framework::DataLayout::kNCHW><<>>( - dy_d, x_d, N, C, HxW, ds_d, db_d); + AffineChannelScaleBiasGradientCUDAKernel + <<>>(dy_d, x_d, N, C, HxW, ds_d, + db_d); } if (dx) { - KeAffineChannelCUDA<<>>( - dy_d, s_d, nullptr, C, HxW, num, dx_d); + KeAffineChannelCUDA + <<>>(dy_d, s_d, nullptr, C, HxW, + num, dx_d); } } else { if (dscale && dbias) { const T* x_d = x->data(); - AffineChannelScaleBiasGradientCUDAKernel< - T, block, framework::DataLayout::kNHWC><<>>( - dy_d, x_d, N, C, HxW, ds_d, db_d); + AffineChannelScaleBiasGradientCUDAKernel + <<>>(dy_d, x_d, N, C, HxW, ds_d, + db_d); } if (dx) { - KeAffineChannelCUDA<<>>( - dy_d, s_d, nullptr, C, HxW, num, dx_d); + KeAffineChannelCUDA + <<>>(dy_d, s_d, nullptr, C, HxW, + num, dx_d); } } } diff --git a/paddle/fluid/operators/affine_channel_op_xpu.cc b/paddle/fluid/operators/affine_channel_op_xpu.cc index db3eedea7ca..4de233b184a 100644 --- a/paddle/fluid/operators/affine_channel_op_xpu.cc +++ b/paddle/fluid/operators/affine_channel_op_xpu.cc @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/affine_grid_cudnn_op.cu.cc b/paddle/fluid/operators/affine_grid_cudnn_op.cu.cc index 31801b14564..6fca4afabd9 100644 --- a/paddle/fluid/operators/affine_grid_cudnn_op.cu.cc +++ b/paddle/fluid/operators/affine_grid_cudnn_op.cu.cc @@ -65,8 +65,9 @@ class CUDNNAffineGridOpKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( platform::dynload::cudnnSpatialTfGridGeneratorForward( handle, cudnn_st_desc, theta_data, output_data), - 0, platform::errors::Fatal("Some errors has occurred " - "during forward computation in cudnn.")); + 0, + platform::errors::Fatal("Some errors has occurred " + "during forward computation in cudnn.")); } }; diff --git a/paddle/fluid/operators/affine_grid_op.cc b/paddle/fluid/operators/affine_grid_op.cc index e311d21bb54..d7a49a965a0 100644 --- a/paddle/fluid/operators/affine_grid_op.cc +++ b/paddle/fluid/operators/affine_grid_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/affine_grid_op.h" + #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" diff --git a/paddle/fluid/operators/affine_grid_op.cu b/paddle/fluid/operators/affine_grid_op.cu index eeb4b3bc8a7..29a540bdc2c 100644 --- a/paddle/fluid/operators/affine_grid_op.cu +++ b/paddle/fluid/operators/affine_grid_op.cu @@ -42,8 +42,8 @@ struct Linspace { auto stream = ctx.cuda_device_context().stream(); int block = 512; int grid = (count + block - 1) / block; - LinspaceKernel<<>>(start, slice, count, - number_data); + LinspaceKernel + <<>>(start, slice, count, number_data); } }; diff --git a/paddle/fluid/operators/affine_grid_op.h b/paddle/fluid/operators/affine_grid_op.h index 21540de2b64..cbf70b9135b 100644 --- a/paddle/fluid/operators/affine_grid_op.h +++ b/paddle/fluid/operators/affine_grid_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc b/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc index 68f6e3b2f3b..78bacc30161 100644 --- a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc @@ -14,6 +14,7 @@ limitations under the License. 
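The affine_grid_op.cu hunk above reflows a kernel launch but keeps the usual configuration arithmetic: grid = (count + block - 1) / block is a ceiling division, so grid * block covers all count elements while each thread guards against the round-up overshoot. A minimal CUDA sketch of that pattern (demo_fill and launch_demo_fill are hypothetical names, not Paddle kernels):

#include <cuda_runtime.h>

// One thread per element; the bounds check absorbs the round-up slack.
__global__ void demo_fill(float* out, float value, int count) {
  int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx < count) {
    out[idx] = value;
  }
}

void launch_demo_fill(float* out, float value, int count,
                      cudaStream_t stream) {
  const int block = 512;
  const int grid = (count + block - 1) / block;  // ceiling division
  demo_fill<<<grid, block, 0, stream>>>(out, value, count);
}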
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu b/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu index 2f6977b9e2d..7771902c02b 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op.cu @@ -143,10 +143,10 @@ class CheckFiniteAndUnscaleGpuKernel : public framework::OpKernel { int blocks_per_grid = (total_num + elements_per_block - 1) / elements_per_block; VLOG(3) << "launch kernel"; - CheckFiniteAndUnscale< - T, MPDType><<>>( - d_xs, inverse_scale_v, xs_size, d_starts, found_inf_data, d_outs); + CheckFiniteAndUnscale + <<>>(d_xs, inverse_scale_v, xs_size, d_starts, + found_inf_data, d_outs); VLOG(3) << "finish kernel"; } }; diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc index 2862d923076..46572579e08 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc @@ -20,6 +20,7 @@ limitations under the License. */ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc index 30266d3eec0..1d3e5e5162c 100644 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc +++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_xpu.cc @@ -65,13 +65,15 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel { int r = xpu::isfinite(dev_ctx.x_context(), reinterpret_cast(x->data()), is_finite.data(), x->numel()); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(isfinite) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); - r = xpu::logical_not(dev_ctx.x_context(), reinterpret_cast( - is_finite.data()), - is_finite.data(), x->numel()); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(isfinite) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); + r = xpu::logical_not( + dev_ctx.x_context(), + reinterpret_cast(is_finite.data()), + is_finite.data(), x->numel()); PADDLE_ENFORCE_EQ( r, XPU_SUCCESS, platform::errors::External("XPU API(logical_not) return wrong " @@ -79,10 +81,11 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel { r, XPUAPIErrorMsg[r])); r = xpu::any(dev_ctx.x_context(), is_finite.data(), found_inf_data, x->numel()); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(any) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(any) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); if (dev_ctx.x_context()->xpu_stream) { dev_ctx.Wait(); } @@ -106,36 +109,40 @@ class CheckFiniteAndUnscaleXPUKernel : public framework::OpKernel { int r = xpu::cast_v2(dev_ctx.x_context(), reinterpret_cast(x->data()), float_x.data(), x->numel()); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(cast_v2) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(cast_v2) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); r = 
xpu::scale(dev_ctx.x_context(), float_x.data(), float_out.data(), x->numel(), false, inverse_scale, 0.0); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(scale) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(scale) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); r = xpu::cast_v2(dev_ctx.x_context(), float_out.data(), reinterpret_cast(out->data()), out->numel()); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(cast_v2) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(cast_v2) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); } else { int r = xpu::scale(dev_ctx.x_context(), reinterpret_cast(x->data()), reinterpret_cast(out->data()), x->numel(), false, inverse_scale, 0.0); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(scale) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(scale) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); } } if (dev_ctx.x_context()->xpu_stream) { diff --git a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc index e5a2d93e32f..c102bd2ba47 100644 --- a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/amp/get_float_status_op_npu.cc b/paddle/fluid/operators/amp/get_float_status_op_npu.cc index 8109a1ff43f..0c118761650 100644 --- a/paddle/fluid/operators/amp/get_float_status_op_npu.cc +++ b/paddle/fluid/operators/amp/get_float_status_op_npu.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op.cc b/paddle/fluid/operators/amp/update_loss_scaling_op.cc index 8354650df02..baf742b0b40 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/amp/update_loss_scaling_op.h" + #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op.cu b/paddle/fluid/operators/amp/update_loss_scaling_op.cu index 43f8f84578c..81f98643441 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op.cu +++ b/paddle/fluid/operators/amp/update_loss_scaling_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
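The check_finite_and_unscale XPU hunks above repeat one shape: every xpu:: call returns an int status that is immediately compared against XPU_SUCCESS, with XPUAPIErrorMsg[r] supplying the failure text. A standalone sketch of that call-then-check pattern, assuming hypothetical demo_xpu_scale/DemoErrorMsg/DemoEnforceOk stand-ins for the XPU API and the enforce machinery:

#include <cstdio>
#include <cstdlib>
#include <map>
#include <string>

constexpr int kDemoSuccess = 0;
const std::map<int, std::string> DemoErrorMsg = {{0, "success"},
                                                 {1, "invalid param"}};

// Stand-in for an XPU kernel wrapper that reports status via its result.
int demo_xpu_scale(const float* x, float* y, int n, float factor) {
  if (x == nullptr || y == nullptr || n < 0) return 1;
  for (int i = 0; i < n; ++i) y[i] = x[i] * factor;
  return kDemoSuccess;
}

// Mirrors PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, ...): check at the call site.
void DemoEnforceOk(int r, const char* api) {
  if (r != kDemoSuccess) {
    std::fprintf(stderr, "XPU API(%s) return wrong value[%d %s]\n", api, r,
                 DemoErrorMsg.at(r).c_str());
    std::abort();
  }
}

int main() {
  float x[4] = {1.f, 2.f, 3.f, 4.f}, y[4];
  int r = demo_xpu_scale(x, y, 4, 0.5f);
  DemoEnforceOk(r, "scale");
  return y[3] == 2.f ? 0 : 1;
}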
*/ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/amp/update_loss_scaling_op.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op.h b/paddle/fluid/operators/amp/update_loss_scaling_op.h index 41eb94247f5..f4c6b6f1f7d 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op.h +++ b/paddle/fluid/operators/amp/update_loss_scaling_op.h @@ -19,6 +19,7 @@ #endif // PADDLE_WITH_CUDA && __NVCC__ #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc index f9a93a47ff2..da7e23c4620 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc @@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/amp/update_loss_scaling_op.h" #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/amp/update_loss_scaling_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" DECLARE_int32(min_loss_scaling); diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc index fe03d93f448..8f57e00fe11 100644 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc +++ b/paddle/fluid/operators/amp/update_loss_scaling_op_xpu.cc @@ -13,12 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/amp/update_loss_scaling_op.h" #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" +#include "paddle/fluid/operators/amp/update_loss_scaling_op.h" #include "paddle/fluid/platform/float16.h" namespace paddle { @@ -59,10 +60,11 @@ class UpdateLossScalingXPUKernel : public framework::OpKernel { r = xpu::constant(dev_ctx.x_context(), reinterpret_cast(out_data), num, XPUTyp(0.0)); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(constant) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(constant) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); } } const bool stop_update = ctx.Attr("stop_update"); diff --git a/paddle/fluid/operators/angle_op.h b/paddle/fluid/operators/angle_op.h index 116a8053db3..ace345465dc 100644 --- a/paddle/fluid/operators/angle_op.h +++ b/paddle/fluid/operators/angle_op.h @@ -17,11 +17,11 @@ #define _USE_MATH_DEFINES #endif #include -#include "paddle/phi/kernels/funcs/complex_functors.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/for_range.h" +#include "paddle/phi/kernels/funcs/complex_functors.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/arg_max_op.cc b/paddle/fluid/operators/arg_max_op.cc index c5e4188ca2d..63fd27a1edf 100644 --- a/paddle/fluid/operators/arg_max_op.cc +++ b/paddle/fluid/operators/arg_max_op.cc @@ -12,10 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/arg_min_max_op_base.h" - -#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" @@ -28,20 +27,18 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, ArgMaxInferShapeFunctor); -REGISTER_OP_VERSION(arg_max) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(arg_max).AddCheckpoint( + R"ROC( Upgrade argmax add a new attribute [flatten] and modify the attribute of dtype)ROC", - paddle::framework::compatible::OpVersionDesc() - .NewAttr("flatten", - "In order to compute the argmax over the flattened array " - "when the " - "argument `axis` in python API is None.", - false) - .ModifyAttr( - "dtype", - "Change the default value of dtype from -1 to 3" - ", means return the int64 indices directly. The rearse why " - "changing the default value is that the int64 value in " - "VarType is 3 in the frameworke.proto.", - 3)); + paddle::framework::compatible::OpVersionDesc() + .NewAttr("flatten", + "In order to compute the argmax over the flattened array " + "when the " + "argument `axis` in python API is None.", + false) + .ModifyAttr("dtype", + "Change the default value of dtype from -1 to 3" + ", means return the int64 indices directly. 
The rearse why " + "changing the default value is that the int64 value in " + "VarType is 3 in the frameworke.proto.", + 3)); diff --git a/paddle/fluid/operators/arg_min_max_op_base.h b/paddle/fluid/operators/arg_min_max_op_base.h index 585341beea1..194a3070bf6 100644 --- a/paddle/fluid/operators/arg_min_max_op_base.h +++ b/paddle/fluid/operators/arg_min_max_op_base.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/arg_min_op.cc b/paddle/fluid/operators/arg_min_op.cc index fb3abd01af8..c995d56cf6b 100644 --- a/paddle/fluid/operators/arg_min_op.cc +++ b/paddle/fluid/operators/arg_min_op.cc @@ -27,20 +27,18 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, ArgMinInferShapeFunctor); -REGISTER_OP_VERSION(arg_min) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(arg_min).AddCheckpoint( + R"ROC( Upgrade argmin add a new attribute [flatten] and modify the attribute of dtype)ROC", - paddle::framework::compatible::OpVersionDesc() - .NewAttr("flatten", - "In order to compute the argmin over the flattened array " - "when the " - "argument `axis` in python API is None.", - false) - .ModifyAttr( - "dtype", - "Change the default value of dtype from -1 to 3" - ", means return the int64 indices directly. The rearse why " - "changing the default value is that the int64 value in " - "VarType is 3 in the frameworke.proto.", - 3)); + paddle::framework::compatible::OpVersionDesc() + .NewAttr("flatten", + "In order to compute the argmin over the flattened array " + "when the " + "argument `axis` in python API is None.", + false) + .ModifyAttr("dtype", + "Change the default value of dtype from -1 to 3" + ", means return the int64 indices directly. The rearse why " + "changing the default value is that the int64 value in " + "VarType is 3 in the frameworke.proto.", + 3)); diff --git a/paddle/fluid/operators/array_operator.h b/paddle/fluid/operators/array_operator.h index af44a77c813..0cc3b695aef 100644 --- a/paddle/fluid/operators/array_operator.h +++ b/paddle/fluid/operators/array_operator.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/array_to_lod_tensor_op.cc b/paddle/fluid/operators/array_to_lod_tensor_op.cc index 1db3592b1cf..f0824695a06 100644 --- a/paddle/fluid/operators/array_to_lod_tensor_op.cc +++ b/paddle/fluid/operators/array_to_lod_tensor_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/phi/core/lod_utils.h" diff --git a/paddle/fluid/operators/ascend_trigger_op.h b/paddle/fluid/operators/ascend_trigger_op.h index eaa79da2ba8..d1eaa00c2a3 100644 --- a/paddle/fluid/operators/ascend_trigger_op.h +++ b/paddle/fluid/operators/ascend_trigger_op.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_ASCEND #include "paddle/fluid/framework/fleet/ascend_wrapper.h" diff --git a/paddle/fluid/operators/assign_op_xpu.cc b/paddle/fluid/operators/assign_op_xpu.cc index b95be3096f0..7d03982f6ad 100644 --- a/paddle/fluid/operators/assign_op_xpu.cc +++ b/paddle/fluid/operators/assign_op_xpu.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/assign_op.h" - #include +#include "paddle/fluid/operators/assign_op.h" + namespace paddle { namespace framework { class OpDesc; diff --git a/paddle/fluid/operators/attention_lstm_op.cc b/paddle/fluid/operators/attention_lstm_op.cc index bf7d609370a..22db7d9e982 100644 --- a/paddle/fluid/operators/attention_lstm_op.cc +++ b/paddle/fluid/operators/attention_lstm_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/attention_lstm_op.h" + #include + #include "paddle/fluid/platform/cpu_info.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/cpu_vec.h" @@ -62,8 +64,9 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "LSTMWeight dims should be (%d + %d) * %d.", D, M, 4 * D)); auto b_dims = ctx->GetInputDim("LSTMBias"); - PADDLE_ENFORCE_EQ(b_dims.size(), 2, platform::errors::InvalidArgument( - "Input(LSTMBias)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + b_dims.size(), 2, + platform::errors::InvalidArgument("Input(LSTMBias)'s rank must be 2.")); PADDLE_ENFORCE_EQ(b_dims[0], 1, platform::errors::InvalidArgument( "LSTMBias dims should be 1 x %d.", 4 * D)); @@ -72,11 +75,13 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { "LSTMBias dims should be 1 x %d.", 4 * D)); auto c_dims = ctx->GetInputDim("C0"); - PADDLE_ENFORCE_EQ(c_dims.size(), 2, platform::errors::InvalidArgument( - "Input(C0)'s rank must be 2.")); + PADDLE_ENFORCE_EQ( + c_dims.size(), 2, + platform::errors::InvalidArgument("Input(C0)'s rank must be 2.")); if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(c_dims[1], D, platform::errors::InvalidArgument( - "C0 dims should be N x %d.", D)); + PADDLE_ENFORCE_EQ( + c_dims[1], D, + platform::errors::InvalidArgument("C0 dims should be N x %d.", D)); } if (ctx->HasInput("H0")) { @@ -126,10 +131,12 @@ void AttentionLSTMOp::InferShape(framework::InferShapeContext* ctx) const { PADDLE_ENFORCE_EQ(dims.size(), 2, platform::errors::InvalidArgument( "Input(AttentionScalar)'s rank must be 2.")); - PADDLE_ENFORCE_EQ(dims[0], 1, platform::errors::InvalidArgument( - "AttentionScalar shapes must be 1 * 1.")); - PADDLE_ENFORCE_EQ(dims[1], 1, platform::errors::InvalidArgument( - "AttentionScalar shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ(dims[0], 1, + platform::errors::InvalidArgument( + "AttentionScalar shapes must be 1 * 1.")); + PADDLE_ENFORCE_EQ(dims[1], 1, + platform::errors::InvalidArgument( + "AttentionScalar shapes must be 1 * 1.")); } if 
(ctx->HasInput("AttentionScalarBias")) { @@ -332,14 +339,15 @@ class AttentionLSTMKernel : public framework::OpKernel { int len = x_lod[0][i + 1] - x_lod[0][i]; max_seq_len = max_seq_len < len ? len : max_seq_len; } - PADDLE_ENFORCE_EQ(x_lod.size(), 1UL, platform::errors::InvalidArgument( - "Input(X)'s lod size must be 1.")); + PADDLE_ENFORCE_EQ( + x_lod.size(), 1UL, + platform::errors::InvalidArgument("Input(X)'s lod size must be 1.")); PADDLE_ENFORCE_EQ( c0->dims()[0], N, platform::errors::InvalidArgument("C0 dims should be %d x %d.", N, D)); fc_out->Resize({max_seq_len, 1}); - std::function act_gate, act_cell, act_cand; + std::function act_gate, act_cell, act_cand; auto& act_gate_str = ctx.Attr("gate_activation"); auto& act_cell_str = ctx.Attr("cell_activation"); auto& act_cand_str = ctx.Attr("candidate_activation"); diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h index 289dda56b19..de6eca3903f 100644 --- a/paddle/fluid/operators/average_accumulates_op.h +++ b/paddle/fluid/operators/average_accumulates_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/batch_fc_op.cc b/paddle/fluid/operators/batch_fc_op.cc index 952625bcb6e..2d2deae69a7 100644 --- a/paddle/fluid/operators/batch_fc_op.cc +++ b/paddle/fluid/operators/batch_fc_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/batch_fc_op.h" + #include namespace paddle { @@ -42,8 +43,9 @@ class BatchFCOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(input_dims.size(), 3, platform::errors::InvalidArgument( "Input of BatchFCOp should have 3D.")); - PADDLE_ENFORCE_EQ(w_dims.size(), 3, platform::errors::InvalidArgument( - "W of BatchFCOp should have 3D.")); + PADDLE_ENFORCE_EQ( + w_dims.size(), 3, + platform::errors::InvalidArgument("W of BatchFCOp should have 3D.")); PADDLE_ENFORCE_EQ( input_dims[0], w_dims[0], platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/batch_fc_op.cu b/paddle/fluid/operators/batch_fc_op.cu index ddedf0172be..5843acb4fdd 100644 --- a/paddle/fluid/operators/batch_fc_op.cu +++ b/paddle/fluid/operators/batch_fc_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/operators/batch_fc_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" diff --git a/paddle/fluid/operators/batch_norm_op.cc b/paddle/fluid/operators/batch_norm_op.cc index 2663a081011..67384338d76 100644 --- a/paddle/fluid/operators/batch_norm_op.cc +++ b/paddle/fluid/operators/batch_norm_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/batch_norm_op.h" + #include #include #include + #include "paddle/fluid/framework/data_layout.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -167,10 +169,11 @@ framework::OpKernelType BatchNormOp::GetExpectedKernelType( bn_param_type, framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), platform::errors::InvalidArgument("Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Variance")->dtype()), - platform::errors::InvalidArgument( - "Variance input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), + platform::errors::InvalidArgument( + "Variance input should be of float type")); // TODO(pzelazko-intel): enable MKLDNN layout when it's ready framework::LibraryType library = framework::LibraryType::kPlain; diff --git a/paddle/fluid/operators/batch_norm_op.h b/paddle/fluid/operators/batch_norm_op.h index d274e8d2c00..b82b49e5cd5 100644 --- a/paddle/fluid/operators/batch_norm_op.h +++ b/paddle/fluid/operators/batch_norm_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/layout_utils.h" diff --git a/paddle/fluid/operators/batch_norm_op_mlu.cc b/paddle/fluid/operators/batch_norm_op_mlu.cc index 6507890a8b5..6dff315aa6a 100644 --- a/paddle/fluid/operators/batch_norm_op_mlu.cc +++ b/paddle/fluid/operators/batch_norm_op_mlu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/batch_norm_op.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" +#include "paddle/fluid/operators/batch_norm_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" namespace paddle { diff --git a/paddle/fluid/operators/batch_norm_op_npu.cc b/paddle/fluid/operators/batch_norm_op_npu.cc index ae03ecbcb16..725b7f3848f 100644 --- a/paddle/fluid/operators/batch_norm_op_npu.cc +++ b/paddle/fluid/operators/batch_norm_op_npu.cc @@ -113,8 +113,9 @@ class NPUBatchNormOpKernel : public framework::OpKernel { runner_reduce.Run(stream); const auto &runner_update = NpuOpRunner( - "BNTrainingUpdate", {x_tensor, sum, square_sum, *scale, *bias, - *running_mean, *running_var}, + "BNTrainingUpdate", + {x_tensor, sum, square_sum, *scale, *bias, *running_mean, + *running_var}, {y_tesnor, *mean_out, *variance_out, *saved_mean, *saved_variance}, {{"factor", momentum}, {"epsilon", epsilon}}); runner_update.Run(stream); @@ -216,10 +217,11 @@ class NPUBatchNormGradOpKernel : public framework::OpKernel { {dx_tensor}, {{"epsilon", epsilon}}); runner_infer.Run(stream); } else { - const auto &runner_reduce = NpuOpRunner( - "BNTrainingReduceGrad", {dy_tensor, x_tensor, *d_scale, *d_bias, - *scale, *saved_mean, *saved_inv_variance}, - {dx_tensor}, {{"epsilon", epsilon}}); + const auto &runner_reduce = + NpuOpRunner("BNTrainingReduceGrad", + {dy_tensor, x_tensor, *d_scale, *d_bias, *scale, + *saved_mean, *saved_inv_variance}, + {dx_tensor}, {{"epsilon", epsilon}}); runner_reduce.Run(stream); } } diff --git a/paddle/fluid/operators/batch_norm_op_xpu.cc b/paddle/fluid/operators/batch_norm_op_xpu.cc index 0893324c602..3ade2f36ad8 100644 --- a/paddle/fluid/operators/batch_norm_op_xpu.cc +++ b/paddle/fluid/operators/batch_norm_op_xpu.cc @@ -13,10 +13,11 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/batch_norm_op.h" #include #include +#include "paddle/fluid/operators/batch_norm_op.h" + namespace paddle { namespace operators { @@ -128,8 +129,9 @@ static int calculate_inv_BN_Y(xpu::Context *ctx, T *x, const T *scale, const T *bias, const T *mean, const T *variance, const int N, const int C, const int M, const T *y) { - PADDLE_ENFORCE_EQ(x, y, platform::errors::InvalidArgument( - "X and Y should be inplaced in inplace mode")); + PADDLE_ENFORCE_EQ(x, y, + platform::errors::InvalidArgument( + "X and Y should be inplaced in inplace mode")); std::vector tensor_shape_vec({N, C, M}); std::vector array_shape_vec({1, C, 1}); // y - bias @@ -207,8 +209,9 @@ class BatchNormGradXPUKernel : public framework::OpKernel { is_inplace = false; if (d_x) { PADDLE_ENFORCE_NE( - d_x, d_y, platform::errors::InvalidArgument( - "X@GRAD and Y@GRAD inplaced in non-inplace mode")); + d_x, d_y, + platform::errors::InvalidArgument( + "X@GRAD and Y@GRAD inplaced in non-inplace mode")); } } @@ -275,11 +278,12 @@ class BatchNormGradXPUKernel : public framework::OpKernel { int r1 = calculate_inv_var(dev_ctx.x_context(), global_var->data(), epsilon, C, epsilon_data, global_inv_std_data); - PADDLE_ENFORCE_EQ(r1, XPU_SUCCESS, platform::errors::External( - "XPU API(batch_norm_grad " - "calculate_inv_var function) " - "return wrong value[%d %s]", - r1, XPUAPIErrorMsg[r1])); + PADDLE_ENFORCE_EQ( + r1, XPU_SUCCESS, + platform::errors::External("XPU API(batch_norm_grad " + "calculate_inv_var function) " + "return wrong value[%d %s]", + r1, XPUAPIErrorMsg[r1])); } auto px = *x; auto *inv_std_data = @@ -290,11 +294,12 @@ class BatchNormGradXPUKernel : public framework::OpKernel { dev_ctx.x_context(), px.mutable_data(ctx.GetPlace()), scale->data(), bias->data(), mean_data, inv_std_data, N, C, H * W, x->data()); - PADDLE_ENFORCE_EQ(r2, XPU_SUCCESS, platform::errors::External( - "XPU API(batch_norm_grad " - "calculate_inv_BN_Y function) " - "return wrong value[%d %s]", - r2, XPUAPIErrorMsg[r2])); + PADDLE_ENFORCE_EQ( + r2, XPU_SUCCESS, + platform::errors::External("XPU API(batch_norm_grad " + "calculate_inv_BN_Y function) " + "return wrong value[%d %s]", + r2, XPUAPIErrorMsg[r2])); } int r3; @@ -319,10 +324,11 @@ class BatchNormGradXPUKernel : public framework::OpKernel { scale_data, batch_mean->data(), batch_inv_std->data(), d_scale_data, d_bias_data, is_nchw); } - PADDLE_ENFORCE_EQ(r3, XPU_SUCCESS, platform::errors::External( - "XPU API(batch_norm_grad) return " - "wrong value[%d %s]", - r3, XPUAPIErrorMsg[r3])); + PADDLE_ENFORCE_EQ( + r3, XPU_SUCCESS, + platform::errors::External("XPU API(batch_norm_grad) return " + "wrong value[%d %s]", + r3, XPUAPIErrorMsg[r3])); } }; diff --git a/paddle/fluid/operators/batch_size_like.h b/paddle/fluid/operators/batch_size_like.h index facb4cd8254..1cc6e364677 100644 --- a/paddle/fluid/operators/batch_size_like.h +++ b/paddle/fluid/operators/batch_size_like.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/beam_search_decode_op.cc b/paddle/fluid/operators/beam_search_decode_op.cc index 3fae65c5017..0e3e32666a8 100644 --- a/paddle/fluid/operators/beam_search_decode_op.cc +++ b/paddle/fluid/operators/beam_search_decode_op.cc @@ -12,10 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/beam_search_decode_op.h" + #include #include "paddle/fluid/framework/convert_utils.h" -#include "paddle/fluid/operators/beam_search_decode_op.h" #include "paddle/fluid/platform/device_context.h" namespace paddle { diff --git a/paddle/fluid/operators/beam_search_decode_op_test.cc b/paddle/fluid/operators/beam_search_decode_op_test.cc index cf32e407424..6f70136b2d2 100644 --- a/paddle/fluid/operators/beam_search_decode_op_test.cc +++ b/paddle/fluid/operators/beam_search_decode_op_test.cc @@ -103,11 +103,9 @@ TEST(BeamSearchDecodeOp, Backtrace) { std::vector{1, 1, 3, 5}, &ids, &scores); paddle::test::GenerateExample( std::vector{0, 2, 4}, - std::vector{0, 0, 0, 2, - 2}, // the branchs of the first source sentence - // are pruned since finished - std::vector{5, 1}, - &ids, &scores); + std::vector{0, 0, 0, 2, 2}, // the branchs of the first source + // sentence are pruned since finished + std::vector{5, 1}, &ids, &scores); ASSERT_EQ(ids.size(), 5UL); ASSERT_EQ(scores.size(), 5UL); diff --git a/paddle/fluid/operators/beam_search_op.cc b/paddle/fluid/operators/beam_search_op.cc index 887d28f5875..90b6359f447 100644 --- a/paddle/fluid/operators/beam_search_op.cc +++ b/paddle/fluid/operators/beam_search_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/beam_search_op.cu.cc b/paddle/fluid/operators/beam_search_op.cu.cc index 4ef9476eee5..15aca070221 100644 --- a/paddle/fluid/operators/beam_search_op.cu.cc +++ b/paddle/fluid/operators/beam_search_op.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/beam_search_op.h" + #include "paddle/fluid/framework/op_registry.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/beam_search_op_npu.cc b/paddle/fluid/operators/beam_search_op_npu.cc index cae3d0e55fc..f5fa0ac026d 100644 --- a/paddle/fluid/operators/beam_search_op_npu.cc +++ b/paddle/fluid/operators/beam_search_op_npu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/beam_search_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/beam_search_op.h" namespace ops = paddle::operators; REGISTER_OP_NPU_KERNEL( diff --git a/paddle/fluid/operators/benchmark/op_tester.cc b/paddle/fluid/operators/benchmark/op_tester.cc index 4b1593b1f8b..fc01eef8058 100644 --- a/paddle/fluid/operators/benchmark/op_tester.cc +++ b/paddle/fluid/operators/benchmark/op_tester.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/benchmark/op_tester.h" + #include + #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/op_info.h" diff --git a/paddle/fluid/operators/benchmark/op_tester.h b/paddle/fluid/operators/benchmark/op_tester.h index 6acd42c8675..217fbe2653e 100644 --- a/paddle/fluid/operators/benchmark/op_tester.h +++ b/paddle/fluid/operators/benchmark/op_tester.h @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/benchmark/op_tester_config.h" diff --git a/paddle/fluid/operators/benchmark/op_tester_config.cc b/paddle/fluid/operators/benchmark/op_tester_config.cc index e9477798858..d7a055ede1b 100644 --- a/paddle/fluid/operators/benchmark/op_tester_config.cc +++ b/paddle/fluid/operators/benchmark/op_tester_config.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/benchmark/op_tester_config.h" + #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/operators/bilateral_slice_op.cc b/paddle/fluid/operators/bilateral_slice_op.cc index 675566504c2..124441093d3 100644 --- a/paddle/fluid/operators/bilateral_slice_op.cc +++ b/paddle/fluid/operators/bilateral_slice_op.cc @@ -10,9 +10,11 @@ limitations under the License. */ #include "paddle/fluid/operators/bilateral_slice_op.h" + #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/bilateral_slice_op.cu b/paddle/fluid/operators/bilateral_slice_op.cu index e7bf6d212dc..f20debdf0b8 100644 --- a/paddle/fluid/operators/bilateral_slice_op.cu +++ b/paddle/fluid/operators/bilateral_slice_op.cu @@ -11,6 +11,7 @@ #include #include + #include "paddle/fluid/operators/bilateral_slice_op.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" @@ -167,11 +168,11 @@ class BilateralSliceOpCUDAKernel : public framework::OpKernel { platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), total_count); - BilateralSliceCudaForwardKernel< - T><<>>( - output_data, grid_data, guide_data, input_data, grid_sizes, has_offset, - total_count, output_dims[1]); + BilateralSliceCudaForwardKernel + <<>>( + output_data, grid_data, guide_data, input_data, grid_sizes, + has_offset, total_count, output_dims[1]); } }; @@ -475,29 +476,29 @@ class BilateralSliceGradOpCUDAKernel : public framework::OpKernel { platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), grid_count); - BilateralSliceCudaGridGradKernel< - T><<>>( - grid_grad_data, output_grad_data, guide_data, input_data, grid_sizes, - has_offset, grid_count, output_chans); + BilateralSliceCudaGridGradKernel + <<>>( + grid_grad_data, output_grad_data, guide_data, input_data, + grid_sizes, has_offset, grid_count, output_chans); config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), guide_count); - BilateralSliceCudaGuideGradKernel< - T><<>>( - guide_grad_data, output_grad_data, grid_data, guide_data, input_data, - grid_sizes, has_offset, guide_count, output_chans); + BilateralSliceCudaGuideGradKernel + <<>>( + guide_grad_data, output_grad_data, grid_data, guide_data, + input_data, grid_sizes, has_offset, guide_count, output_chans); config = platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), input_count); - BilateralSliceCudaInputGradKernel< - T><<>>( - input_grad_data, output_grad_data, grid_data, guide_data, grid_sizes, - has_offset, input_count, output_chans); + BilateralSliceCudaInputGradKernel + <<>>( + input_grad_data, output_grad_data, grid_data, guide_data, + grid_sizes, has_offset, input_count, output_chans); } }; diff --git a/paddle/fluid/operators/bilateral_slice_op.h 
b/paddle/fluid/operators/bilateral_slice_op.h index a388f4763ec..66783f151ea 100644 --- a/paddle/fluid/operators/bilateral_slice_op.h +++ b/paddle/fluid/operators/bilateral_slice_op.h @@ -12,6 +12,7 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/hostdevice.h" diff --git a/paddle/fluid/operators/bmm_op.cc b/paddle/fluid/operators/bmm_op.cc index 6b5f4755d77..16066c1a13e 100644 --- a/paddle/fluid/operators/bmm_op.cc +++ b/paddle/fluid/operators/bmm_op.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/bmm_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/bmm_op.h b/paddle/fluid/operators/bmm_op.h index 3fecb55caae..271a74a4444 100644 --- a/paddle/fluid/operators/bmm_op.h +++ b/paddle/fluid/operators/bmm_op.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/fluid/operators/bmm_op_xpu.cc b/paddle/fluid/operators/bmm_op_xpu.cc index cc185580279..348f25d46b4 100644 --- a/paddle/fluid/operators/bmm_op_xpu.cc +++ b/paddle/fluid/operators/bmm_op_xpu.cc @@ -16,8 +16,8 @@ #include #include -#include "paddle/fluid/operators/matmul_v2_op.h" +#include "paddle/fluid/operators/matmul_v2_op.h" #include "paddle/fluid/operators/xpu_api_wrapper.h" #include "paddle/fluid/platform/device/device_wrapper.h" diff --git a/paddle/fluid/operators/bpr_loss_op.cc b/paddle/fluid/operators/bpr_loss_op.cc index bbe4bb08adf..afa7aee4450 100644 --- a/paddle/fluid/operators/bpr_loss_op.cc +++ b/paddle/fluid/operators/bpr_loss_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/bpr_loss_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/bpr_loss_op.h b/paddle/fluid/operators/bpr_loss_op.h index 993bc0fccf0..fd6df2c1594 100644 --- a/paddle/fluid/operators/bpr_loss_op.h +++ b/paddle/fluid/operators/bpr_loss_op.h @@ -61,8 +61,9 @@ class BprLossOpKernel : public framework::OpKernel { const int64_t* label_data = labels->data(); for (int i = 0; i < step_size; ++i) { int lbl_pos = label_data[i]; - PADDLE_ENFORCE_GE(lbl_pos, 0, platform::errors::InvalidArgument( - "label data %d is illegal.", lbl_pos)); + PADDLE_ENFORCE_GE(lbl_pos, 0, + platform::errors::InvalidArgument( + "label data %d is illegal.", lbl_pos)); PADDLE_ENFORCE_LT(lbl_pos, class_num, platform::errors::InvalidArgument( "label data %d is illegal.", lbl_pos)); diff --git a/paddle/fluid/operators/broadcast_tensors_op.cc b/paddle/fluid/operators/broadcast_tensors_op.cc index 1063a8b7992..53146417f21 100644 --- a/paddle/fluid/operators/broadcast_tensors_op.cc +++ b/paddle/fluid/operators/broadcast_tensors_op.cc @@ -20,8 +20,8 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::Tensor; using framework::DDim; +using framework::Tensor; class BroadcastTensorsOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/cast_op.cc b/paddle/fluid/operators/cast_op.cc index 76e0f23df21..f0146994c1f 100644 --- a/paddle/fluid/operators/cast_op.cc +++ b/paddle/fluid/operators/cast_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/cast_op.h" + #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/cast_op.h b/paddle/fluid/operators/cast_op.h index 034cb47fab1..2f222d23e7c 100644 --- a/paddle/fluid/operators/cast_op.h +++ b/paddle/fluid/operators/cast_op.h @@ -17,7 +17,6 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/transform.h" - #include "paddle/phi/api/lib/utils/tensor_utils.h" #include "paddle/phi/kernels/cast_kernel.h" diff --git a/paddle/fluid/operators/cast_op_xpu.cc b/paddle/fluid/operators/cast_op_xpu.cc index 64324d9772b..8551d799cc3 100644 --- a/paddle/fluid/operators/cast_op_xpu.cc +++ b/paddle/fluid/operators/cast_op_xpu.cc @@ -19,9 +19,8 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/cast_op.h" #include "paddle/fluid/platform/float16.h" -#include "xpu/refactor/math.h" - #include "paddle/phi/kernels/cast_kernel.h" +#include "xpu/refactor/math.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/center_loss_op.cc b/paddle/fluid/operators/center_loss_op.cc index cd1aa9d9c84..add0bf966d9 100644 --- a/paddle/fluid/operators/center_loss_op.cc +++ b/paddle/fluid/operators/center_loss_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/center_loss_op.h" + #include #include diff --git a/paddle/fluid/operators/center_loss_op.cu b/paddle/fluid/operators/center_loss_op.cu index 549bb5ae75a..b46feeae64b 100644 --- a/paddle/fluid/operators/center_loss_op.cu +++ b/paddle/fluid/operators/center_loss_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/operators/center_loss_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/center_loss_op.h b/paddle/fluid/operators/center_loss_op.h index ed266e9ac7d..18769fed37b 100644 --- a/paddle/fluid/operators/center_loss_op.h +++ b/paddle/fluid/operators/center_loss_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/transform.h" diff --git a/paddle/fluid/operators/chunk_eval_op.cc b/paddle/fluid/operators/chunk_eval_op.cc index dfb0ad96b0b..83bdaa2de7d 100644 --- a/paddle/fluid/operators/chunk_eval_op.cc +++ b/paddle/fluid/operators/chunk_eval_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/chunk_eval_op.h" + #include #include @@ -55,11 +56,12 @@ class ChunkEvalOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (inference_dim.size() == 3 && inference_dim[2] == 1) || inference_dim.size() == 2, - true, platform::errors::InvalidArgument( - "when Input(SeqLength) is provided, Input(Inference) " - "should be of dim 3 (batch_size, bucket, 1) or dim 2 " - "(batch_size, bucket), but received [%s].", - inference_dim)); + true, + platform::errors::InvalidArgument( + "when Input(SeqLength) is provided, Input(Inference) " + "should be of dim 3 (batch_size, bucket, 1) or dim 2 " + "(batch_size, bucket), but received [%s].", + inference_dim)); auto seq_length_dim = ctx->GetInputDim("SeqLength"); PADDLE_ENFORCE_LE(seq_length_dim.size(), 2, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc index 0903c53e5ec..be9829dd43b 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/cinn/cinn_instruction_run_op.h" + #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" #include "paddle/fluid/operators/cinn/cinn_launch_context.h" #include "paddle/fluid/platform/enforce.h" @@ -48,12 +49,12 @@ class CinnInstructionRunOp : public framework::OperatorWithKernel { protected: /* [Why use single type kernel]: - * - * Whether the kernel data type is int, float or other type, - * which has no effect on its execution logic, so directly - * specified a data type here. - * - */ + * + * Whether the kernel data type is int, float or other type, + * which has no effect on its execution logic, so directly + * specified a data type here. + * + */ framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType(framework::proto::VarType::FP32, diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cu.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cu.cc index ea72f6c5374..afa350ef116 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.cu.cc +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cinn/cinn_instruction_run_op.h" + #include "paddle/fluid/framework/op_registry.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h b/paddle/fluid/operators/cinn/cinn_instruction_run_op.h index 81c2d23d3f1..13483d78f49 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op.h @@ -18,6 +18,7 @@ #include #include #include + #include "cinn/hlir/framework/graph_compiler.h" #include "cinn/hlir/framework/instruction.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc index 68bc3a0eb5c..cbfab3090c0 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/paddle2cinn/cinn_compiler.h" diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index a660d59fb4c..6b70efee86f 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -13,10 +13,12 @@ // limitations under the License. #include "paddle/fluid/operators/cinn/cinn_launch_context.h" + #include #include #include #include + #include "cinn/hlir/framework/graph_compiler.h" #include "cinn/hlir/framework/instruction.h" #include "cinn/hlir/framework/scope.h" @@ -43,13 +45,13 @@ namespace paddle { namespace operators::details { -using framework::Scope; using framework::LoDTensor; using framework::ParallelExecutor; +using framework::Scope; using CinnInstruction = ::cinn::hlir::framework::Instruction; using CinnRuntimeProgram = ::cinn::hlir::framework::Program; -using framework::paddle2cinn::Name2VarInfoMap; using framework::paddle2cinn::kMemOptVarInfoFromMainGraph; +using framework::paddle2cinn::Name2VarInfoMap; CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, const CinnCompiledObject& compiled_obj) diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h index ed5e4383d83..0bbbcc8b031 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.h +++ b/paddle/fluid/operators/cinn/cinn_launch_context.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/parallel_executor.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc index ecbfbf2f92e..cd4465d355f 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cinn/cinn_launch_context.h" + #include #include #include + #include "cinn/auto_schedule/auto_tuner.h" #include "cinn/common/target.h" #include "cinn/common/type.h" @@ -38,11 +40,11 @@ USE_OP(cinn_instruction_run); namespace paddle { namespace operators::details { +using framework::LoDTensor; using framework::OpDesc; +using framework::ParallelExecutor; using framework::ProgramDesc; -using framework::LoDTensor; using framework::ir::Graph; -using framework::ParallelExecutor; using framework::paddle2cinn::Name2VarInfoMap; using CinnShape = ::cinn::hlir::framework::Shape; using CinnInstruction = ::cinn::hlir::framework::Instruction; diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cc index 0a9b66bc92c..3b0198613db 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op.cc @@ -13,8 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/operators/cinn/cinn_launch_op.h" + #include #include + #include "cinn/hlir/framework/graph_compiler.h" #include "cinn/runtime/cinn_runtime.h" #include "cinn/runtime/flags.h" diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc b/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc index 9dfd53834e9..fb5a48ca3d0 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cinn/cinn_launch_op.h" + #include "paddle/fluid/framework/operator.h" /* see [Why use single type kernel] */ diff --git a/paddle/fluid/operators/cinn/cinn_launch_op.h b/paddle/fluid/operators/cinn/cinn_launch_op.h index f40b788dfb5..62c79faafec 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op.h +++ b/paddle/fluid/operators/cinn/cinn_launch_op.h @@ -77,16 +77,16 @@ class CinnLaunchOpKernel : public framework::OpKernel { std::map inputs_name2tensor; std::vector input_x_variable_names; std::vector input_no_need_buffer_variable_names; - auto add_name2tensor_fn = [&inputs_name2tensor]( - const std::vector& variable_names, - const std::vector& tensors) { - std::transform( - variable_names.begin(), variable_names.end(), tensors.begin(), - std::inserter(inputs_name2tensor, inputs_name2tensor.end()), - [](const std::string& name, const LoDTensor* tensor) { - return std::make_pair(name, tensor); - }); - }; + auto add_name2tensor_fn = + [&inputs_name2tensor](const std::vector& variable_names, + const std::vector& tensors) { + std::transform( + variable_names.begin(), variable_names.end(), tensors.begin(), + std::inserter(inputs_name2tensor, inputs_name2tensor.end()), + [](const std::string& name, const LoDTensor* tensor) { + return std::make_pair(name, tensor); + }); + }; auto input_x_tensors = ctx.MultiInput(kX); if (!input_x_tensors.empty()) { diff --git a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc index b0bd043f432..9ed9fad36a3 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_op_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_op_test.cc @@ -13,10 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cinn/cinn_launch_op.h" + #include + #include #include #include + #include "gflags/gflags.h" #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/cinn/cinn_op_helper.cc b/paddle/fluid/operators/cinn/cinn_op_helper.cc index 3fb9c822c77..26fee2d9e57 100644 --- a/paddle/fluid/operators/cinn/cinn_op_helper.cc +++ b/paddle/fluid/operators/cinn/cinn_op_helper.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/cinn/cinn_op_helper.h" + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/cinn/cinn_op_helper.h b/paddle/fluid/operators/cinn/cinn_op_helper.h index e542134b946..55ee3789c0a 100644 --- a/paddle/fluid/operators/cinn/cinn_op_helper.h +++ b/paddle/fluid/operators/cinn/cinn_op_helper.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/operator.h" // We define some common names or utility functions diff --git a/paddle/fluid/operators/cinn/test_helper.h b/paddle/fluid/operators/cinn/test_helper.h index 9720a5309fa..4e06882279b 100644 --- a/paddle/fluid/operators/cinn/test_helper.h +++ b/paddle/fluid/operators/cinn/test_helper.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/operators/class_center_sample_op.cu b/paddle/fluid/operators/class_center_sample_op.cu index a23cf2815d8..7192b415c27 100644 --- a/paddle/fluid/operators/class_center_sample_op.cu +++ b/paddle/fluid/operators/class_center_sample_op.cu @@ -15,17 +15,20 @@ #ifdef PADDLE_WITH_HIP #include #include + #include typedef hiprandState curandState; namespace cub = hipcub; #else #include #include + #include #endif #include #include + #include "paddle/fluid/operators/class_center_sample_op.h" #include "paddle/phi/api/include/tensor.h" diff --git a/paddle/fluid/operators/class_center_sample_op.h b/paddle/fluid/operators/class_center_sample_op.h index 24ce9ace3bf..8f12e90e185 100644 --- a/paddle/fluid/operators/class_center_sample_op.h +++ b/paddle/fluid/operators/class_center_sample_op.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/clip_by_norm_op.h b/paddle/fluid/operators/clip_by_norm_op.h index 8822fffd326..379cd4c6653 100644 --- a/paddle/fluid/operators/clip_by_norm_op.h +++ b/paddle/fluid/operators/clip_by_norm_op.h @@ -114,10 +114,11 @@ class ClipByNormOp : public framework::OperatorWithKernel { "Output(Out) of ClipByNormOp should not be null. " "Please check if it is created correctly.")); auto max_norm = ctx->Attrs().Get("max_norm"); - PADDLE_ENFORCE_GT(max_norm, 0, platform::errors::InvalidArgument( - "max_norm should be greater than 0. " - "Received max_norm is %f.", - max_norm)); + PADDLE_ENFORCE_GT( + max_norm, 0, + platform::errors::InvalidArgument("max_norm should be greater than 0. " + "Received max_norm is %f.", + max_norm)); auto x_dims = ctx->GetInputDim("X"); ctx->SetOutputDim("Out", x_dims); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/clip_by_norm_op_xpu.cc b/paddle/fluid/operators/clip_by_norm_op_xpu.cc index 7c91f06a8d7..62c2608f11c 100644 --- a/paddle/fluid/operators/clip_by_norm_op_xpu.cc +++ b/paddle/fluid/operators/clip_by_norm_op_xpu.cc @@ -13,9 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/operators/clip_by_norm_op.h"
 #include
+#include "paddle/fluid/operators/clip_by_norm_op.h"
+
 namespace paddle {
 namespace operators {
diff --git a/paddle/fluid/operators/clip_op.cc b/paddle/fluid/operators/clip_op.cc
index 6e898d31663..46eb9448d9d 100644
--- a/paddle/fluid/operators/clip_op.cc
+++ b/paddle/fluid/operators/clip_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include
+
 #include "paddle/fluid/framework/infershape_utils.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
@@ -179,14 +180,13 @@ REGISTER_OPERATOR(clip_grad, ops::ClipOpGrad, ops::ClipGradInplaceInferer,
                   ops::ClipDoubleGradOpMaker,
                   ops::ClipDoubleGradOpMaker);
-REGISTER_OP_VERSION(clip)
-    .AddCheckpoint(
-        R"ROC(
+REGISTER_OP_VERSION(clip).AddCheckpoint(
+    R"ROC(
 Upgrade clip add a new input [Min])ROC",
-        paddle::framework::compatible::OpVersionDesc()
-            .NewInput("Min",
-                      "Pass the mix, min value as input, not attribute. Min is "
-                      "dispensable.")
-            .NewInput("Max",
-                      "Pass the mix, min value as input, not attribute. Max is "
-                      "dispensable."));
+    paddle::framework::compatible::OpVersionDesc()
+        .NewInput("Min",
+                  "Pass the mix, min value as input, not attribute. Min is "
+                  "dispensable.")
+        .NewInput("Max",
+                  "Pass the mix, min value as input, not attribute. Max is "
+                  "dispensable."));
diff --git a/paddle/fluid/operators/clip_op_xpu.cc b/paddle/fluid/operators/clip_op_xpu.cc
index c5513128372..a99e5d2506f 100644
--- a/paddle/fluid/operators/clip_op_xpu.cc
+++ b/paddle/fluid/operators/clip_op_xpu.cc
@@ -61,10 +61,11 @@ class ClipXPUKernel : public framework::OpKernel {
     auto out_data = reinterpret_cast(out->data());
     int r = xpu::clip_v2(dev_ctx.x_context(), x_data, out_data, x->numel(),
                          min, max);
-    PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External(
-                                          "XPU API(clip_v2) return wrong "
-                                          "value[%d %s]",
-                                          r, XPUAPIErrorMsg[r]));
+    PADDLE_ENFORCE_EQ(
+        r, XPU_SUCCESS,
+        platform::errors::External("XPU API(clip_v2) return wrong "
+                                   "value[%d %s]",
+                                   r, XPUAPIErrorMsg[r]));
   }
 };
diff --git a/paddle/fluid/operators/coalesce_tensor_op.cc b/paddle/fluid/operators/coalesce_tensor_op.cc
index aa5a38e4dbf..af15ca2acb7 100644
--- a/paddle/fluid/operators/coalesce_tensor_op.cc
+++ b/paddle/fluid/operators/coalesce_tensor_op.cc
@@ -14,6 +14,7 @@
 #include
 #include
+
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/op_version_registry.h"
 #include "paddle/fluid/framework/operator.h"
@@ -265,11 +266,10 @@ class CoalesceTensorOpKernel : public framework::OpKernel {
             ->ShareDataWith(fused_tensor->Slice(
                 static_cast(offset), static_cast(offset + len)))
             .Resize(dim);
-    len = use_align
-              ? platform::Alignment(len * size_of_dtype, context.GetPlace(),
-                                    align_size) /
-                    size_of_dtype
-              : len;
+    len = use_align ? platform::Alignment(len * size_of_dtype,
+                                          context.GetPlace(), align_size) /
+                          size_of_dtype
+                    : len;
     ss << "output(" << out_var_names[i] << ") dim:(" << dim << ")"
        << " address: " << out_tensors[i]->data() << " len: " << len << ", ";
     offset += len;
@@ -304,12 +304,11 @@ class CoalesceTensorOpKernel : public framework::OpKernel {
         size, 0,
         platform::errors::InvalidArgument(
             "The number of tensor `%s`'s elements is 0.", var_names[i]));
-    auto len =
-        use_align
-            ? platform::Alignment(static_cast(size) * size_of_dtype,
-                                  place, align_size) /
-                  size_of_dtype
-            : static_cast(size);
+    auto len = use_align ?
platform::Alignment( + static_cast(size) * size_of_dtype, + place, align_size) / + size_of_dtype + : static_cast(size); const void *ptr = lod_tensors[i]->IsInitialized() ? lod_tensors[i]->data() : nullptr; VLOG(4) << size << " " << len; diff --git a/paddle/fluid/operators/collective/allreduce_op.cc b/paddle/fluid/operators/collective/allreduce_op.cc index 63b135a74cf..53843104dc5 100644 --- a/paddle/fluid/operators/collective/allreduce_op.cc +++ b/paddle/fluid/operators/collective/allreduce_op.cc @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/collective/allreduce_op.h" + #include // NOLINT #include -#include "paddle/fluid/operators/collective/allreduce_op.h" - namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/barrier_op.h b/paddle/fluid/operators/collective/barrier_op.h index 6df4d24c0ed..88333f36413 100644 --- a/paddle/fluid/operators/collective/barrier_op.h +++ b/paddle/fluid/operators/collective/barrier_op.h @@ -25,6 +25,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_GLOO) #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif diff --git a/paddle/fluid/operators/collective/broadcast_op.cc b/paddle/fluid/operators/collective/broadcast_op.cc index 61e27887b68..071b0350de6 100644 --- a/paddle/fluid/operators/collective/broadcast_op.cc +++ b/paddle/fluid/operators/collective/broadcast_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/collective/c_allgather_op.cc b/paddle/fluid/operators/collective/c_allgather_op.cc index c4e779698cc..f20ec75a970 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.cc +++ b/paddle/fluid/operators/collective/c_allgather_op.cc @@ -26,8 +26,9 @@ class CAllGatherOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "AllGather"); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Input", "Out", "AllGather"); int nranks = ctx->Attrs().Get("nranks"); - PADDLE_ENFORCE_GE(nranks, 2, platform::errors::InvalidArgument( - "The value of nranks should be >=2.")); + PADDLE_ENFORCE_GE(nranks, 2, + platform::errors::InvalidArgument( + "The value of nranks should be >=2.")); framework::DDim dim = ctx->GetInputDim("X"); dim[0] = dim[0] * nranks; if (dim[0] < 0) dim[0] = -1; diff --git a/paddle/fluid/operators/collective/c_allgather_op.h b/paddle/fluid/operators/collective/c_allgather_op.h index aa2040a2693..7f8c7b2f50e 100644 --- a/paddle/fluid/operators/collective/c_allgather_op.h +++ b/paddle/fluid/operators/collective/c_allgather_op.h @@ -25,6 +25,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_GLOO) #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu.cc b/paddle/fluid/operators/collective/c_allgather_op_npu.cc index 5339293da0f..f9ffdea7908 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/collective/c_allgather_op.h" - #include +#include "paddle/fluid/operators/collective/c_allgather_op.h" + #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/npu/hccl_helper.h" diff --git a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc index 7206dd01bca..087f6b879c3 100644 --- a/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allgather_op_npu_test.cc @@ -17,23 +17,22 @@ limitations under the License. */ #endif #include + #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/c_reducescatter_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc index 0946ad8aca6..5c2d6981bad 100644 --- a/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_max_op_npu_test.cc @@ -17,23 +17,22 @@ limitations under the License. */ #endif #include + #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/c_reducescatter_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/c_allreduce_op.h b/paddle/fluid/operators/collective/c_allreduce_op.h index 404f7c017ac..61cf4cf5b7f 100644 --- a/paddle/fluid/operators/collective/c_allreduce_op.h +++ b/paddle/fluid/operators/collective/c_allreduce_op.h @@ -41,6 +41,7 @@ limitations under the License. 
*/ #if defined(PADDLE_WITH_GLOO) #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif @@ -335,10 +336,11 @@ class CAllReduceOpXPUKernel : public framework::OpKernel { "Invalid reduce type: %d", red_type)); } - PADDLE_ENFORCE_EQ(bkcl_all_reduce(comm->comm(), sendbuff, recvbuff, numel, - dtype, bkcl_red_type, stream), - BKCL_SUCCESS, platform::errors::PreconditionNotMet( - "BKCL all reduce failed")); + PADDLE_ENFORCE_EQ( + bkcl_all_reduce(comm->comm(), sendbuff, recvbuff, numel, dtype, + bkcl_red_type, stream), + BKCL_SUCCESS, + platform::errors::PreconditionNotMet("BKCL all reduce failed")); #else PADDLE_THROW(platform::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU.")); diff --git a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc index 61e5f279034..4c76d094baf 100644 --- a/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_allreduce_sum_op_npu_test.cc @@ -17,20 +17,19 @@ limitations under the License. */ #endif #include + #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/c_broadcast_op.h b/paddle/fluid/operators/collective/c_broadcast_op.h index eb4acb9a369..394ea45efbb 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op.h +++ b/paddle/fluid/operators/collective/c_broadcast_op.h @@ -24,6 +24,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_GLOO) #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif diff --git a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc index cf4d6a28744..e383e78c5dd 100644 --- a/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_broadcast_op_npu_test.cc @@ -17,20 +17,19 @@ limitations under the License. */ #endif #include + #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/c_comm_init_all_op.cc b/paddle/fluid/operators/collective/c_comm_init_all_op.cc index ce2da1f22f1..c9605f4d1b2 100644 --- a/paddle/fluid/operators/collective/c_comm_init_all_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_all_op.cc @@ -15,7 +15,6 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/op_info.h" #include "paddle/fluid/framework/op_registry.h" - #include "paddle/fluid/framework/threadpool.h" #include "paddle/fluid/platform/collective_helper.h" @@ -53,9 +52,9 @@ class CCommInitAllOp : public framework::OperatorBase { void RunImpl(const framework::Scope& scope, const platform::Place& place) const override { -// PADDLE_ENFORCE_EQ(platform::is_gpu_place(place), true, -// platform::errors::PreconditionNotMet( -// "CCommInitAllOp can run on gpu place only")); + // PADDLE_ENFORCE_EQ(platform::is_gpu_place(place), true, + // platform::errors::PreconditionNotMet( + // "CCommInitAllOp can run on gpu place only")); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) std::vector devices = Attr>("devices"); diff --git a/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc b/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc index 86c966378cc..3ea24f6e654 100644 --- a/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_multitrainer_op.cc @@ -18,6 +18,7 @@ limitations under the License. */ #include #endif #include + #include #include diff --git a/paddle/fluid/operators/collective/c_comm_init_op.cc b/paddle/fluid/operators/collective/c_comm_init_op.cc index 490747520d6..a41d4293c90 100644 --- a/paddle/fluid/operators/collective/c_comm_init_op.cc +++ b/paddle/fluid/operators/collective/c_comm_init_op.cc @@ -71,8 +71,9 @@ class CCommInitOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( platform::is_gpu_place(place) || platform::is_xpu_place(place) || platform::is_mlu_place(place), - true, platform::errors::PreconditionNotMet( - "CCommInitOp can run on gpu or xpu or mlu place only.")); + true, + platform::errors::PreconditionNotMet( + "CCommInitOp can run on gpu or xpu or mlu place only.")); #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) || \ defined(PADDLE_WITH_XPU_BKCL) || defined(PADDLE_WITH_CNCL) diff --git a/paddle/fluid/operators/collective/c_concat_op.cc b/paddle/fluid/operators/collective/c_concat_op.cc index 551fde21162..155db23a039 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cc @@ -27,17 +27,19 @@ class CConcatOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); int ring_id = ctx->Attrs().Get("ring_id"); - PADDLE_ENFORCE_GE(nranks, 2, platform::errors::InvalidArgument( - "The number of ranks (%d) for c_concat " - "must be greater than 1.", - nranks)); + PADDLE_ENFORCE_GE(nranks, 2, + platform::errors::InvalidArgument( + "The number of ranks (%d) for c_concat " + "must be greater than 1.", + nranks)); PADDLE_ENFORCE_GE( ring_id, 0, platform::errors::InvalidArgument( "The ring_id (%d) for c_concat must be non-negative.", ring_id)); PADDLE_ENFORCE_GE( - rank, 0, platform::errors::InvalidArgument( - "The rank (%d) for c_concat must be non-negative.", rank)); + rank, 0, + platform::errors::InvalidArgument( + "The rank (%d) for c_concat must be non-negative.", rank)); PADDLE_ENFORCE_LT(rank, nranks, platform::errors::InvalidArgument( "The value of rank (%d) for c_concat must " diff --git a/paddle/fluid/operators/collective/c_concat_op.cu.cc b/paddle/fluid/operators/collective/c_concat_op.cu.cc index d3d9db0e5f8..98df6c8688e 100644 --- a/paddle/fluid/operators/collective/c_concat_op.cu.cc +++ b/paddle/fluid/operators/collective/c_concat_op.cu.cc @@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF 
ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/collective/c_concat_op.h" + #include -#include "paddle/fluid/operators/collective/c_concat_op.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/phi/api/include/tensor.h" diff --git a/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc b/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc index ec174ad0e56..3bd7e3ceffa 100644 --- a/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_bkcl_id_op.cc @@ -21,9 +21,8 @@ limitations under the License. */ #include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" - #include "paddle/fluid/platform/gen_comm_id_helper.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_gen_cncl_id_op.cc b/paddle/fluid/operators/collective/c_gen_cncl_id_op.cc index 7e65fba5718..d2e85171a4a 100644 --- a/paddle/fluid/operators/collective/c_gen_cncl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_cncl_id_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include "paddle/fluid/framework/op_proto_maker.h" @@ -21,9 +22,8 @@ limitations under the License. */ #include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" - #include "paddle/fluid/platform/gen_comm_id_helper.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_gen_hccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_hccl_id_op.cc index 6eec3853880..3f81eab7bc2 100644 --- a/paddle/fluid/operators/collective/c_gen_hccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_hccl_id_op.cc @@ -19,12 +19,11 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/var_type_traits.h" +#include "paddle/fluid/platform/device/npu/dynload/hccl.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" - -#include "paddle/fluid/platform/device/npu/dynload/hccl.h" #include "paddle/fluid/platform/gen_comm_id_helper.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc index d392beb3a48..d4f1fe1c182 100644 --- a/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/c_gen_nccl_id_op.cc @@ -20,9 +20,8 @@ limitations under the License. 
*/ #include "paddle/fluid/framework/var_type_traits.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/place.h" - #include "paddle/fluid/platform/gen_comm_id_helper.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/collective/c_reduce_op.h b/paddle/fluid/operators/collective/c_reduce_op.h index 4e9edb53730..5399a4aacbe 100644 --- a/paddle/fluid/operators/collective/c_reduce_op.h +++ b/paddle/fluid/operators/collective/c_reduce_op.h @@ -40,6 +40,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_GLOO) #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif @@ -261,10 +262,11 @@ class CReduceOpXPUKernel : public framework::OpKernel { "Invalid reduce type: %d", red_type)); } - PADDLE_ENFORCE_EQ(bkcl_reduce(comm->comm(), sendbuff, recvbuff, numel, - dtype, bkcl_red_type, root, stream), - BKCL_SUCCESS, platform::errors::PreconditionNotMet( - "BKCL all reduce failed")); + PADDLE_ENFORCE_EQ( + bkcl_reduce(comm->comm(), sendbuff, recvbuff, numel, dtype, + bkcl_red_type, root, stream), + BKCL_SUCCESS, + platform::errors::PreconditionNotMet("BKCL all reduce failed")); #else PADDLE_THROW(platform::errors::PreconditionNotMet( "PaddlePaddle should be compiled with XPU.")); @@ -319,9 +321,10 @@ class CReduceOpCUDAKernel : public framework::OpKernel { break; default: - PADDLE_ENFORCE_EQ(true, false, platform::errors::InvalidArgument( - "red_type must be one of kRedSum, " - "kRedMax, kRedMin, kRedProd.")); + PADDLE_ENFORCE_EQ(true, false, + platform::errors::InvalidArgument( + "red_type must be one of kRedSum, " + "kRedMax, kRedMin, kRedProd.")); } PADDLE_ENFORCE_GPU_SUCCESS(platform::dynload::ncclReduce( diff --git a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc index c4e410d04da..3bd55ea3704 100644 --- a/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reduce_sum_op_npu_test.cc @@ -17,20 +17,19 @@ limitations under the License. */ #endif #include + #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_reduce_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc index 8b498787c69..16437d4769e 100644 --- a/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_reducescatter_op_npu_test.cc @@ -17,23 +17,22 @@ limitations under the License. 
*/ #endif #include + #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_allgather_op.h" #include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/c_reducescatter_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/c_scatter_op.h b/paddle/fluid/operators/collective/c_scatter_op.h index 71a5f488ebc..ee07d7663b2 100644 --- a/paddle/fluid/operators/collective/c_scatter_op.h +++ b/paddle/fluid/operators/collective/c_scatter_op.h @@ -24,6 +24,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_GLOO) #include + #include "paddle/fluid/framework/fleet/gloo_wrapper.h" #endif diff --git a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu index 4c9fb148424..71216538a4e 100644 --- a/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu +++ b/paddle/fluid/operators/collective/c_softmax_with_cross_entropy_op.cu @@ -373,15 +373,15 @@ class CSoftmaxWithCrossEntropyGradCUDAKernel : public framework::OpKernel { const int end_index = start_index + D; if (label_type == framework::proto::VarType::INT32) { - MaskLabelByIndexGrad<<>>( - logit_grad_2d.data(), loss_grad->data(), - labels->data(), start_index, end_index, N, D); + MaskLabelByIndexGrad + <<>>( + logit_grad_2d.data(), loss_grad->data(), + labels->data(), start_index, end_index, N, D); } else if (label_type == framework::proto::VarType::INT64) { - MaskLabelByIndexGrad<<>>( - logit_grad_2d.data(), loss_grad->data(), - labels->data(), start_index, end_index, N, D); + MaskLabelByIndexGrad + <<>>( + logit_grad_2d.data(), loss_grad->data(), + labels->data(), start_index, end_index, N, D); } } }; diff --git a/paddle/fluid/operators/collective/c_split_op.cc b/paddle/fluid/operators/collective/c_split_op.cc index 37ec989f3f9..32f3ff9eab1 100644 --- a/paddle/fluid/operators/collective/c_split_op.cc +++ b/paddle/fluid/operators/collective/c_split_op.cc @@ -27,17 +27,19 @@ class CSplitOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); int ring_id = ctx->Attrs().Get("ring_id"); - PADDLE_ENFORCE_GE(nranks, 2, platform::errors::InvalidArgument( - "The number of ranks (%d) for c_split " - "must be greater than 1.", - nranks)); + PADDLE_ENFORCE_GE(nranks, 2, + platform::errors::InvalidArgument( + "The number of ranks (%d) for c_split " + "must be greater than 1.", + nranks)); PADDLE_ENFORCE_GE( ring_id, 0, platform::errors::InvalidArgument( "The ring_id (%d) for c_split must be non-negative.", ring_id)); PADDLE_ENFORCE_GE( - rank, 0, platform::errors::InvalidArgument( - "The rank (%d) for c_split must be non-negative.", rank)); + rank, 0, + platform::errors::InvalidArgument( + "The rank (%d) for c_split must be non-negative.", rank)); PADDLE_ENFORCE_LT(rank, nranks, platform::errors::InvalidArgument( "The value of rank (%d) for c_split 
must " diff --git a/paddle/fluid/operators/collective/c_split_op.cu b/paddle/fluid/operators/collective/c_split_op.cu index a0c4182468f..1dce4ce04b5 100644 --- a/paddle/fluid/operators/collective/c_split_op.cu +++ b/paddle/fluid/operators/collective/c_split_op.cu @@ -59,10 +59,11 @@ class CSplitOpCUDAKernel : public framework::OpKernel { int rank = ctx.Attr("rank"); auto place = ctx.GetPlace(); - PADDLE_ENFORCE_GE(rank, 0, platform::errors::PreconditionNotMet( - "The value of rank (%d) for c_split must be " - "greater than or equal to 0.", - rank)); + PADDLE_ENFORCE_GE(rank, 0, + platform::errors::PreconditionNotMet( + "The value of rank (%d) for c_split must be " + "greater than or equal to 0.", + rank)); PADDLE_ENFORCE_GE(nranks, 2, platform::errors::PreconditionNotMet( "The value of nranks (%d) for c_split must be " diff --git a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc index 133085ad3f3..91b89486c6a 100644 --- a/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc +++ b/paddle/fluid/operators/collective/c_sync_comm_stream_op_npu_test.cc @@ -26,11 +26,10 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_broadcast_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/checknumeric_npu_test.cc b/paddle/fluid/operators/collective/checknumeric_npu_test.cc index 36c6f4fadd0..b99ac381635 100644 --- a/paddle/fluid/operators/collective/checknumeric_npu_test.cc +++ b/paddle/fluid/operators/collective/checknumeric_npu_test.cc @@ -17,21 +17,20 @@ limitations under the License. */ #endif #include + #include #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/c_allreduce_op.h" #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/gen_bkcl_id_op.cc b/paddle/fluid/operators/collective/gen_bkcl_id_op.cc index 1ce89383568..f60030cec76 100644 --- a/paddle/fluid/operators/collective/gen_bkcl_id_op.cc +++ b/paddle/fluid/operators/collective/gen_bkcl_id_op.cc @@ -24,11 +24,10 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/device/xpu/bkcl_helper.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/gen_comm_id_helper.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/split.h" -#include "paddle/fluid/platform/gen_comm_id_helper.h" - namespace paddle { namespace operators { @@ -69,9 +68,10 @@ class GenBKCLIdOp : public framework::OperatorBase { int trainer_id = Attr("trainer_id"); std::string endpoint = trainers[trainer_id]; - PADDLE_ENFORCE_GE(trainer_id, 0, platform::errors::InvalidArgument( - "trainer_id %d is less than 0. Its " - "valid range is [0, trainer_size)")); + PADDLE_ENFORCE_GE( + trainer_id, 0, + platform::errors::InvalidArgument("trainer_id %d is less than 0. Its " + "valid range is [0, trainer_size)")); PADDLE_ENFORCE_LT( trainer_id, static_cast(trainers.size()), platform::errors::OutOfRange("trainer_id %d is out of range. Its valid " diff --git a/paddle/fluid/operators/collective/gen_hccl_id_op.cc b/paddle/fluid/operators/collective/gen_hccl_id_op.cc index 3d78082f12f..e0809459be1 100644 --- a/paddle/fluid/operators/collective/gen_hccl_id_op.cc +++ b/paddle/fluid/operators/collective/gen_hccl_id_op.cc @@ -21,14 +21,13 @@ limitations under the License. */ #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/framework/var_type_traits.h" +#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" #include "paddle/fluid/platform/device/npu/hccl_helper.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/place.h" #include "paddle/fluid/string/split.h" -#include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" - namespace paddle { namespace operators { @@ -48,9 +47,10 @@ class GenHCCLIdOp : public framework::OperatorBase { int trainer_id = Attr("trainer_id"); std::string endpoint = trainers[trainer_id]; - PADDLE_ENFORCE_GE(trainer_id, 0, platform::errors::InvalidArgument( - "trainer_id %d is less than 0. Its " - "valid range is [0, trainer_size)")); + PADDLE_ENFORCE_GE( + trainer_id, 0, + platform::errors::InvalidArgument("trainer_id %d is less than 0. Its " + "valid range is [0, trainer_size)")); PADDLE_ENFORCE_LT( trainer_id, static_cast(trainers.size()), platform::errors::OutOfRange("trainer_id %d is out of range. Its valid " diff --git a/paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc b/paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc index ad50ac36750..ba573509bd1 100644 --- a/paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc +++ b/paddle/fluid/operators/collective/gen_hccl_id_op_helper.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" + #include #include #include diff --git a/paddle/fluid/operators/collective/gen_nccl_id_op.cc b/paddle/fluid/operators/collective/gen_nccl_id_op.cc index 7a5b6b5f429..1e23f38c13a 100644 --- a/paddle/fluid/operators/collective/gen_nccl_id_op.cc +++ b/paddle/fluid/operators/collective/gen_nccl_id_op.cc @@ -70,9 +70,10 @@ class GenNCCLIdOp : public framework::OperatorBase { int trainer_id = Attr("trainer_id"); std::string endpoint = trainers[trainer_id]; - PADDLE_ENFORCE_GE(trainer_id, 0, platform::errors::InvalidArgument( - "trainer_id %d is less than 0. 
Its " - "valid range is [0, trainer_size)")); + PADDLE_ENFORCE_GE( + trainer_id, 0, + platform::errors::InvalidArgument("trainer_id %d is less than 0. Its " + "valid range is [0, trainer_size)")); PADDLE_ENFORCE_LT( trainer_id, static_cast(trainers.size()), platform::errors::OutOfRange("trainer_id %d is out of range. Its valid " diff --git a/paddle/fluid/operators/collective/partial_allgather_op.cc b/paddle/fluid/operators/collective/partial_allgather_op.cc index bef2ff94d63..6783d2f0b45 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op.cc @@ -26,8 +26,9 @@ class PartialAllGatherOp : public framework::OperatorWithKernel { int nranks = ctx->Attrs().Get("nranks"); int rank = ctx->Attrs().Get("rank"); - PADDLE_ENFORCE_GE(nranks, 2, platform::errors::InvalidArgument( - "The value of nranks should be >=2.")); + PADDLE_ENFORCE_GE(nranks, 2, + platform::errors::InvalidArgument( + "The value of nranks should be >=2.")); PADDLE_ENFORCE_EQ( (rank >= 0 && rank < nranks), true, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc index 0314bb7d5de..c727161d101 100644 --- a/paddle/fluid/operators/collective/partial_allgather_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_allgather_op_npu.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/collective/partial_allgather_op.h" #include +#include "paddle/fluid/operators/collective/partial_allgather_op.h" #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/npu/hccl_helper.h" diff --git a/paddle/fluid/operators/collective/partial_recv_op.cc b/paddle/fluid/operators/collective/partial_recv_op.cc index 99b2169180c..df59f49cb3a 100644 --- a/paddle/fluid/operators/collective/partial_recv_op.cc +++ b/paddle/fluid/operators/collective/partial_recv_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/collective/partial_recv_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/collective/partial_recv_op_npu.cc b/paddle/fluid/operators/collective/partial_recv_op_npu.cc index f14ce5f81f9..4704ab7683c 100644 --- a/paddle/fluid/operators/collective/partial_recv_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_recv_op_npu.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/collective/partial_recv_op.h" - #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/npu/hccl_helper.h" @@ -55,8 +54,9 @@ class PartialRecvOpASCENDKernel : public framework::OpKernel { int nranks = comm->nranks(); int peer = ctx.Attr("peer"); - PADDLE_ENFORCE_EQ(nranks, 2, platform::errors::InvalidArgument( - "The nranks must be 2, but (%d)", nranks)); + PADDLE_ENFORCE_EQ(nranks, 2, + platform::errors::InvalidArgument( + "The nranks must be 2, but (%d)", nranks)); int root = peer; diff --git a/paddle/fluid/operators/collective/partial_send_op_npu.cc b/paddle/fluid/operators/collective/partial_send_op_npu.cc index 31c74fcc196..8f53bd8fc5f 100644 --- a/paddle/fluid/operators/collective/partial_send_op_npu.cc +++ b/paddle/fluid/operators/collective/partial_send_op_npu.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/collective/send_v2_op.h" - #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/npu/hccl_helper.h" @@ -52,8 +51,9 @@ class PartialSendOpASCENDKernel : public framework::OpKernel { int nranks = comm->nranks(); int rank = comm->rank(); - PADDLE_ENFORCE_EQ(nranks, 2, platform::errors::InvalidArgument( - "The nranks must be 2, but (%d)", nranks)); + PADDLE_ENFORCE_EQ(nranks, 2, + platform::errors::InvalidArgument( + "The nranks must be 2, but (%d)", nranks)); int root = rank; diff --git a/paddle/fluid/operators/collective/recv_v2_op.cc b/paddle/fluid/operators/collective/recv_v2_op.cc index 494665544f0..15da47e713b 100644 --- a/paddle/fluid/operators/collective/recv_v2_op.cc +++ b/paddle/fluid/operators/collective/recv_v2_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/collective/recv_v2_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/collective/recv_v2_op_npu.cc b/paddle/fluid/operators/collective/recv_v2_op_npu.cc index c31f1210f04..9aa1ab78869 100644 --- a/paddle/fluid/operators/collective/recv_v2_op_npu.cc +++ b/paddle/fluid/operators/collective/recv_v2_op_npu.cc @@ -61,8 +61,9 @@ class CRecvOpASCENDKernel : public framework::OpKernel { int nranks = comm->nranks(); int peer = ctx.Attr("peer"); - PADDLE_ENFORCE_EQ(nranks, 2, platform::errors::InvalidArgument( - "The nranks must be 2, but (%d)", nranks)); + PADDLE_ENFORCE_EQ(nranks, 2, + platform::errors::InvalidArgument( + "The nranks must be 2, but (%d)", nranks)); int root = peer; diff --git a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc index 6e02d362156..0022b6bf39d 100644 --- a/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc +++ b/paddle/fluid/operators/collective/recv_v2_op_npu_test.cc @@ -17,20 +17,19 @@ limitations under the License. 
*/ #endif #include + #include #include // NOLINT #include #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" #include "paddle/fluid/operators/collective/recv_v2_op.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/collective/send_v2_op_npu.cc b/paddle/fluid/operators/collective/send_v2_op_npu.cc index 882630467a0..ee34026cb28 100644 --- a/paddle/fluid/operators/collective/send_v2_op_npu.cc +++ b/paddle/fluid/operators/collective/send_v2_op_npu.cc @@ -60,8 +60,9 @@ class CSendOpASCENDKernel : public framework::OpKernel { int nranks = comm->nranks(); int rank = comm->rank(); - PADDLE_ENFORCE_EQ(nranks, 2, platform::errors::InvalidArgument( - "The nranks must be 2, but (%d)", nranks)); + PADDLE_ENFORCE_EQ(nranks, 2, + platform::errors::InvalidArgument( + "The nranks must be 2, but (%d)", nranks)); int root = rank; diff --git a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc index 57e3dd53cc7..9784e6ddc15 100644 --- a/paddle/fluid/operators/collective/send_v2_op_npu_test.cc +++ b/paddle/fluid/operators/collective/send_v2_op_npu_test.cc @@ -17,19 +17,19 @@ limitations under the License. */ #endif #include + #include #include // NOLINT #include -#include "gtest/gtest.h" +#include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/string/printf.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/collective/gen_hccl_id_op_helper.h" #include "paddle/fluid/operators/collective/send_v2_op.h" +#include "paddle/fluid/string/printf.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_ASCEND_CL) #include "paddle/fluid/platform/collective_helper.h" diff --git a/paddle/fluid/operators/common_infer_shape_functions.cc b/paddle/fluid/operators/common_infer_shape_functions.cc index 1d187451c68..8bd60c77c46 100644 --- a/paddle/fluid/operators/common_infer_shape_functions.cc +++ b/paddle/fluid/operators/common_infer_shape_functions.cc @@ -61,12 +61,13 @@ inline void GetBroadcastDimsArrays(const framework::DDim &x_dims, PADDLE_ENFORCE_EQ( x_dims_array[i] == y_dims_array[i] || x_dims_array[i] <= 1 || y_dims_array[i] <= 1, - true, platform::errors::InvalidArgument( - "Broadcast dimension mismatch. Operands could " - "not be broadcast together with the shape of X = [%s] and " - "the shape of Y = [%s]. Received [%d] in X is not equal to " - "[%d] in Y at i:%d.", - x_dims, y_dims, x_dims_array[i], y_dims_array[i], i)); + true, + platform::errors::InvalidArgument( + "Broadcast dimension mismatch. Operands could " + "not be broadcast together with the shape of X = [%s] and " + "the shape of Y = [%s]. 
Received [%d] in X is not equal to " + "[%d] in Y at i:%d.", + x_dims, y_dims, x_dims_array[i], y_dims_array[i], i)); if ((x_dims_array[i] > 1 || y_dims_array[i] > 1) || (x_dims_array[i] == 1 && y_dims_array[i] == 1)) { out_dims_array[i] = std::max(x_dims_array[i], y_dims_array[i]); diff --git a/paddle/fluid/operators/complex_op.cc b/paddle/fluid/operators/complex_op.cc index 7241c92258e..d358f5765f9 100644 --- a/paddle/fluid/operators/complex_op.cc +++ b/paddle/fluid/operators/complex_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/complex_op.h" #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" diff --git a/paddle/fluid/operators/complex_view_op.cc b/paddle/fluid/operators/complex_view_op.cc index 763f936ec9c..92b48fe8b06 100644 --- a/paddle/fluid/operators/complex_view_op.cc +++ b/paddle/fluid/operators/complex_view_op.cc @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/complex_view_op.cu b/paddle/fluid/operators/complex_view_op.cu index 261881cb8d2..b62c0470dd6 100644 --- a/paddle/fluid/operators/complex_view_op.cu +++ b/paddle/fluid/operators/complex_view_op.cu @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/complex_view_op.h" - #include "paddle/fluid/framework/data_type.h" +#include "paddle/fluid/operators/complex_view_op.h" #include "paddle/fluid/platform/enforce.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/concat_op.cc b/paddle/fluid/operators/concat_op.cc index a467f2dbee7..599fbcce39f 100644 --- a/paddle/fluid/operators/concat_op.cc +++ b/paddle/fluid/operators/concat_op.cc @@ -15,11 +15,12 @@ limitations under the License. */ #include "paddle/fluid/operators/concat_op.h" #include + #include #include #include -#include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/phi/infermeta/multiary.h" #include "paddle/phi/kernels/funcs/concat_funcs.h" diff --git a/paddle/fluid/operators/concat_op.h b/paddle/fluid/operators/concat_op.h index 50aca54c12d..746e0e7a056 100644 --- a/paddle/fluid/operators/concat_op.h +++ b/paddle/fluid/operators/concat_op.h @@ -17,11 +17,11 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/strided_memcpy.h" #include "paddle/fluid/operators/utils.h" - #include "paddle/phi/kernels/concat_kernel.h" #include "paddle/phi/kernels/funcs/concat_funcs.h" diff --git a/paddle/fluid/operators/concat_op_mlu.cc b/paddle/fluid/operators/concat_op_mlu.cc index e8f6b2dc869..3d927af96e1 100644 --- a/paddle/fluid/operators/concat_op_mlu.cc +++ b/paddle/fluid/operators/concat_op_mlu.cc @@ -99,10 +99,11 @@ class ConcatGradMLUKernel : public framework::OpKernel { axis = ComputeAxis(static_cast(axis), static_cast(ins[0]->dims().size())); - PADDLE_ENFORCE_GE(axis, 0, platform::errors::InvalidArgument( - "concat_grad: axis should be larger than or " - "equal to 0, but received axis is %d.", - axis)); + PADDLE_ENFORCE_GE(axis, 0, + platform::errors::InvalidArgument( + "concat_grad: axis should be larger than or " + "equal to 0, but received axis is %d.", + axis)); PADDLE_ENFORCE_LT( axis, out_grad->dims().size(), platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/concat_op_xpu.cc b/paddle/fluid/operators/concat_op_xpu.cc index ba35098bbac..fcbfc6f7a2b 100644 --- a/paddle/fluid/operators/concat_op_xpu.cc +++ b/paddle/fluid/operators/concat_op_xpu.cc @@ -12,12 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/concat_op.h" #include #include #include -#include "paddle/fluid/platform/device/xpu/xpu_header.h" +#include "paddle/fluid/operators/concat_op.h" +#include "paddle/fluid/platform/device/xpu/xpu_header.h" #include "paddle/phi/core/lod_utils.h" namespace paddle { @@ -33,17 +33,19 @@ class ConcatXPUKernel : public framework::OpKernel { auto ins = ctx.MultiInput("X"); framework::LoDTensor* out = ctx.Output("Out"); int axis = ctx.Attr("axis"); - PADDLE_ENFORCE_NE(ins[0], nullptr, platform::errors::InvalidArgument( - "The input should not be null.")); + PADDLE_ENFORCE_NE( + ins[0], nullptr, + platform::errors::InvalidArgument("The input should not be null.")); PADDLE_ENFORCE_NE(ctx.HasInput("AxisTensor"), true, platform::errors::InvalidArgument( "XPU donot surpport AxisTensor for now")); axis = ComputeAxis(static_cast(axis), static_cast(ins[0]->dims().size())); - PADDLE_ENFORCE_GE(axis, 0, platform::errors::InvalidArgument( - "concat: axis should be larger than or " - "equal to 0, but received axis is %d.", - axis)); + PADDLE_ENFORCE_GE(axis, 0, + platform::errors::InvalidArgument( + "concat: axis should be larger than or " + "equal to 0, but received axis is %d.", + axis)); PADDLE_ENFORCE_LT(axis, ins[0]->dims().size(), platform::errors::InvalidArgument( "concat: axis should be less than ins[0]->dims()!" 
@@ -94,8 +96,9 @@ class ConcatXPUKernel : public framework::OpKernel { } } - PADDLE_ENFORCE_GT(xdims_list.size(), 0, platform::errors::InvalidArgument( - "No tensor need concat")); + PADDLE_ENFORCE_GT( + xdims_list.size(), 0, + platform::errors::InvalidArgument("No tensor need concat")); auto& dev_ctx = ctx.template device_context(); int r = xpu::concat(dev_ctx.x_context(), ptrs, @@ -129,8 +132,9 @@ class ConcatGradXPUKernel : public framework::OpKernel { } } } - PADDLE_ENFORCE_NE(ins[0], nullptr, platform::errors::InvalidArgument( - "The input should not be null.")); + PADDLE_ENFORCE_NE( + ins[0], nullptr, + platform::errors::InvalidArgument("The input should not be null.")); auto axis = ctx.Attr("axis"); if (ctx.HasInput("AxisTensor")) { auto* axis_tensor = ctx.Input("AxisTensor"); @@ -149,10 +153,11 @@ class ConcatGradXPUKernel : public framework::OpKernel { ptrs[j] = nullptr; } } - PADDLE_ENFORCE_GE(axis, 0, platform::errors::InvalidArgument( - "concat_grad: axis should be larger than or " - "equal to 0, but received axis is %d.", - axis)); + PADDLE_ENFORCE_GE(axis, 0, + platform::errors::InvalidArgument( + "concat_grad: axis should be larger than or " + "equal to 0, but received axis is %d.", + axis)); PADDLE_ENFORCE_LT( axis, out_grad->dims().size(), platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/conj_op.cc b/paddle/fluid/operators/conj_op.cc index cbec1182f20..0c294b60482 100644 --- a/paddle/fluid/operators/conj_op.cc +++ b/paddle/fluid/operators/conj_op.cc @@ -74,8 +74,9 @@ REGISTER_OPERATOR(conj, ops::ConjOp, ops::ConjOpMaker, ConjInferShapeFunctor); REGISTER_OP_CPU_KERNEL( - conj, ops::ConjKernel>, + conj, + ops::ConjKernel>, ops::ConjKernel>, ops::ConjKernel, diff --git a/paddle/fluid/operators/conj_op.cu b/paddle/fluid/operators/conj_op.cu index d04024d70a8..548508636ca 100644 --- a/paddle/fluid/operators/conj_op.cu +++ b/paddle/fluid/operators/conj_op.cu @@ -17,8 +17,9 @@ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( - conj, ops::ConjKernel>, + conj, + ops::ConjKernel>, ops::ConjKernel>, ops::ConjKernel, diff --git a/paddle/fluid/operators/controlflow/bitwise_op.cc b/paddle/fluid/operators/controlflow/bitwise_op.cc index 4dcbbc8568f..19865f9a9fb 100644 --- a/paddle/fluid/operators/controlflow/bitwise_op.cc +++ b/paddle/fluid/operators/controlflow/bitwise_op.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" diff --git a/paddle/fluid/operators/controlflow/compare_op.cc b/paddle/fluid/operators/controlflow/compare_op.cc index 72d81d8c3fd..21fc69eb019 100644 --- a/paddle/fluid/operators/controlflow/compare_op.cc +++ b/paddle/fluid/operators/controlflow/compare_op.cc @@ -80,14 +80,12 @@ class CompareOp : public framework::OperatorWithKernel { } // namespace operators } // namespace paddle -#define REGISTER_COMPARE_OP_VERSION(op_type) \ - REGISTER_OP_VERSION(op_type) \ - .AddCheckpoint( \ - R"ROC(Upgrade compare ops, add a new attribute [force_cpu])ROC", \ - paddle::framework::compatible::OpVersionDesc().ModifyAttr( \ - "force_cpu", \ - "In order to force fill output variable to gpu memory.", \ - false)); +#define REGISTER_COMPARE_OP_VERSION(op_type) \ + REGISTER_OP_VERSION(op_type).AddCheckpoint( \ + R"ROC(Upgrade compare ops, add a new attribute [force_cpu])ROC", \ + paddle::framework::compatible::OpVersionDesc().ModifyAttr( \ + "force_cpu", \ + "In order to force fill output variable to gpu memory.", false)); #define REGISTER_COMPARE_OP(op_type, _equation) \ struct _##op_type##Comment { \ diff --git a/paddle/fluid/operators/controlflow/conditional_block_op.h b/paddle/fluid/operators/controlflow/conditional_block_op.h index c024e4a12cd..c1d13ffdf12 100644 --- a/paddle/fluid/operators/controlflow/conditional_block_op.h +++ b/paddle/fluid/operators/controlflow/conditional_block_op.h @@ -68,10 +68,11 @@ class ConditionalOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ(framework::TransToProtoVarType(ips[0]->dtype()) == framework::proto::VarType::BOOL && ips[0]->numel() == 1, - true, platform::errors::InvalidArgument( - "condition input's data type should be bool, " - "numel should be 1, actual numel is %d", - ips[0]->numel())); + true, + platform::errors::InvalidArgument( + "condition input's data type should be bool, " + "numel should be 1, actual numel is %d", + ips[0]->numel())); bool res = false; if (platform::is_gpu_place(ips[0]->place())) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) diff --git a/paddle/fluid/operators/controlflow/fetch_op.cc b/paddle/fluid/operators/controlflow/fetch_op.cc index 111ca9c63c6..369a1ffedc4 100644 --- a/paddle/fluid/operators/controlflow/fetch_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_op.cc @@ -35,10 +35,11 @@ static void DataCopy(const framework::LoDTensor &src_item, // as params are not a subject to paddle's data_format VLOG(4) << "innerTransDataLayoutFromMKLDNN"; framework::innerTransDataLayoutFromMKLDNN( - src_item.layout(), fetch_var_name == framework::GradVarName("Filter") - ? framework::DataLayout::kNCHW - : paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + src_item.layout(), + fetch_var_name == framework::GradVarName("Filter") + ? framework::DataLayout::kNCHW + : paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout(), src_item, &out, platform::CPUPlace()); paddle::framework::TensorCopySync(out, platform::CPUPlace(), dst_item); } else { @@ -92,11 +93,12 @@ class FetchOp : public framework::OperatorBase { int col = Attr("col"); PADDLE_ENFORCE_GE( - col, 0, platform::errors::InvalidArgument( - "Expected the column index (the attribute 'col' of " - "operator 'Fetch') of current fetching variable to be " - "no less than 0. 
But received column index = %d.", - col)); + col, 0, + platform::errors::InvalidArgument( + "Expected the column index (the attribute 'col' of " + "operator 'Fetch') of current fetching variable to be " + "no less than 0. But received column index = %d.", + col)); VLOG(3) << "Fetch variable " << fetch_var_name << " to variable " << out_name << "'s " << col << " column."; diff --git a/paddle/fluid/operators/controlflow/fetch_v2_op.cc b/paddle/fluid/operators/controlflow/fetch_v2_op.cc index caa67139a9b..29d6eb1b2d4 100644 --- a/paddle/fluid/operators/controlflow/fetch_v2_op.cc +++ b/paddle/fluid/operators/controlflow/fetch_v2_op.cc @@ -42,10 +42,11 @@ static void DeepCopy(const framework::LoDTensor &src_item, // Convert to desired Paddle layout, apart from grads of filter // as params are not a subject to paddle's data_format framework::innerTransDataLayoutFromMKLDNN( - src_item.layout(), fetch_var_name == framework::GradVarName("Filter") - ? framework::DataLayout::kNCHW - : paddle::platform::MKLDNNDeviceContext::tls() - .get_cur_paddle_data_layout(), + src_item.layout(), + fetch_var_name == framework::GradVarName("Filter") + ? framework::DataLayout::kNCHW + : paddle::platform::MKLDNNDeviceContext::tls() + .get_cur_paddle_data_layout(), src_item, &out, platform::CPUPlace()); paddle::framework::TensorCopySync(out, platform::CPUPlace(), dst_item); } else { @@ -123,11 +124,12 @@ class FetchV2Kernel { int col = ctx.Attr("col"); PADDLE_ENFORCE_GE( - col, 0, platform::errors::InvalidArgument( - "Expected the column index (the attribute 'col' of " - "operator 'Fetch') of current fetching variable to be " - "no less than 0. But received column index = %d.", - col)); + col, 0, + platform::errors::InvalidArgument( + "Expected the column index (the attribute 'col' of " + "operator 'Fetch') of current fetching variable to be " + "no less than 0. But received column index = %d.", + col)); auto *fetch_list = out_var->GetMutable(); diff --git a/paddle/fluid/operators/controlflow/get_places_op.cc b/paddle/fluid/operators/controlflow/get_places_op.cc index 55bd4879ab7..7f3b0040041 100644 --- a/paddle/fluid/operators/controlflow/get_places_op.cc +++ b/paddle/fluid/operators/controlflow/get_places_op.cc @@ -62,9 +62,10 @@ class GetPlacesOp : public framework::OperatorBase { device_count = is_gpu ? CUDADevCount() : std::thread::hardware_concurrency(); } - PADDLE_ENFORCE_NE(device_count, 0UL, platform::errors::InvalidArgument( - "Cannot indicate %s device count", - is_gpu ? "GPU" : "CPU")); + PADDLE_ENFORCE_NE( + device_count, 0UL, + platform::errors::InvalidArgument("Cannot indicate %s device count", + is_gpu ? "GPU" : "CPU")); auto out_var_name = Output("Out"); auto &places = *(GET_DATA_SAFELY(scope.FindVar(out_var_name), "Output", diff --git a/paddle/fluid/operators/controlflow/logical_op.cc b/paddle/fluid/operators/controlflow/logical_op.cc index 4d11cb5ff74..a9c28f48ef7 100644 --- a/paddle/fluid/operators/controlflow/logical_op.cc +++ b/paddle/fluid/operators/controlflow/logical_op.cc @@ -12,6 +12,7 @@ limitations under the License. 
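The fetch_op and fetch_v2_op hunks above also show the ternary treatment: instead of keeping the condition inline with the preceding argument, clang-format breaks before '?' and ':' so both branches align under the condition. A toy mirror of that layout (the names here are illustrative, not Paddle's API):

#include <string>

enum class Layout { kNCHW, kDefault };

Layout PickLayout(const std::string& fetch_var_name, Layout tls_layout) {
  // Break-before-ternary layout: '?' and ':' each open an aligned line.
  return fetch_var_name == "Filter@GRAD"
             ? Layout::kNCHW
             : tls_layout;
}

int main() { return PickLayout("X", Layout::kDefault) == Layout::kNCHW; }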
*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" diff --git a/paddle/fluid/operators/controlflow/op_variant.h b/paddle/fluid/operators/controlflow/op_variant.h index cc1f36a875f..57d44b67939 100644 --- a/paddle/fluid/operators/controlflow/op_variant.h +++ b/paddle/fluid/operators/controlflow/op_variant.h @@ -50,8 +50,9 @@ class OpVariant { const AttrType &Attr(const std::string &name) const { auto &attrs = Attrs(); auto it = attrs.find(name); - PADDLE_ENFORCE_NE(it, attrs.end(), platform::errors::NotFound( - "Cannot find attribute %s.", name)); + PADDLE_ENFORCE_NE( + it, attrs.end(), + platform::errors::NotFound("Cannot find attribute %s.", name)); return BOOST_GET_CONST(AttrType, it->second); } diff --git a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc index 43913cae6b3..62cd2fc3376 100644 --- a/paddle/fluid/operators/controlflow/recurrent_op_helper.cc +++ b/paddle/fluid/operators/controlflow/recurrent_op_helper.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/controlflow/recurrent_op_helper.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/controlflow/while_op.cc b/paddle/fluid/operators/controlflow/while_op.cc index d8daa25f31b..a551bad8eb1 100644 --- a/paddle/fluid/operators/controlflow/while_op.cc +++ b/paddle/fluid/operators/controlflow/while_op.cc @@ -45,7 +45,7 @@ static std::string GetSkipEagerDeletionVarsDebugString( } return str; } -} // NOLINT +} // namespace class WhileOp : public framework::OperatorBase { public: @@ -375,10 +375,11 @@ class WhileGradOp : public framework::OperatorBase { PADDLE_ENFORCE_EQ( var->IsType() || var->IsType(), - true, platform::errors::InvalidArgument( - "Currently the type of var only can be LoDTensorArray, " - "or LoDTensor, but the received var[%s] is %s.", - inside_grad_name, framework::ToTypeName(var->Type()))); + true, + platform::errors::InvalidArgument( + "Currently the type of var only can be LoDTensorArray, " + "or LoDTensor, but the received var[%s] is %s.", + inside_grad_name, framework::ToTypeName(var->Type()))); if ((var_iter == outside_og_names.end()) && var->IsType()) { diff --git a/paddle/fluid/operators/controlflow/while_op_helper.cc b/paddle/fluid/operators/controlflow/while_op_helper.cc index 63b273fdbb8..2b2001be6bf 100644 --- a/paddle/fluid/operators/controlflow/while_op_helper.cc +++ b/paddle/fluid/operators/controlflow/while_op_helper.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/operators/controlflow/while_op_helper.h" #include + #include "paddle/fluid/string/string_helper.h" namespace paddle { diff --git a/paddle/fluid/operators/conv_base_helper.h b/paddle/fluid/operators/conv_base_helper.h index 9e1a323fc9f..f141c9eb087 100644 --- a/paddle/fluid/operators/conv_base_helper.h +++ b/paddle/fluid/operators/conv_base_helper.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/conv_search_cache.h" #include "paddle/fluid/operators/conv_cudnn_op_cache.h" #include "paddle/phi/backends/gpu/gpu_context.h" diff --git a/paddle/fluid/operators/conv_cudnn_op_cache.h b/paddle/fluid/operators/conv_cudnn_op_cache.h index af67d857e0e..3d704c8be30 100644 --- a/paddle/fluid/operators/conv_cudnn_op_cache.h +++ b/paddle/fluid/operators/conv_cudnn_op_cache.h @@ -17,6 +17,7 @@ limitations under the License. 
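One comment-correctness fix rides along in while_op.cc above: the anonymous namespace's closing brace was annotated "// NOLINT" and is corrected to the canonical "// namespace" trailer, presumably via clang-format's FixNamespaceComments option. The shape it converges on:

namespace {  // file-local helpers, as around GetSkipEagerDeletionVarsDebugString

int Helper() { return 0; }

}  // namespace

int main() { return Helper(); }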
*/ #include #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc index f084862b419..28ca2feeec5 100644 --- a/paddle/fluid/operators/conv_op.cc +++ b/paddle/fluid/operators/conv_op.cc @@ -19,15 +19,13 @@ limitations under the License. */ #include #include "paddle/fluid/framework/op_version_registry.h" - #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif -#include "paddle/fluid/platform/cudnn_workspace_helper.h" - #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/platform/cudnn_workspace_helper.h" #include "paddle/phi/infermeta/binary.h" namespace paddle { @@ -864,16 +862,15 @@ REGISTER_OPERATOR(conv3d_grad, ops::ConvOpGrad, ops::Conv3DDoubleGradMaker); REGISTER_OPERATOR(conv3d_grad_grad, ops::ConvOpDoubleGrad); -REGISTER_OP_VERSION(conv2d) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(conv2d).AddCheckpoint( + R"ROC( Upgrade conv2d, add a new attribute [use_addto]. )ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "use_addto", - "In order to support new feature (inplace addto strategy) for " - "gradient accumulation.", - false)); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "use_addto", + "In order to support new feature (inplace addto strategy) for " + "gradient accumulation.", + false)); REGISTER_OP_VERSION(depthwise_conv2d) .AddCheckpoint( @@ -886,13 +883,12 @@ REGISTER_OP_VERSION(depthwise_conv2d) "gradient accumulation.", false)); -REGISTER_OP_VERSION(conv3d) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(conv3d).AddCheckpoint( + R"ROC( Upgrade conv3d, add a new attribute [use_addto]. )ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "use_addto", - "In order to support new feature (inplace addto strategy) for " - "gradient accumulation.", - false)); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "use_addto", + "In order to support new feature (inplace addto strategy) for " + "gradient accumulation.", + false)); diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h index 58f2eeee256..644a827b488 100644 --- a/paddle/fluid/operators/conv_op.h +++ b/paddle/fluid/operators/conv_op.h @@ -18,6 +18,7 @@ limitations under the License. 
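The conv2d/conv3d version checkpoints above (and cumsum, data_norm, and matrix_nms further down) show the same transformation: a lone .AddCheckpoint() no longer hangs indented under REGISTER_OP_VERSION but is fused onto the registration call itself, with the raw-string note and the OpVersionDesc argument carrying the indentation. A self-contained toy of the fused-chain layout (these types are stand-ins, not Paddle's registry API):

#include <string>

struct ToyVersionDesc {
  ToyVersionDesc& NewAttr(const std::string&, const std::string&, bool) {
    return *this;
  }
};
struct ToyRegistrar {
  ToyRegistrar& AddCheckpoint(const std::string&, const ToyVersionDesc&) {
    return *this;
  }
};
ToyRegistrar ToyRegisterOpVersion(const std::string&) { return {}; }

// Fused form: the method stays on the registration line, arguments wrap.
static ToyRegistrar conv2d_reg = ToyRegisterOpVersion("conv2d").AddCheckpoint(
    R"ROC(Upgrade conv2d, add a new attribute [use_addto].)ROC",
    ToyVersionDesc().NewAttr(
        "use_addto",
        "In order to support new feature (inplace addto strategy) for "
        "gradient accumulation.",
        false));

int main() { return 0; }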
*/ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/layout_utils.h" diff --git a/paddle/fluid/operators/conv_op_npu.cc b/paddle/fluid/operators/conv_op_npu.cc index 3ace825e7b8..15a5aa737ae 100644 --- a/paddle/fluid/operators/conv_op_npu.cc +++ b/paddle/fluid/operators/conv_op_npu.cc @@ -130,12 +130,12 @@ class DepthwiseConvNPUKernel : public framework::OpKernel { "TransposeD", {*filter}, {transformed_filter}, {{"perm", perm}}); runner_trans.Run(stream); - const auto& runner = - NpuOpRunner("DepthwiseConv2D", {input_tensor, transformed_filter}, - {output_tensor}, {{"strides", strides}, - {"dilations", dilations}, - {"pads", padding}, - {"data_format", data_format}}); + const auto& runner = NpuOpRunner( + "DepthwiseConv2D", {input_tensor, transformed_filter}, {output_tensor}, + {{"strides", strides}, + {"dilations", dilations}, + {"pads", padding}, + {"data_format", data_format}}); runner.Run(stream); } }; @@ -392,14 +392,15 @@ class NPUConvGradOpKernel : public framework::OpKernel { filter_grad_fp32.ShareDataWith(*filter_grad); } - const auto& runner = NpuOpRunner( - "Conv2DBackpropFilterD", {input_tensor, output_grad_tensor}, - {filter_grad_fp32}, {{"filter_size", filter_shape_vec}, - {"strides", strides_vec}, - {"pads", paddings}, - {"dilations", dilations_vec}, - {"groups", groups}, - {"data_format", data_format}}); + const auto& runner = + NpuOpRunner("Conv2DBackpropFilterD", + {input_tensor, output_grad_tensor}, {filter_grad_fp32}, + {{"filter_size", filter_shape_vec}, + {"strides", strides_vec}, + {"pads", paddings}, + {"dilations", dilations_vec}, + {"groups", groups}, + {"data_format", data_format}}); runner.Run(stream); if (framework::TransToProtoVarType(input->dtype()) == @@ -418,12 +419,13 @@ class NPUConvGradOpKernel : public framework::OpKernel { } const auto& runner = NpuOpRunner("Conv2DBackpropInputD", {*filter, output_grad_tensor}, - {input_grad_tensor}, {{"input_size", input_shape_vec}, - {"strides", strides_vec}, - {"pads", paddings}, - {"dilations", dilations_vec}, - {"groups", groups}, - {"data_format", data_format}}); + {input_grad_tensor}, + {{"input_size", input_shape_vec}, + {"strides", strides_vec}, + {"pads", paddings}, + {"dilations", dilations_vec}, + {"groups", groups}, + {"data_format", data_format}}); runner.Run(stream); } } @@ -452,11 +454,12 @@ class NPUConv3dKernel : public framework::OpKernel { "= [%s]", data_format)); - PADDLE_ENFORCE_EQ(groups, 1, platform::errors::Unimplemented( - "the groups must be 1 in " - "the npu kernel of conv3d, but got groups " - "= [%d]", - groups)); + PADDLE_ENFORCE_EQ(groups, 1, + platform::errors::Unimplemented( + "the groups must be 1 in " + "the npu kernel of conv3d, but got groups " + "= [%d]", + groups)); output->mutable_data(ctx.GetPlace()); @@ -537,11 +540,12 @@ class NPUConv3dGradKernel : public framework::OpKernel { "= [%s]", data_format)); - PADDLE_ENFORCE_EQ(groups, 1, platform::errors::Unimplemented( - "the groups must be 1 in " - "the npu kernel of conv3d, but got groups " - "= [%d]", - groups)); + PADDLE_ENFORCE_EQ(groups, 1, + platform::errors::Unimplemented( + "the groups must be 1 in " + "the npu kernel of conv3d, but got groups " + "= [%d]", + groups)); auto& dev_ctx = ctx.template device_context(); auto input_tensor = @@ -593,14 +597,15 @@ class NPUConv3dGradKernel : public framework::OpKernel { filter_grad_tensor.ShareDataWith(*filter_grad); filter_grad_tensor.set_layout(DataLayout::kNCDHW); - const 
auto& runner = NpuOpRunner( - "Conv3DBackpropFilterD", {input_tensor, output_grad_tensor}, - {filter_grad_tensor}, {{"filter_size", filter_shape_vec}, - {"strides", strides_vec}, - {"pads", paddings}, - {"dilations", dilations_vec}, - {"groups", groups}, - {"data_format", data_format}}); + const auto& runner = + NpuOpRunner("Conv3DBackpropFilterD", + {input_tensor, output_grad_tensor}, {filter_grad_tensor}, + {{"filter_size", filter_shape_vec}, + {"strides", strides_vec}, + {"pads", paddings}, + {"dilations", dilations_vec}, + {"groups", groups}, + {"data_format", data_format}}); runner.Run(stream); } @@ -613,14 +618,15 @@ class NPUConv3dGradKernel : public framework::OpKernel { input_grad_tensor.ShareDataWith(*input_grad); input_grad_tensor.set_layout(DataLayout::kNCDHW); - const auto& runner = NpuOpRunner( - "Conv3DBackpropInputD", {filter_tensor, output_grad_tensor}, - {input_grad_tensor}, {{"input_size", input_shape_vec}, - {"strides", strides_vec}, - {"pads", paddings}, - {"dilations", dilations_vec}, - {"groups", groups}, - {"data_format", data_format}}); + const auto& runner = + NpuOpRunner("Conv3DBackpropInputD", + {filter_tensor, output_grad_tensor}, {input_grad_tensor}, + {{"input_size", input_shape_vec}, + {"strides", strides_vec}, + {"pads", paddings}, + {"dilations", dilations_vec}, + {"groups", groups}, + {"data_format", data_format}}); runner.Run(stream); } } diff --git a/paddle/fluid/operators/conv_op_xpu.cc b/paddle/fluid/operators/conv_op_xpu.cc index cc5c20d3928..d66eefc6946 100644 --- a/paddle/fluid/operators/conv_op_xpu.cc +++ b/paddle/fluid/operators/conv_op_xpu.cc @@ -8,10 +8,11 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/conv_op.h" #include #include #include + +#include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/cudnn_workspace_helper.h" #ifdef PADDLE_WITH_XPU namespace paddle { diff --git a/paddle/fluid/operators/conv_shift_op.cc b/paddle/fluid/operators/conv_shift_op.cc index e7af908eba2..e996021ed84 100644 --- a/paddle/fluid/operators/conv_shift_op.cc +++ b/paddle/fluid/operators/conv_shift_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/conv_shift_op.h" + #include + #include "paddle/fluid/framework/eigen.h" namespace paddle { diff --git a/paddle/fluid/operators/conv_transpose_op.cc b/paddle/fluid/operators/conv_transpose_op.cc index fe76fc3aebb..8b60c67f92e 100644 --- a/paddle/fluid/operators/conv_transpose_op.cc +++ b/paddle/fluid/operators/conv_transpose_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/conv_transpose_op_npu.cc b/paddle/fluid/operators/conv_transpose_op_npu.cc index 050ede78f72..c07be5a3fdb 100644 --- a/paddle/fluid/operators/conv_transpose_op_npu.cc +++ b/paddle/fluid/operators/conv_transpose_op_npu.cc @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/conv_transpose_op.h" - #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/conv_transpose_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/phi/kernels/cpu/conv_util.h" @@ -90,9 +89,9 @@ class Conv2DTransposeNPUKernel : public framework::OpKernel { auto output_dim_vec = phi::vectorize(output_tensor.dims()); auto stream = ctx.template device_context().stream(); - const auto& runner = - NpuOpRunner("Conv2DTransposeD", {input_tensor, *filter}, - {output_tensor}, {{"input_size", output_dim_vec}, + const auto& runner = NpuOpRunner("Conv2DTransposeD", + {input_tensor, *filter}, {output_tensor}, + {{"input_size", output_dim_vec}, {"strides", strides}, {"dilations", dilations}, {"output_padding", output_padding}, @@ -167,14 +166,15 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel { auto stream = ctx.template device_context().stream(); if (filter_grad) { filter_grad->mutable_data(ctx.GetPlace()); - const auto& runner = NpuOpRunner( - "Conv2DBackpropFilterD", {output_grad_tensor, input_tensor}, - {*filter_grad}, {{"filter_size", phi::vectorize(filter_dims)}, - {"strides", strides_vec}, - {"pads", paddings}, - {"dilations", dilations_vec}, - {"groups", groups}, - {"data_format", data_format}}); + const auto& runner = + NpuOpRunner("Conv2DBackpropFilterD", + {output_grad_tensor, input_tensor}, {*filter_grad}, + {{"filter_size", phi::vectorize(filter_dims)}, + {"strides", strides_vec}, + {"pads", paddings}, + {"dilations", dilations_vec}, + {"groups", groups}, + {"data_format", data_format}}); runner.Run(stream); } if (input_grad) { @@ -184,13 +184,13 @@ class Conv2DTransposeGradNPUKernel : public framework::OpKernel { if (channel_last) { input_grad_tensor.set_layout(DataLayout::kNHWC); } - const auto& runner = - NpuOpRunner("Conv2D", {output_grad_tensor, *filter}, - {input_grad_tensor}, {{"strides", strides_vec}, - {"pads", paddings}, - {"dilations", dilations_vec}, - {"groups", groups}, - {"data_format", data_format}}); + const auto& runner = NpuOpRunner("Conv2D", {output_grad_tensor, *filter}, + {input_grad_tensor}, + {{"strides", strides_vec}, + {"pads", paddings}, + {"dilations", dilations_vec}, + {"groups", groups}, + {"data_format", data_format}}); runner.Run(stream); } } diff --git a/paddle/fluid/operators/conv_transpose_op_xpu.cc b/paddle/fluid/operators/conv_transpose_op_xpu.cc index b8bd3c4f006..ae25c57784f 100644 --- a/paddle/fluid/operators/conv_transpose_op_xpu.cc +++ b/paddle/fluid/operators/conv_transpose_op_xpu.cc @@ -9,12 +9,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/conv_transpose_op.h" - #include #include #include + #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/conv_transpose_op.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/phi/kernels/cpu/conv_util.h" diff --git a/paddle/fluid/operators/correlation_op.cc b/paddle/fluid/operators/correlation_op.cc index 62e0f311d15..21258958549 100644 --- a/paddle/fluid/operators/correlation_op.cc +++ b/paddle/fluid/operators/correlation_op.cc @@ -16,6 +16,7 @@ limitations under the License. 
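Across the NPU hunks above, the NpuOpRunner attribute maps settle into one {"key", value} pair per line, aligned under the brace that opens the initializer list, with the inputs and outputs kept on the constructor's earlier lines. The same brace-list layout, reproduced on a plain standard-library type so it compiles stand-alone:

#include <map>
#include <string>
#include <vector>

int main() {
  // One attribute pair per line, aligned under the list's opening brace,
  // mirroring the Conv2DBackpropFilterD / Conv3DBackpropInputD call sites.
  std::map<std::string, std::vector<int>> attrs = {{"strides", {1, 1, 1, 1}},
                                                   {"dilations", {1, 1, 1, 1}},
                                                   {"pads", {0, 0, 0, 0}},
                                                   {"groups", {1}}};
  return static_cast<int>(attrs.size()) - 4;
}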
*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/correlation_op.cu b/paddle/fluid/operators/correlation_op.cu index f488cc12e64..f9dd9ab98a3 100644 --- a/paddle/fluid/operators/correlation_op.cu +++ b/paddle/fluid/operators/correlation_op.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #ifdef __HIPCC__ @@ -227,11 +228,11 @@ class CorrelationCUDAKernel : public framework::OpKernel { dim3 threadsPerBlock(THREADS_PER_BLOCK); dim3 totalBlocksCorr(N, OH, OW); - correlation_forward< - T><<>>( - output->data(), OC, OH, OW, rinput1.data(), C, H, W, - rinput2.data(), pad_size, kernel_size, max_displacement, stride1, - stride2); + correlation_forward + <<>>( + output->data(), OC, OH, OW, rinput1.data(), C, H, W, + rinput2.data(), pad_size, kernel_size, max_displacement, stride1, + stride2); } }; @@ -472,19 +473,19 @@ class CorrelationCUDAGradKernel : public framework::OpKernel { dim3 totalBlocksCorr(H, W, C); for (int n = 0; n < N; n++) { - correlation_backward_input1< - T><<>>( - n, grad_input1->data(), C, H, W, grad_output->data(), GOC, GOH, - GOW, rinput2.data(), pad_size, kernel_size, max_displacement, - stride1, stride2); + correlation_backward_input1 + <<>>( + n, grad_input1->data(), C, H, W, grad_output->data(), GOC, + GOH, GOW, rinput2.data(), pad_size, kernel_size, + max_displacement, stride1, stride2); } for (int n = 0; n < N; n++) { - correlation_backward_input2< - T><<>>( - n, grad_input2->data(), C, H, W, grad_output->data(), GOC, GOH, - GOW, rinput1.data(), pad_size, kernel_size, max_displacement, - stride1, stride2); + correlation_backward_input2 + <<>>( + n, grad_input2->data(), C, H, W, grad_output->data(), GOC, + GOH, GOW, rinput1.data(), pad_size, kernel_size, + max_displacement, stride1, stride2); } } }; diff --git a/paddle/fluid/operators/cos_sim_op.cc b/paddle/fluid/operators/cos_sim_op.cc index d41ceafba1a..4c0c5596e5d 100644 --- a/paddle/fluid/operators/cos_sim_op.cc +++ b/paddle/fluid/operators/cos_sim_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cos_sim_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/crf_decoding_op.cc b/paddle/fluid/operators/crf_decoding_op.cc index 6d3e6e34c3b..fa080b7a4b4 100644 --- a/paddle/fluid/operators/crf_decoding_op.cc +++ b/paddle/fluid/operators/crf_decoding_op.cc @@ -158,11 +158,12 @@ class CRFDecodingOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( (label_dims.size() == 2UL && label_dims[1] == 1) || label_dims.size() == 1UL, - true, platform::errors::InvalidArgument( - "The Input(Label) should be a 2-D tensor with last " - "dimension fixed to 1 or a 1-D tensor. But received: " - "input rank %u, input shape [%s].", - label_dims.size(), label_dims)); + true, + platform::errors::InvalidArgument( + "The Input(Label) should be a 2-D tensor with last " + "dimension fixed to 1 or a 1-D tensor. 
But received: " + "input rank %u, input shape [%s].", + label_dims.size(), label_dims)); } if (ctx->IsRuntime() || (emission_dims[0] > 0 && label_dims[0] > 0)) { PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/operators/crf_decoding_op.h b/paddle/fluid/operators/crf_decoding_op.h index 6b11ff69c30..8b40abf3deb 100644 --- a/paddle/fluid/operators/crf_decoding_op.h +++ b/paddle/fluid/operators/crf_decoding_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/jit/kernels.h" @@ -22,8 +23,8 @@ limitations under the License. */ namespace paddle { namespace operators { -using framework::LoDTensor; using framework::LoD; +using framework::LoDTensor; using framework::Tensor; template diff --git a/paddle/fluid/operators/crop_op.cc b/paddle/fluid/operators/crop_op.cc index 9de5bc6ea36..2e0a054fa12 100644 --- a/paddle/fluid/operators/crop_op.cc +++ b/paddle/fluid/operators/crop_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/crop_op.h" + #include #include #include diff --git a/paddle/fluid/operators/crop_op.h b/paddle/fluid/operators/crop_op.h index 5ac28fafb09..49e1d6ab584 100644 --- a/paddle/fluid/operators/crop_op.h +++ b/paddle/fluid/operators/crop_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" @@ -171,17 +172,19 @@ class CropGradKernel : public framework::OpKernel { size_t rank = context.Input(framework::GradVarName("Out"))->dims().size(); PADDLE_ENFORCE_GE( - rank, 1, platform::errors::InvalidArgument( - "The number of dimensions of the input 'Out@GRAD' for " - "CropGrad must be greater than or equal " - "to 1, but the value received is %d.", - rank)); + rank, 1, + platform::errors::InvalidArgument( + "The number of dimensions of the input 'Out@GRAD' for " + "CropGrad must be greater than or equal " + "to 1, but the value received is %d.", + rank)); PADDLE_ENFORCE_LE( - rank, 6, platform::errors::InvalidArgument( - "The number of dimensions of the input 'Out@GRAD' for " - "CropGrad must be less than or equal " - "to 6, but the value received is %d.", - rank)); + rank, 6, + platform::errors::InvalidArgument( + "The number of dimensions of the input 'Out@GRAD' for " + "CropGrad must be less than or equal " + "to 6, but the value received is %d.", + rank)); switch (rank) { case 1: CropGradFunction(context); diff --git a/paddle/fluid/operators/crop_tensor_op.cc b/paddle/fluid/operators/crop_tensor_op.cc index 0e53bbb5d18..a9a94e2c948 100644 --- a/paddle/fluid/operators/crop_tensor_op.cc +++ b/paddle/fluid/operators/crop_tensor_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/crop_tensor_op.h" + #include #include #include diff --git a/paddle/fluid/operators/crop_tensor_op.h b/paddle/fluid/operators/crop_tensor_op.h index 409458037a2..851d007896d 100644 --- a/paddle/fluid/operators/crop_tensor_op.h +++ b/paddle/fluid/operators/crop_tensor_op.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" @@ -72,11 +73,12 @@ static framework::DDim ValidateShape(const std::vector shape, "The value (%d) of the %uth element for shape of " "Op(crop_tensor) should not be zero.", shape[i], i)); - PADDLE_ENFORCE_EQ(shape[i], -1, platform::errors::InvalidArgument( - "When the value (%d) of the %uth " - "element for shape of Op(crop_tensor)" - " is negative, only -1 is supported.", - shape[i], i)); + PADDLE_ENFORCE_EQ(shape[i], -1, + platform::errors::InvalidArgument( + "When the value (%d) of the %uth " + "element for shape of Op(crop_tensor)" + " is negative, only -1 is supported.", + shape[i], i)); output_shape[i] = in_dims[i] - offsets[i]; } else { output_shape[i] = static_cast(shape[i]); @@ -226,11 +228,12 @@ class CropTensorKernel : public framework::OpKernel { "value received is %d.", rank)); PADDLE_ENFORCE_LE( - rank, 6, platform::errors::InvalidArgument( - "The number of dimensions of the input 'x' for " - "Op(crop_tensor) must be less than or equal to 6, but the " - "value received is %d.", - rank)); + rank, 6, + platform::errors::InvalidArgument( + "The number of dimensions of the input 'x' for " + "Op(crop_tensor) must be less than or equal to 6, but the " + "value received is %d.", + rank)); switch (rank) { case 1: CropTensorFunction(context); diff --git a/paddle/fluid/operators/cross_entropy_op.cc b/paddle/fluid/operators/cross_entropy_op.cc index 4f5912c81ba..a880584f4cf 100644 --- a/paddle/fluid/operators/cross_entropy_op.cc +++ b/paddle/fluid/operators/cross_entropy_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cross_entropy_op.h" + #include #include #include diff --git a/paddle/fluid/operators/cross_op.cc b/paddle/fluid/operators/cross_op.cc index 674b75625d1..977d84e1e47 100644 --- a/paddle/fluid/operators/cross_op.cc +++ b/paddle/fluid/operators/cross_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" @@ -21,8 +22,8 @@ namespace paddle { namespace operators { -using framework::Tensor; using framework::DDim; +using framework::Tensor; const int kDefaultDim = framework::DDim::kMaxRank; class CrossOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/ctc_align_op.cu b/paddle/fluid/operators/ctc_align_op.cu index ba90c677570..10ec5a6bdd1 100644 --- a/paddle/fluid/operators/ctc_align_op.cu +++ b/paddle/fluid/operators/ctc_align_op.cu @@ -15,7 +15,9 @@ limitations under the License. 
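The small using-declaration swaps (LoD before LoDTensor in crf_decoding_op.h above, DDim before Tensor in cross_op.cc, and gpc_free_polygon before gpc_polygon_clip in poly_util.cc near the end of this section) come from clang-format sorting adjacent using-declarations alphabetically (the SortUsingDeclarations style option). Stand-alone shape:

namespace framework {
struct LoD {};
struct LoDTensor {};
}  // namespace framework

// Adjacent using-declarations are kept in alphabetical order:
using framework::LoD;
using framework::LoDTensor;

int main() {
  LoD lod;
  LoDTensor tensor;
  (void)lod;
  (void)tensor;
  return 0;
}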
*/ #include #include #include + #include + #include "paddle/fluid/operators/ctc_align_op.h" namespace paddle { @@ -92,10 +94,10 @@ class CTCAlignOpCUDAKernel : public framework::OpKernel { auto* output_length = ctx.Output("OutputLength"); T* output_length_data = output_length->mutable_data({input_dims[0], 1}, ctx.GetPlace()); - PaddingMergeAndDelCudaKernel< - T><<<32, (input_dims[0] + 32 - 1) / 32, 0, stream>>>( - input_dims[1], tokens, input_length_data, blank, merge_repeated, - padding_value, input_dims[0], output_data, output_length_data); + PaddingMergeAndDelCudaKernel + <<<32, (input_dims[0] + 32 - 1) / 32, 0, stream>>>( + input_dims[1], tokens, input_length_data, blank, merge_repeated, + padding_value, input_dims[0], output_data, output_length_data); } else { const size_t level = 0; auto input_lod = framework::ToAbsOffset(input->lod()); diff --git a/paddle/fluid/operators/ctc_align_op.h b/paddle/fluid/operators/ctc_align_op.h index c561974b0c9..9e189a9fb63 100644 --- a/paddle/fluid/operators/ctc_align_op.h +++ b/paddle/fluid/operators/ctc_align_op.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/cudnn_lstm_cache.h b/paddle/fluid/operators/cudnn_lstm_cache.h index 5451cf815ca..da8284b4f2e 100644 --- a/paddle/fluid/operators/cudnn_lstm_cache.h +++ b/paddle/fluid/operators/cudnn_lstm_cache.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/dynload/cudnn.h" diff --git a/paddle/fluid/operators/cudnn_lstm_op.cc b/paddle/fluid/operators/cudnn_lstm_op.cc index ccb0062fcc7..9ff4f796995 100644 --- a/paddle/fluid/operators/cudnn_lstm_op.cc +++ b/paddle/fluid/operators/cudnn_lstm_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/operators/cudnn_rnn_cache.h b/paddle/fluid/operators/cudnn_rnn_cache.h index 6c059257b94..e2159a09c12 100644 --- a/paddle/fluid/operators/cudnn_rnn_cache.h +++ b/paddle/fluid/operators/cudnn_rnn_cache.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" diff --git a/paddle/fluid/operators/cumsum_op.cc b/paddle/fluid/operators/cumsum_op.cc index 11633fb0b87..dbb703e7e87 100644 --- a/paddle/fluid/operators/cumsum_op.cc +++ b/paddle/fluid/operators/cumsum_op.cc @@ -86,13 +86,12 @@ REGISTER_OPERATOR(cumsum, ops::CumOp, ops::CumsumOpMaker, ops::CumsumGradMaker, CumsumInferShapeFunctor); -REGISTER_OP_VERSION(cumsum) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(cumsum).AddCheckpoint( + R"ROC( Upgrade cumsum add a new attribute [flatten]. 
)ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "flatten", - "In order to compute the cumsum over the flattened array when the " - "argument `axis` in python API is None.", - false)); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "flatten", + "In order to compute the cumsum over the flattened array when the " + "argument `axis` in python API is None.", + false)); diff --git a/paddle/fluid/operators/cvm_op.cc b/paddle/fluid/operators/cvm_op.cc index e909906da7b..912167cec5a 100644 --- a/paddle/fluid/operators/cvm_op.cc +++ b/paddle/fluid/operators/cvm_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/cvm_op.h" + #include + #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/operators/data_norm_op.cc b/paddle/fluid/operators/data_norm_op.cc index 137de2d5af9..8287654949e 100644 --- a/paddle/fluid/operators/data_norm_op.cc +++ b/paddle/fluid/operators/data_norm_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/data_norm_op.h" + #include #include + #include "paddle/fluid/framework/data_layout.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -163,10 +165,11 @@ class DataNormOp : public framework::OperatorWithKernel { OperatorWithKernel::IndicateVarDataType(ctx, "BatchSum"), platform::errors::InvalidArgument( "BatchSum input should be of float type")); - PADDLE_ENFORCE_EQ(dn_param_type, OperatorWithKernel::IndicateVarDataType( - ctx, "BatchSquareSum"), - platform::errors::InvalidArgument( - "BatchSquareSum input should be of float type")); + PADDLE_ENFORCE_EQ( + dn_param_type, + OperatorWithKernel::IndicateVarDataType(ctx, "BatchSquareSum"), + platform::errors::InvalidArgument( + "BatchSquareSum input should be of float type")); bool enable_scale_and_shift = ctx.Attr("enable_scale_and_shift"); if (enable_scale_and_shift) { @@ -277,8 +280,9 @@ class DataNormKernel const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, platform::errors::InvalidArgument( - "The Input dim size should be 2")); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::InvalidArgument("The Input dim size should be 2")); const int N = x_dims[0]; const int C = (data_layout == DataLayout::kNCHW ? x_dims[1] @@ -515,8 +519,9 @@ class DataNormGradKernel // Get the size for each dimension. // NCHW [batch_size, in_channels, in_height, in_width] const auto &x_dims = x->dims(); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, platform::errors::InvalidArgument( - "The Input dim size should be 2")); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::InvalidArgument("The Input dim size should be 2")); const int N = x_dims[0]; const int C = (data_layout == DataLayout::kNCHW ? 
x_dims[1] @@ -757,10 +762,9 @@ REGISTER_OP_CPU_KERNEL( data_norm_grad, ops::DataNormGradKernel, ops::DataNormGradKernel); -REGISTER_OP_VERSION(data_norm) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(data_norm).AddCheckpoint( + R"ROC( upgrad data_norm op by adding scale_w to support scale and shift.)ROC", - paddle::framework::compatible::OpVersionDesc().NewInput( - "scale_w", - "scale_w is used to do scale duirng data_norm like batchnorm ")); + paddle::framework::compatible::OpVersionDesc().NewInput( + "scale_w", + "scale_w is used to do scale duirng data_norm like batchnorm ")); diff --git a/paddle/fluid/operators/data_norm_op.cu b/paddle/fluid/operators/data_norm_op.cu index 28a79221201..21c7d7d4bf4 100644 --- a/paddle/fluid/operators/data_norm_op.cu +++ b/paddle/fluid/operators/data_norm_op.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/data_norm_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" @@ -100,8 +101,9 @@ class DataNormKernel const auto *x = ctx.Input("X"); const auto &x_dims = x->dims(); // Align with CPU version, but should we add this restriction? - PADDLE_ENFORCE_EQ(x_dims.size(), 2, platform::errors::PreconditionNotMet( - "The Input dim size should be 2")); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::PreconditionNotMet("The Input dim size should be 2")); const int N = x_dims[0]; const int C = x_dims[1]; const T *batch_size_in = ctx.Input("BatchSize")->data(); @@ -143,8 +145,9 @@ class DataNormGradKernel const auto &x_dims = x->dims(); // Align with CPU version, but should we add this restriction? - PADDLE_ENFORCE_EQ(x_dims.size(), 2, platform::errors::PreconditionNotMet( - "The Input dim size should be 2")); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::PreconditionNotMet("The Input dim size should be 2")); const int N = x_dims[0]; const int C = x_dims[1]; diff --git a/paddle/fluid/operators/decode_jpeg_op.cu b/paddle/fluid/operators/decode_jpeg_op.cu index de6b35bc9cd..a257afc50f9 100644 --- a/paddle/fluid/operators/decode_jpeg_op.cu +++ b/paddle/fluid/operators/decode_jpeg_op.cu @@ -15,6 +15,7 @@ #if !defined(WITH_NV_JETSON) && !defined(PADDLE_WITH_HIP) #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/dynload/nvjpeg.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/deformable_conv_op.cc b/paddle/fluid/operators/deformable_conv_op.cc index 1b76aca1e66..b54c8a81abd 100644 --- a/paddle/fluid/operators/deformable_conv_op.cc +++ b/paddle/fluid/operators/deformable_conv_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/deformable_conv_op_xpu.cc b/paddle/fluid/operators/deformable_conv_op_xpu.cc index 240e5658956..d977cfe844a 100644 --- a/paddle/fluid/operators/deformable_conv_op_xpu.cc +++ b/paddle/fluid/operators/deformable_conv_op_xpu.cc @@ -15,6 +15,7 @@ limitations under the License. 
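In the .cu hunks (correlation_op.cu and ctc_align_op.cu above), the launch syntax changes shape: instead of breaking inside the kernel's template argument list, clang-format now keeps the template-qualified kernel name intact and breaks before the <<<grid, block, shared, stream>>> launch configuration. A minimal CUDA sketch of that layout (compile with nvcc; the kernel itself is illustrative):

#include <cuda_runtime.h>

template <typename T>
__global__ void scale_kernel(T* data, int n, T factor) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) data[i] *= factor;
}

int main() {
  const int n = 1024;
  float* d_data = nullptr;
  cudaMalloc(&d_data, n * sizeof(float));
  // Post-format layout: kernel<T> stays whole and the break lands before
  // "<<<", as it does for correlation_forward<T> in the hunks above.
  scale_kernel<float>
      <<<(n + 255) / 256, 256>>>(d_data, n, 2.0f);
  cudaDeviceSynchronize();
  cudaFree(d_data);
  return 0;
}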
*/ #ifdef PADDLE_WITH_XPU #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" @@ -169,28 +170,32 @@ class DeformableConvGradXPUKernel : public framework::OpKernel { const float* offset_ptr = offset.data(); const float* mask_ptr = mask.data(); if (dx_data == nullptr) { - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&dx_data), - input->numel() * sizeof(T)), - XPU_SUCCESS, platform::errors::ResourceExhausted( - "XPU has no enough memory")); + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&dx_data), + input->numel() * sizeof(T)), + XPU_SUCCESS, + platform::errors::ResourceExhausted("XPU has no enough memory")); } if (dw_data == nullptr) { - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&dw_data), - filter.numel() * sizeof(T)), - XPU_SUCCESS, platform::errors::ResourceExhausted( - "XPU has no enough memory")); + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&dw_data), + filter.numel() * sizeof(T)), + XPU_SUCCESS, + platform::errors::ResourceExhausted("XPU has no enough memory")); } if (doffset_data == nullptr) { - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&doffset_data), - offset.numel() * sizeof(T)), - XPU_SUCCESS, platform::errors::ResourceExhausted( - "XPU has no enough memory")); + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&doffset_data), + offset.numel() * sizeof(T)), + XPU_SUCCESS, + platform::errors::ResourceExhausted("XPU has no enough memory")); } if (dmask_data == nullptr) { - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&dmask_data), - mask.numel() * sizeof(T)), - XPU_SUCCESS, platform::errors::ResourceExhausted( - "XPU has no enough memory")); + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&dmask_data), + mask.numel() * sizeof(T)), + XPU_SUCCESS, + platform::errors::ResourceExhausted("XPU has no enough memory")); } int input_dim = input->numel() / input->dims()[0]; @@ -207,10 +212,11 @@ class DeformableConvGradXPUKernel : public framework::OpKernel { int f = filter.dims()[0]; T* filter_grad_tmp = nullptr; - PADDLE_ENFORCE_EQ(xpu_malloc(reinterpret_cast(&filter_grad_tmp), - filter_grad->numel() * sizeof(T)), - XPU_SUCCESS, platform::errors::ResourceExhausted( - "XPU has no enough memory")); + PADDLE_ENFORCE_EQ( + xpu_malloc(reinterpret_cast(&filter_grad_tmp), + filter_grad->numel() * sizeof(T)), + XPU_SUCCESS, + platform::errors::ResourceExhausted("XPU has no enough memory")); // set zeros for d_table_data const int zero = 0; diff --git a/paddle/fluid/operators/deformable_conv_v1_op.cc b/paddle/fluid/operators/deformable_conv_v1_op.cc index 0ec95cb54ba..2da561c8685 100644 --- a/paddle/fluid/operators/deformable_conv_v1_op.cc +++ b/paddle/fluid/operators/deformable_conv_v1_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cc b/paddle/fluid/operators/deformable_psroi_pooling_op.cc index 7e7cdbd8d17..a989e3f9217 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cc +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cc @@ -13,9 +13,11 @@ // limitations under the License. 
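The deformable_conv XPU hunks above show what happens when the tested expression is itself a multi-line call: clang-format breaks immediately after PADDLE_ENFORCE_EQ's opening parenthesis so the whole xpu_malloc(...) argument indents as one block ahead of XPU_SUCCESS and the ResourceExhausted message. A toy reproduction (both the macro and the allocator below are illustrative stand-ins):

#include <cstddef>
#include <cstdio>
#include <cstdlib>

#define STUB_ENFORCE_EQ(a, b, msg)         \
  do {                                     \
    if ((a) != (b)) {                      \
      std::fprintf(stderr, "%s\n", (msg)); \
      std::abort();                        \
    }                                      \
  } while (0)

// Stands in for xpu_malloc: returns 0 on success.
int fake_xpu_malloc(void** ptr, std::size_t size) {
  *ptr = std::malloc(size);
  return *ptr ? 0 : -1;
}

int main() {
  void* dx_data = nullptr;
  // Break directly after '(' so the multi-line first argument, the expected
  // value, and the message each read as their own block:
  STUB_ENFORCE_EQ(
      fake_xpu_malloc(reinterpret_cast<void**>(&dx_data),
                      1024 * sizeof(float)),
      0,
      "XPU has no enough memory");
  std::free(dx_data);
  return 0;
}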
#include "paddle/fluid/operators/deformable_psroi_pooling_op.h" + #include #include #include + #include "paddle/phi/kernels/funcs/blas/blas.h" namespace paddle { @@ -165,11 +167,12 @@ class DeformablePSROIPoolOp : public framework::OperatorWithKernel { auto part_width = part_size[1]; auto sample_per_part = ctx->Attrs().Get("sample_per_part"); auto trans_std = ctx->Attrs().Get("trans_std"); - PADDLE_ENFORCE_GE(trans_std, 0., platform::errors::InvalidArgument( - "Input(trans_std) should not be lower " - "than 0.0, but received trans_std " - "is:%f", - trans_std)); + PADDLE_ENFORCE_GE(trans_std, 0., + platform::errors::InvalidArgument( + "Input(trans_std) should not be lower " + "than 0.0, but received trans_std " + "is:%f", + trans_std)); PADDLE_ENFORCE_GE( input_dims[1], output_channels, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.cu b/paddle/fluid/operators/deformable_psroi_pooling_op.cu index 873950b2d2f..174f045c160 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.cu +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.cu @@ -23,10 +23,12 @@ #pragma once #include + #include #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/deformable_psroi_pooling_op.h" diff --git a/paddle/fluid/operators/deformable_psroi_pooling_op.h b/paddle/fluid/operators/deformable_psroi_pooling_op.h index 3deabce54ed..6ff6ab20df2 100644 --- a/paddle/fluid/operators/deformable_psroi_pooling_op.h +++ b/paddle/fluid/operators/deformable_psroi_pooling_op.h @@ -25,6 +25,7 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/dequantize_op.cc b/paddle/fluid/operators/dequantize_op.cc index 876bd1199ad..2bed296efd7 100644 --- a/paddle/fluid/operators/dequantize_op.cc +++ b/paddle/fluid/operators/dequantize_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/dequantize_op.h" + #include "paddle/fluid/framework/op_version_registry.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -47,8 +48,8 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(dequantize, ops::DeQuantOp, ops::DeQuantOpMaker); REGISTER_OP_VERSION(dequantize) - .AddCheckpoint( - R"ROC( Add a new attribute [Shift])ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "Shift", "Dequantize data to uint8 if provided non-zero value.", - 0.0f)); + .AddCheckpoint(R"ROC( Add a new attribute [Shift])ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "Shift", + "Dequantize data to uint8 if provided non-zero value.", + 0.0f)); diff --git a/paddle/fluid/operators/dequantize_op.h b/paddle/fluid/operators/dequantize_op.h index 75c27a06c21..ea7a08c8f36 100644 --- a/paddle/fluid/operators/dequantize_op.h +++ b/paddle/fluid/operators/dequantize_op.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/dequeue_op.cc b/paddle/fluid/operators/dequeue_op.cc index fb5d53dacf0..1a6286b0a32 100644 --- a/paddle/fluid/operators/dequeue_op.cc +++ b/paddle/fluid/operators/dequeue_op.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/detection/anchor_generator_op.h b/paddle/fluid/operators/detection/anchor_generator_op.h index 0bcb56d7aa8..b3d490ac0b5 100644 --- a/paddle/fluid/operators/detection/anchor_generator_op.h +++ b/paddle/fluid/operators/detection/anchor_generator_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/transform.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/bbox_util.h b/paddle/fluid/operators/detection/bbox_util.h index 7bbbbe7f40e..b9b9b0b0c0d 100644 --- a/paddle/fluid/operators/detection/bbox_util.h +++ b/paddle/fluid/operators/detection/bbox_util.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" @@ -122,8 +123,9 @@ void BboxOverlaps(const framework::Tensor& r_boxes, inter_h = std::max(y_max - y_min + 1, zero); inter_area = inter_w * inter_h; overlaps_et(i, j) = - (inter_area == 0.) ? 0 : inter_area / - (r_box_area + c_box_area - inter_area); + (inter_area == 0.) + ? 0 + : inter_area / (r_box_area + c_box_area - inter_area); } } } diff --git a/paddle/fluid/operators/detection/box_clip_op.cc b/paddle/fluid/operators/detection/box_clip_op.cc index 73f0607fdde..08d688a1495 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cc +++ b/paddle/fluid/operators/detection/box_clip_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/box_clip_op.h" + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/box_clip_op.cu b/paddle/fluid/operators/detection/box_clip_op.cu index 65f2a559071..672b9a5db95 100644 --- a/paddle/fluid/operators/detection/box_clip_op.cu +++ b/paddle/fluid/operators/detection/box_clip_op.cu @@ -12,6 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/box_clip_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/detection/box_clip_op.h b/paddle/fluid/operators/detection/box_clip_op.h index 13ba7894d60..4bcc81dbf98 100644 --- a/paddle/fluid/operators/detection/box_clip_op.h +++ b/paddle/fluid/operators/detection/box_clip_op.h @@ -11,6 +11,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/box_coder_op.cc b/paddle/fluid/operators/detection/box_coder_op.cc index 69d829e0021..461dcb7f39a 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cc +++ b/paddle/fluid/operators/detection/box_coder_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/box_coder_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/detection/box_coder_op.cu b/paddle/fluid/operators/detection/box_coder_op.cu index 22dc606df9d..b7dee412ee3 100644 --- a/paddle/fluid/operators/detection/box_coder_op.cu +++ b/paddle/fluid/operators/detection/box_coder_op.cu @@ -11,6 +11,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/detection/box_coder_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/detection/box_coder_op.h b/paddle/fluid/operators/detection/box_coder_op.h index a626f790fac..6ddfd717653 100644 --- a/paddle/fluid/operators/detection/box_coder_op.h +++ b/paddle/fluid/operators/detection/box_coder_op.h @@ -12,6 +12,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h index d3565f87f33..7eed920fb3d 100644 --- a/paddle/fluid/operators/detection/box_decoder_and_assign_op.h +++ b/paddle/fluid/operators/detection/box_decoder_and_assign_op.h @@ -13,6 +13,7 @@ limitations under the License. 
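The steady drip of '+' blank lines after a file's main include and between the angle-bracket block and the project headers (box_clip_op, box_coder_op, and most files in this section), plus reshuffles like conv_op_xpu.cc moving its own header below the standard ones, matches clang-format's include sorting with regrouping; SortIncludes with IncludeBlocks: Regroup is the likely step-2 configuration, though the patch does not state it. The resulting grouping, shown as shape only (these are real Paddle paths from the hunks, not a compilable unit):

#include "paddle/fluid/operators/detection/box_clip_op.h"  // main header first

#include <algorithm>  // then one block of standard headers

#include "paddle/fluid/framework/op_registry.h"  // then other project headers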
*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc index 92c9ab34aa4..b1b8c3ba2da 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License.*/ #include "paddle/fluid/operators/detection/collect_fpn_proposals_op.h" + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu index 860fdd01794..bea6fb17488 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.cu @@ -18,6 +18,7 @@ namespace cub = hipcub; #endif #include + #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" diff --git a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h index e5ae9a6ccbd..973cbc6ec16 100644 --- a/paddle/fluid/operators/detection/collect_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/collect_fpn_proposals_op.h @@ -20,6 +20,7 @@ limitations under the License.*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/density_prior_box_op.h b/paddle/fluid/operators/detection/density_prior_box_op.h index adc2723acbf..0912ce90160 100644 --- a/paddle/fluid/operators/detection/density_prior_box_op.h +++ b/paddle/fluid/operators/detection/density_prior_box_op.h @@ -12,6 +12,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/operators/detection/prior_box_op.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc index 4e514e62f40..e382586ec66 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/detection/distribute_fpn_proposals_op.h" + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu index 7ad25e003b4..5adf1469ec2 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.cu @@ -21,6 +21,7 @@ namespace cub = hipcub; #endif #include + #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/fluid/operators/detection/distribute_fpn_proposals_op.h" diff --git a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h index 5479e08c2a5..85db2437ee5 100644 --- a/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h +++ b/paddle/fluid/operators/detection/distribute_fpn_proposals_op.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/generate_mask_labels_op.cc b/paddle/fluid/operators/detection/generate_mask_labels_op.cc index c9cc4e72207..da86502f78c 100644 --- a/paddle/fluid/operators/detection/generate_mask_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_mask_labels_op.cc @@ -10,9 +10,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" diff --git a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc index cbf17048400..bc528060355 100644 --- a/paddle/fluid/operators/detection/generate_proposal_labels_op.cc +++ b/paddle/fluid/operators/detection/generate_proposal_labels_op.cc @@ -10,9 +10,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cc b/paddle/fluid/operators/detection/generate_proposals_op.cc index d6130823271..a6d2d8a2a01 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" diff --git a/paddle/fluid/operators/detection/generate_proposals_op.cu b/paddle/fluid/operators/detection/generate_proposals_op.cu index 5fb7973fd89..20efb1fa6ca 100644 --- a/paddle/fluid/operators/detection/generate_proposals_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_op.cu @@ -14,8 +14,10 @@ limitations under the License. 
*/ #include #include + #include #include + #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc index 1f1802574c5..b8b6118058f 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cc +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" diff --git a/paddle/fluid/operators/detection/generate_proposals_v2_op.cu b/paddle/fluid/operators/detection/generate_proposals_v2_op.cu index 005309e8ee5..deb7f3a41df 100644 --- a/paddle/fluid/operators/detection/generate_proposals_v2_op.cu +++ b/paddle/fluid/operators/detection/generate_proposals_v2_op.cu @@ -14,8 +14,10 @@ limitations under the License. */ #include #include + #include #include + #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memory.h" diff --git a/paddle/fluid/operators/detection/gpc.cc b/paddle/fluid/operators/detection/gpc.cc index 6b1b0cd8b35..4dea559d8e4 100644 --- a/paddle/fluid/operators/detection/gpc.cc +++ b/paddle/fluid/operators/detection/gpc.cc @@ -24,6 +24,7 @@ **/ #include "paddle/fluid/operators/detection/gpc.h" + #include "paddle/fluid/platform/enforce.h" namespace gpc { diff --git a/paddle/fluid/operators/detection/locality_aware_nms_op.cc b/paddle/fluid/operators/detection/locality_aware_nms_op.cc index 8cc0ebcab61..3f8bc867418 100644 --- a/paddle/fluid/operators/detection/locality_aware_nms_op.cc +++ b/paddle/fluid/operators/detection/locality_aware_nms_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/nms_util.h" @@ -51,16 +52,17 @@ class LocalityAwareNMSOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( box_dims[2] == 4 || box_dims[2] == 8 || box_dims[2] == 16 || box_dims[2] == 24 || box_dims[2] == 32, - true, platform::errors::InvalidArgument( - "The last dimension of Input(BBoxes) must be 4 or 8, " - "represents the layout of coordinate " - "[xmin, ymin, xmax, ymax] or " - "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " - "8 points: [xi, yi] i= 1,2,...,8 or " - "12 points: [xi, yi] i= 1,2,...,12 or " - "16 points: [xi, yi] i= 1,2,...,16. " - "But received %d.", - box_dims[2])); + true, + platform::errors::InvalidArgument( + "The last dimension of Input(BBoxes) must be 4 or 8, " + "represents the layout of coordinate " + "[xmin, ymin, xmax, ymax] or " + "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " + "8 points: [xi, yi] i= 1,2,...,8 or " + "12 points: [xi, yi] i= 1,2,...,12 or " + "16 points: [xi, yi] i= 1,2,...,16. 
" + "But received %d.", + box_dims[2])); PADDLE_ENFORCE_EQ( box_dims[1], score_dims[2], platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/detection/mask_util.cc b/paddle/fluid/operators/detection/mask_util.cc index e06218cfe56..41505ee8428 100644 --- a/paddle/fluid/operators/detection/mask_util.cc +++ b/paddle/fluid/operators/detection/mask_util.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/mask_util.h" + #include #include + #include "paddle/fluid/memory/memory.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/mask_util.h b/paddle/fluid/operators/detection/mask_util.h index 4e0ea54f6d8..25b03a11f7d 100644 --- a/paddle/fluid/operators/detection/mask_util.h +++ b/paddle/fluid/operators/detection/mask_util.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include namespace paddle { diff --git a/paddle/fluid/operators/detection/mask_util_test.cc b/paddle/fluid/operators/detection/mask_util_test.cc index de904e94746..68f7a6db648 100644 --- a/paddle/fluid/operators/detection/mask_util_test.cc +++ b/paddle/fluid/operators/detection/mask_util_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/mask_util.h" + #include + #include "paddle/fluid/memory/memory.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/matrix_nms_op.cc b/paddle/fluid/operators/detection/matrix_nms_op.cc index 3353739b01b..5eee52dfbc7 100644 --- a/paddle/fluid/operators/detection/matrix_nms_op.cc +++ b/paddle/fluid/operators/detection/matrix_nms_op.cc @@ -405,7 +405,6 @@ REGISTER_OPERATOR( REGISTER_OP_CPU_KERNEL(matrix_nms, ops::MatrixNMSKernel, ops::MatrixNMSKernel); REGISTER_OP_VERSION(matrix_nms) - .AddCheckpoint( - R"ROC(Upgrade matrix_nms: add a new output [RoisNum].)ROC", - paddle::framework::compatible::OpVersionDesc().NewOutput( - "RoisNum", "The number of RoIs in each image.")); + .AddCheckpoint(R"ROC(Upgrade matrix_nms: add a new output [RoisNum].)ROC", + paddle::framework::compatible::OpVersionDesc().NewOutput( + "RoisNum", "The number of RoIs in each image.")); diff --git a/paddle/fluid/operators/detection/multiclass_nms_op.cc b/paddle/fluid/operators/detection/multiclass_nms_op.cc index 83cf6e5fd30..f603a501f4b 100644 --- a/paddle/fluid/operators/detection/multiclass_nms_op.cc +++ b/paddle/fluid/operators/detection/multiclass_nms_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/nms_util.h" @@ -55,18 +56,19 @@ class MultiClassNMSOp : public framework::OperatorWithKernel { ". 
But received rank = %d", box_dims.size())); if (score_size == 3) { - PADDLE_ENFORCE_EQ( - box_dims[2] == 4 || box_dims[2] == 8 || box_dims[2] == 16 || - box_dims[2] == 24 || box_dims[2] == 32, - true, platform::errors::InvalidArgument( - "The last dimension of Input" - "(BBoxes) must be 4 or 8, " - "represents the layout of coordinate " - "[xmin, ymin, xmax, ymax] or " - "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " - "8 points: [xi, yi] i= 1,2,...,8 or " - "12 points: [xi, yi] i= 1,2,...,12 or " - "16 points: [xi, yi] i= 1,2,...,16")); + PADDLE_ENFORCE_EQ(box_dims[2] == 4 || box_dims[2] == 8 || + box_dims[2] == 16 || box_dims[2] == 24 || + box_dims[2] == 32, + true, + platform::errors::InvalidArgument( + "The last dimension of Input" + "(BBoxes) must be 4 or 8, " + "represents the layout of coordinate " + "[xmin, ymin, xmax, ymax] or " + "4 points: [x1, y1, x2, y2, x3, y3, x4, y4] or " + "8 points: [xi, yi] i= 1,2,...,8 or " + "12 points: [xi, yi] i= 1,2,...,12 or " + "16 points: [xi, yi] i= 1,2,...,16")); PADDLE_ENFORCE_EQ( box_dims[1], score_dims[2], platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/detection/nms_op.cc b/paddle/fluid/operators/detection/nms_op.cc index f6dc44eb5fc..34a92efa68a 100644 --- a/paddle/fluid/operators/detection/nms_op.cc +++ b/paddle/fluid/operators/detection/nms_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/nms_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/detection/nms_op.cu b/paddle/fluid/operators/detection/nms_op.cu index b6027e67d6c..4f62c735c26 100644 --- a/paddle/fluid/operators/detection/nms_op.cu +++ b/paddle/fluid/operators/detection/nms_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/operators/detection/nms_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/detection/nms_util.h b/paddle/fluid/operators/detection/nms_util.h index 0e448d42fc2..7a6565ac760 100644 --- a/paddle/fluid/operators/detection/nms_util.h +++ b/paddle/fluid/operators/detection/nms_util.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/operators/detection/poly_util.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/poly_util.cc b/paddle/fluid/operators/detection/poly_util.cc index 1af2c95c6cf..6aa81bf1b39 100644 --- a/paddle/fluid/operators/detection/poly_util.cc +++ b/paddle/fluid/operators/detection/poly_util.cc @@ -16,13 +16,14 @@ limitations under the License. */ #define POLY_UTIL_CC_ #include "paddle/fluid/operators/detection/poly_util.h" + #include "paddle/fluid/framework/op_registry.h" namespace paddle { namespace operators { -using gpc::gpc_polygon_clip; using gpc::gpc_free_polygon; +using gpc::gpc_polygon_clip; template void Array2PointVec(const T*& box, const size_t box_size, diff --git a/paddle/fluid/operators/detection/poly_util.h b/paddle/fluid/operators/detection/poly_util.h index f07baf72d9f..cc37f00008d 100644 --- a/paddle/fluid/operators/detection/poly_util.h +++ b/paddle/fluid/operators/detection/poly_util.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #define POLY_UTIL_H_ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/gpc.h" diff --git a/paddle/fluid/operators/detection/prior_box_op.h b/paddle/fluid/operators/detection/prior_box_op.h index 4000994beb5..889bc8354bc 100644 --- a/paddle/fluid/operators/detection/prior_box_op.h +++ b/paddle/fluid/operators/detection/prior_box_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/transform.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc index bc46ec0b656..4e49a6ed852 100644 --- a/paddle/fluid/operators/detection/retinanet_detection_output_op.cc +++ b/paddle/fluid/operators/detection/retinanet_detection_output_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -66,23 +67,26 @@ class RetinanetDetectionOutputOp : public framework::OperatorWithKernel { auto im_info_dims = ctx->GetInputDim("ImInfo"); const size_t b_n = bboxes_dims.size(); - PADDLE_ENFORCE_GT(b_n, 0, platform::errors::InvalidArgument( - "The number of Variables in Input(BBoxes) " - "should be greater than 0, " - "but received number is:%d.", - b_n)); + PADDLE_ENFORCE_GT(b_n, 0, + platform::errors::InvalidArgument( + "The number of Variables in Input(BBoxes) " + "should be greater than 0, " + "but received number is:%d.", + b_n)); const size_t s_n = scores_dims.size(); - PADDLE_ENFORCE_GT(s_n, 0, platform::errors::InvalidArgument( - "The number of Variables in Input(Scores) " - "should be greater than 0, " - "but received number is:%d.", - s_n)); + PADDLE_ENFORCE_GT(s_n, 0, + platform::errors::InvalidArgument( + "The number of Variables in Input(Scores) " + "should be greater than 0, " + "but received number is:%d.", + s_n)); const size_t a_n = anchors_dims.size(); - PADDLE_ENFORCE_GT(a_n, 0, platform::errors::InvalidArgument( - "The number of Variables in Input(Anchors) " - "should be greater than 0, " - "but received number is:%d.", - a_n)); + PADDLE_ENFORCE_GT(a_n, 0, + platform::errors::InvalidArgument( + "The number of Variables in Input(Anchors) " + "should be greater than 0, " + "but received number is:%d.", + a_n)); auto bbox_dims = bboxes_dims[0]; auto score_dims = scores_dims[0]; auto anchor_dims = anchors_dims[0]; diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc index 353d17a6e09..eb6d6c6db92 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cc +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -40,8 +41,8 @@ bool GT(T a, T b) { } /* -*check if (x, y) is in the boundary of roi -*/ + *check if (x, y) is in the boundary of roi + */ template bool in_quad(T x, T y, T roi_x[], T roi_y[]) { for (int i = 0; i < 4; i++) { @@ -431,10 +432,9 @@ class CPUROIPerspectiveTransformGradOpKernel : public framework::OpKernel { T matrix[9]; get_transform_matrix(transformed_width, transformed_height, roi_x, roi_y, matrix); - const T* out_grad_ptr = out_grad_data + - (roi_idx * channels + c) * - transformed_height * - transformed_width; + const T* out_grad_ptr = out_grad_data + (roi_idx * channels + c) * + transformed_height * + transformed_width; for (int out_h = 0; out_h < transformed_height; ++out_h) { for (int out_w = 0; out_w < transformed_width; ++out_w) { T src_w; diff --git a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu index 515a4bbac59..1bff79606d4 100644 --- a/paddle/fluid/operators/detection/roi_perspective_transform_op.cu +++ b/paddle/fluid/operators/detection/roi_perspective_transform_op.cu @@ -13,13 +13,14 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/float16.h" #include "paddle/phi/kernels/funcs/math_function.h" -using paddle::platform::PADDLE_CUDA_NUM_THREADS; using paddle::platform::float16; +using paddle::platform::PADDLE_CUDA_NUM_THREADS; namespace paddle { namespace operators { @@ -56,8 +57,8 @@ __device__ T min(T a, T b) { } /* -* check if (x, y) is in the boundary of roi -*/ + * check if (x, y) is in the boundary of roi + */ template __device__ bool in_quad(T x, T y, T roi_x[], T roi_y[]) { for (int i = 0; i < 4; i++) { diff --git a/paddle/fluid/operators/detection/rpn_target_assign_op.cc b/paddle/fluid/operators/detection/rpn_target_assign_op.cc index e96c0bbc272..b636decdfbf 100644 --- a/paddle/fluid/operators/detection/rpn_target_assign_op.cc +++ b/paddle/fluid/operators/detection/rpn_target_assign_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/detection/bbox_util.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc index 8526f1762cd..31f3dab81fe 100644 --- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection/sigmoid_focal_loss_op.h" + #include #include #include diff --git a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h index 51829595863..fcb7ec1fbfe 100644 --- a/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h +++ b/paddle/fluid/operators/detection/sigmoid_focal_loss_op.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/detection/yolo_box_op.cc b/paddle/fluid/operators/detection/yolo_box_op.cc index 35e38909017..ae7dfe0dd66 100644 --- a/paddle/fluid/operators/detection/yolo_box_op.cc +++ b/paddle/fluid/operators/detection/yolo_box_op.cc @@ -36,10 +36,11 @@ class YoloBoxOp : public framework::OperatorWithKernel { auto iou_aware = ctx->Attrs().Get("iou_aware"); auto iou_aware_factor = ctx->Attrs().Get("iou_aware_factor"); - PADDLE_ENFORCE_EQ(dim_x.size(), 4, platform::errors::InvalidArgument( - "Input(X) should be a 4-D tensor." - "But received X dimension(%s)", - dim_x.size())); + PADDLE_ENFORCE_EQ( + dim_x.size(), 4, + platform::errors::InvalidArgument("Input(X) should be a 4-D tensor." + "But received X dimension(%s)", + dim_x.size())); if (iou_aware) { PADDLE_ENFORCE_EQ( dim_x[1], anchor_num * (6 + class_num), @@ -245,11 +246,10 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, YoloBoxInferShapeFunctor); -REGISTER_OP_VERSION(yolo_box) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(yolo_box).AddCheckpoint( + R"ROC( Upgrade yolo box to add new attribute [iou_aware, iou_aware_factor]. )ROC", - paddle::framework::compatible::OpVersionDesc() - .NewAttr("iou_aware", "Whether use iou aware", false) - .NewAttr("iou_aware_factor", "iou aware factor", 0.5f)); + paddle::framework::compatible::OpVersionDesc() + .NewAttr("iou_aware", "Whether use iou aware", false) + .NewAttr("iou_aware_factor", "iou aware factor", 0.5f)); diff --git a/paddle/fluid/operators/detection/yolov3_loss_op.cc b/paddle/fluid/operators/detection/yolov3_loss_op.cc index 21044734ca8..2170fd0639f 100644 --- a/paddle/fluid/operators/detection/yolov3_loss_op.cc +++ b/paddle/fluid/operators/detection/yolov3_loss_op.cc @@ -10,6 +10,7 @@ limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/imperative/type_defs.h" diff --git a/paddle/fluid/operators/detection_map_op.cc b/paddle/fluid/operators/detection_map_op.cc index 588967f0832..aa4695cc975 100644 --- a/paddle/fluid/operators/detection_map_op.cc +++ b/paddle/fluid/operators/detection_map_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/detection_map_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/detection_map_op.h b/paddle/fluid/operators/detection_map_op.h index 4dd41837f06..a034572a0c4 100644 --- a/paddle/fluid/operators/detection_map_op.h +++ b/paddle/fluid/operators/detection_map_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/determinant_op.cc b/paddle/fluid/operators/determinant_op.cc index 6959b5cf811..ec5a51bbffa 100644 --- a/paddle/fluid/operators/determinant_op.cc +++ b/paddle/fluid/operators/determinant_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/determinant_op.h" + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/backward.h" diff --git a/paddle/fluid/operators/determinant_op.h b/paddle/fluid/operators/determinant_op.h index 702ff3bfd87..d4c05b631e3 100644 --- a/paddle/fluid/operators/determinant_op.h +++ b/paddle/fluid/operators/determinant_op.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/for_range.h" diff --git a/paddle/fluid/operators/dgc_clip_by_norm_op.cc b/paddle/fluid/operators/dgc_clip_by_norm_op.cc index 85a29271b13..f60380f0475 100644 --- a/paddle/fluid/operators/dgc_clip_by_norm_op.cc +++ b/paddle/fluid/operators/dgc_clip_by_norm_op.cc @@ -10,10 +10,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "paddle/fluid/operators/dgc_clip_by_norm_op.h" +#include + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dgc_op.cc b/paddle/fluid/operators/dgc_op.cc index 5fe66fa38a8..95d3f75de9a 100644 --- a/paddle/fluid/operators/dgc_op.cc +++ b/paddle/fluid/operators/dgc_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/dgc_op.h" + #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/dgc_op.h b/paddle/fluid/operators/dgc_op.h index b1bf5e27781..91093f67e05 100644 --- a/paddle/fluid/operators/dgc_op.h +++ b/paddle/fluid/operators/dgc_op.h @@ -14,8 +14,8 @@ limitations under the License. 
*/ #pragma once #include -#include "dgc/dgc.h" +#include "dgc/dgc.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" @@ -118,10 +118,12 @@ class DGCOpKernel : public framework::OpKernel { 1 - get_period_sparcity( sparsity, static_cast(*current_step - rampup_begin_step), rampup_step); - PADDLE_ENFORCE_GE(ratio, 0.0, platform::errors::InvalidArgument( - "DGC sparsity ratio must >= 0")); - PADDLE_ENFORCE_LT(ratio, 1.0, platform::errors::InvalidArgument( - "DGC sparsity ratio must < 1")); + PADDLE_ENFORCE_GE( + ratio, 0.0, + platform::errors::InvalidArgument("DGC sparsity ratio must be >= 0")); + PADDLE_ENFORCE_LT( + ratio, 1.0, + platform::errors::InvalidArgument("DGC sparsity ratio must be < 1")); int k = static_cast(g->numel() * ratio); VLOG(10) << "m:" << m << ", use_nesterov:" << use_nesterov diff --git a/paddle/fluid/operators/diag_embed_op.cu b/paddle/fluid/operators/diag_embed_op.cu index 7e3ab6be664..a9d92fdf634 100644 --- a/paddle/fluid/operators/diag_embed_op.cu +++ b/paddle/fluid/operators/diag_embed_op.cu @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/diag_embed_op.h" diff --git a/paddle/fluid/operators/diag_embed_op.h b/paddle/fluid/operators/diag_embed_op.h index a5621be3baa..b07047996d5 100644 --- a/paddle/fluid/operators/diag_embed_op.h +++ b/paddle/fluid/operators/diag_embed_op.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/platform/for_range.h" diff --git a/paddle/fluid/operators/dirichlet_op.h b/paddle/fluid/operators/dirichlet_op.h index 540acad423a..658688816eb 100644 --- a/paddle/fluid/operators/dirichlet_op.h +++ b/paddle/fluid/operators/dirichlet_op.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/for_range.h" diff --git a/paddle/fluid/operators/dist_op.cc b/paddle/fluid/operators/dist_op.cc index 55b24849412..6f897bff75c 100644 --- a/paddle/fluid/operators/dist_op.cc +++ b/paddle/fluid/operators/dist_op.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op.h b/paddle/fluid/operators/dlnne/dlnne_engine_op.h index 6b2622366fe..857f295326b 100644 --- a/paddle/fluid/operators/dlnne/dlnne_engine_op.h +++ b/paddle/fluid/operators/dlnne/dlnne_engine_op.h @@ -13,11 +13,11 @@ // limitations under the License. 
#pragma once +#include #include // NOTLINT #include // NOTLINT #include // NOTLINT -#include #include #include #include @@ -128,11 +128,13 @@ class DlnneEngineOp : public framework::OperatorBase { << ".onnx"; builder = dl::nne::CreateInferBuilder(); - PADDLE_ENFORCE_NE(builder, nullptr, platform::errors::Unavailable( - "nne create builder failed")); + PADDLE_ENFORCE_NE( + builder, nullptr, + platform::errors::Unavailable("nne create builder failed")); parser = dl::nne::CreateParser(); - PADDLE_ENFORCE_NE(parser, nullptr, platform::errors::Unavailable( - "nne create parser failed")); + PADDLE_ENFORCE_NE( + parser, nullptr, + platform::errors::Unavailable("nne create parser failed")); network = builder->CreateNetwork(); diff --git a/paddle/fluid/operators/dlnne/dlnne_engine_op_test.cc b/paddle/fluid/operators/dlnne/dlnne_engine_op_test.cc index 611366f6c5b..8e1d7fe5d81 100644 --- a/paddle/fluid/operators/dlnne/dlnne_engine_op_test.cc +++ b/paddle/fluid/operators/dlnne/dlnne_engine_op_test.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/operators/dlnne/dlnne_engine_op.h" + #include + #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_desc.h" diff --git a/paddle/fluid/operators/dropout_impl.cu.h b/paddle/fluid/operators/dropout_impl.cu.h index 482f88b73e6..c40f6c0bbae 100644 --- a/paddle/fluid/operators/dropout_impl.cu.h +++ b/paddle/fluid/operators/dropout_impl.cu.h @@ -19,11 +19,13 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include #include + #include "paddle/fluid/platform/dynload/curand.h" #endif #ifdef PADDLE_WITH_HIP #include #include + #include "paddle/fluid/platform/dynload/hiprand.h" #endif diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc index 8d033ea3194..9426efa4942 100644 --- a/paddle/fluid/operators/dropout_op.cc +++ b/paddle/fluid/operators/dropout_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/infermeta/binary.h" diff --git a/paddle/fluid/operators/dropout_op_xpu.cc b/paddle/fluid/operators/dropout_op_xpu.cc index 851f26ee0e7..24de99d6d8f 100644 --- a/paddle/fluid/operators/dropout_op_xpu.cc +++ b/paddle/fluid/operators/dropout_op_xpu.cc @@ -11,6 +11,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { diff --git a/paddle/fluid/operators/edit_distance_op.cc b/paddle/fluid/operators/edit_distance_op.cc index db8a107290e..8127895569f 100644 --- a/paddle/fluid/operators/edit_distance_op.cc +++ b/paddle/fluid/operators/edit_distance_op.cc @@ -37,12 +37,13 @@ class EditDistanceOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( hyp_dims.size() == 2 && ref_dims.size() == 2 && hyp_dims[0] == ref_dims[0], - true, platform::errors::InvalidArgument( - "Input(Hyps) and Input(Refs) must be 2-D Tensors with " - "identical first dimension. But received Input(Hyps): " - "input rank %u, input shape [%s]; received Input(Refs): " - "input rank %u, input shape [%s]", - hyp_dims.size(), hyp_dims, ref_dims.size(), ref_dims)); + true, + platform::errors::InvalidArgument( + "Input(Hyps) and Input(Refs) must be 2-D Tensors with " + "identical first dimension. 
But received Input(Hyps): " + "input rank %u, input shape [%s]; received Input(Refs): " + "input rank %u, input shape [%s]", + hyp_dims.size(), hyp_dims, ref_dims.size(), ref_dims)); PADDLE_ENFORCE_EQ( hyp_length_dims[0] == ref_length_dims[0] && hyp_length_dims[0] == hyp_dims[0], diff --git a/paddle/fluid/operators/edit_distance_op.cu b/paddle/fluid/operators/edit_distance_op.cu index 49ac7183ff3..eb208c559ce 100644 --- a/paddle/fluid/operators/edit_distance_op.cu +++ b/paddle/fluid/operators/edit_distance_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/edit_distance_op.h" diff --git a/paddle/fluid/operators/edit_distance_op.h b/paddle/fluid/operators/edit_distance_op.h index ef290c2eff2..101e3a90b80 100644 --- a/paddle/fluid/operators/edit_distance_op.h +++ b/paddle/fluid/operators/edit_distance_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/eig_op.cc b/paddle/fluid/operators/eig_op.cc index 6f1737dba81..5239248d82f 100644 --- a/paddle/fluid/operators/eig_op.cc +++ b/paddle/fluid/operators/eig_op.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/operators/eig_op.h" + #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -32,10 +34,11 @@ class EigOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); int rank = x_dims.size(); - PADDLE_ENFORCE_GE(rank, 2, platform::errors::InvalidArgument( - "Expects input tensor x to be not less than " - "2 dimentions, but got dimention %d", - rank)); + PADDLE_ENFORCE_GE(rank, 2, + platform::errors::InvalidArgument( + "Expects input tensor x to be not less than " + "2 dimensions, but got dimension %d", + rank)); PADDLE_ENFORCE_EQ(x_dims[rank - 2], x_dims[rank - 1], platform::errors::InvalidArgument( "The input matrix must be a square matrix, " diff --git a/paddle/fluid/operators/eig_op.h b/paddle/fluid/operators/eig_op.h index fe898a6c41c..0f9afae8267 100644 --- a/paddle/fluid/operators/eig_op.h +++ b/paddle/fluid/operators/eig_op.h @@ -15,8 +15,10 @@ #pragma once #include + #include #include + #include "paddle/fluid/operators/math/matrix_solve.h" #include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/for_range.h" diff --git a/paddle/fluid/operators/eigvals_op.cc b/paddle/fluid/operators/eigvals_op.cc index 2ef591dd26a..177dc684662 100644 --- a/paddle/fluid/operators/eigvals_op.cc +++ b/paddle/fluid/operators/eigvals_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/eigvals_op.h" + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/eigvals_op.h b/paddle/fluid/operators/eigvals_op.h index 4627acc0d07..d75b33e0857 100644 --- a/paddle/fluid/operators/eigvals_op.h +++ b/paddle/fluid/operators/eigvals_op.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/allocation/allocator.h" @@ -71,14 +72,16 @@ static void SpiltBatchSquareMatrix(const Tensor& input, } static void CheckLapackEigResult(const int info, const std::string& name) { - PADDLE_ENFORCE_LE(info, 0, platform::errors::PreconditionNotMet( - "The QR algorithm failed to compute all the " - "eigenvalues in function %s.", - name.c_str())); + PADDLE_ENFORCE_LE(info, 0, + platform::errors::PreconditionNotMet( + "The QR algorithm failed to compute all the " + "eigenvalues in function %s.", + name.c_str())); PADDLE_ENFORCE_GE( - info, 0, platform::errors::InvalidArgument( - "The %d-th argument has an illegal value in function %s.", - -info, name.c_str())); + info, 0, + platform::errors::InvalidArgument( + "The %d-th argument has an illegal value in function %s.", -info, + name.c_str())); } template diff --git a/paddle/fluid/operators/einsum_op.cc b/paddle/fluid/operators/einsum_op.cc index 6da0045443c..7fc19d6913f 100644 --- a/paddle/fluid/operators/einsum_op.cc +++ b/paddle/fluid/operators/einsum_op.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc index 22a5de4c609..9c1a84ba8b6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op_xpu.cc @@ -15,8 +15,8 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include #include -#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_xpu.h" #include "paddle/fluid/platform/device/device_wrapper.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cc b/paddle/fluid/operators/elementwise/elementwise_div_op.cc index 13fd9b81a87..e0523a26ee3 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_div_op.h" + #include #include diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.h b/paddle/fluid/operators/elementwise/elementwise_div_op.h index e9adb9abdb5..b3363862d5f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/operators/elementwise/elementwise_mul_op.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_heaviside_op.cc b/paddle/fluid/operators/elementwise/elementwise_heaviside_op.cc index e003a43b5c5..ebdebb2f485 100644 --- a/paddle/fluid/operators/elementwise/elementwise_heaviside_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_heaviside_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_mlu.h b/paddle/fluid/operators/elementwise/elementwise_mlu.h index ff1e12103be..8c230c5f47b 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mlu.h +++ b/paddle/fluid/operators/elementwise/elementwise_mlu.h @@ -16,6 +16,7 @@ #ifdef PADDLE_WITH_MLU #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op_xpu.cc index 156589384c0..19d28301ffb 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op_xpu.cc @@ -15,11 +15,11 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/common_infer_shape_functions.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" - #include "paddle/fluid/operators/elementwise/elementwise_xpu.h" #include "paddle/fluid/platform/device/device_wrapper.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op.cc index 45b6f7cb391..253014a7981 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/elementwise/elementwise_mul_op.h" + #include #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/platform/complex.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.h b/paddle/fluid/operators/elementwise/elementwise_mul_op.h index e2dd0e36d40..39045bf0d59 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.h +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.h @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/platform/cpu_info.h" - #include "paddle/phi/kernels/elementwise_kernel.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h index 80b07721f0b..476b891bb41 100644 --- a/paddle/fluid/operators/elementwise/elementwise_op_function.h +++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h @@ -28,7 +28,6 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/elementwise/elementwise_functor.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/transform.h" - #include "paddle/phi/api/lib/utils/tensor_utils.h" #include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/cpu/elementwise_grad.h" @@ -60,14 +59,14 @@ namespace paddle { namespace operators { /* -* Pack input and output tensors into respective vectors with -* consideration of varible X`s class type. -* Input variable X is supported to be whether LoDTensor or -* SelectedRows class type in this package function, once X -* was SelectedRows type, a valid pointer x_for_selectedrows -* is excepted to be passed in from op kernel for acquisition -* of the valid address of LoDTensor created ahead in the function. -*/ + * Pack input and output tensors into respective vectors with + * consideration of varible X`s class type. + * Input variable X is supported to be whether LoDTensor or + * SelectedRows class type in this package function, once X + * was SelectedRows type, a valid pointer x_for_selectedrows + * is excepted to be passed in from op kernel for acquisition + * of the valid address of LoDTensor created ahead in the function. + */ template int PackTensorsIntoVector(const framework::ExecutionContext &ctx, std::vector *ins, @@ -327,10 +326,11 @@ static void FusedElemwiseAndActBroadcast1CUDA(gpuStream_t stream, const T *x, T *intermediate_out) { int block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, w); int gird_size = h; - FusedElemwiseAndActBroadcast1CUDAKernel< - T, CompoundFunctor, BcastY, KeepIntermediateOut, - SameShapeOfIntermediateOutAndOut><<>>( - x, y, h, w, compound_functor, out, intermediate_out); + FusedElemwiseAndActBroadcast1CUDAKernel + <<>>(x, y, h, w, compound_functor, out, + intermediate_out); } template <<>>( - x, y, compound_functor, pre, n, post, out, intermediate_out); + FusedElemwiseAndActBroadcast2CUDAKernel + <<>>(x, y, compound_functor, pre, n, + post, out, intermediate_out); } #endif @@ -544,8 +545,9 @@ void FusedElemwiseAndActGradComputeNoBroadcast( out->data(), dout->data(), dx_op, dy_op, dintermediate_op, dx == nullptr ? nullptr : dx->mutable_data(ctx.GetPlace()), dy == nullptr ? nullptr : dy->mutable_data(ctx.GetPlace()), - dintermediate == nullptr ? nullptr : dintermediate->mutable_data( - ctx.GetPlace())}); + dintermediate == nullptr + ? nullptr + : dintermediate->mutable_data(ctx.GetPlace())}); } template <<>>( - x, y, intermediate_out, out, dout, h, w, dx_op, dy_op, dintermediate_op, - dx, dy, d_intermediate); + FusedElemwiseAndActGradBroadcast1CUDAKernel + <<>>(x, y, intermediate_out, out, dout, h, w, + dx_op, dy_op, dintermediate_op, dx, dy, + d_intermediate); } template <<>>( - x, y, intermediate_out, out, dout, pre, n, post, dx_op, dy_op, - dintermediate_op, dx, dy, dintermediate); + FusedElemwiseAndActGradBroadcast2CUDAKernel + <<>>( + x, y, intermediate_out, out, dout, pre, n, post, dx_op, dy_op, + dintermediate_op, dx, dy, dintermediate); } #endif @@ -995,8 +996,9 @@ void FusedElemwiseAndActGradComputeWithBroadcast( out->data(), dout->data(), h, w, dx_op, dy_op, dintermediate_op, dx == nullptr ? nullptr : dx->mutable_data(ctx.GetPlace()), dy == nullptr ? nullptr : dy->mutable_data(ctx.GetPlace()), - dintermediate == nullptr ? nullptr : dintermediate->mutable_data( - ctx.GetPlace())); + dintermediate == nullptr + ? 
nullptr + : dintermediate->mutable_data(ctx.GetPlace())); #endif } else { FusedElemwiseAndActGradBroadcast1CPUdata(), dout->data(), h, w, dx_op, dy_op, dintermediate_op, dx == nullptr ? nullptr : dx->mutable_data(ctx.GetPlace()), dy == nullptr ? nullptr : dy->mutable_data(ctx.GetPlace()), - dintermediate == nullptr ? nullptr : dintermediate->mutable_data( - ctx.GetPlace())); + dintermediate == nullptr + ? nullptr + : dintermediate->mutable_data(ctx.GetPlace())); } } else { if (platform::is_gpu_place(ctx.GetPlace())) { @@ -1022,8 +1025,9 @@ void FusedElemwiseAndActGradComputeWithBroadcast( dintermediate_op, dx == nullptr ? nullptr : dx->mutable_data(ctx.GetPlace()), dy == nullptr ? nullptr : dy->mutable_data(ctx.GetPlace()), - dintermediate == nullptr ? nullptr : dintermediate->mutable_data( - ctx.GetPlace())); + dintermediate == nullptr + ? nullptr + : dintermediate->mutable_data(ctx.GetPlace())); #endif } else { FusedElemwiseAndActGradBroadcast2CPUmutable_data(ctx.GetPlace()), dy == nullptr ? nullptr : dy->mutable_data(ctx.GetPlace()), - dintermediate == nullptr ? nullptr : dintermediate->mutable_data( - ctx.GetPlace())); + dintermediate == nullptr + ? nullptr + : dintermediate->mutable_data(ctx.GetPlace())); } } } diff --git a/paddle/fluid/operators/elementwise/elementwise_xpu.h b/paddle/fluid/operators/elementwise/elementwise_xpu.h index db5c94b9d1a..3f38450581e 100644 --- a/paddle/fluid/operators/elementwise/elementwise_xpu.h +++ b/paddle/fluid/operators/elementwise/elementwise_xpu.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/place.h" #include "xpu/refactor/math.h" @@ -32,8 +33,9 @@ void XPUElementwise( const std::vector&, const std::vector&)> func) { auto x_var = ctx.InputVar("X"); - PADDLE_ENFORCE_NE(x_var, nullptr, platform::errors::InvalidArgument( - "Cannot get input Variable X")); + PADDLE_ENFORCE_NE( + x_var, nullptr, + platform::errors::InvalidArgument("Cannot get input Variable X")); PADDLE_ENFORCE_EQ( x_var->IsType(), true, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/elementwise/test_elementwise_div_grad_grad.cc b/paddle/fluid/operators/elementwise/test_elementwise_div_grad_grad.cc index 3cecc52a3c4..f647bd91d5f 100644 --- a/paddle/fluid/operators/elementwise/test_elementwise_div_grad_grad.cc +++ b/paddle/fluid/operators/elementwise/test_elementwise_div_grad_grad.cc @@ -18,6 +18,7 @@ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" @@ -72,11 +73,12 @@ class TestElementwiseDivGradGradWithoutDout std::unique_ptr CreateTestOp() override { auto op = framework::OpRegistry::CreateOp( - this->op_type_, {{"Y", {"Y"}}, - {"Out", {"Out"}}, - {"DDX", {"DDX"}}, - {"DDY", {"DDY"}}, - {"DX", {"DX"}}}, + this->op_type_, + {{"Y", {"Y"}}, + {"Out", {"Out"}}, + {"DDX", {"DDX"}}, + {"DDY", {"DDY"}}, + {"DX", {"DX"}}}, {{"Y@GRAD", {"Y@GRAD"}}, {"DDOut", {"DDOut"}}}, {{"use_mkldnn", false}, {"axis", 0}}); return op; diff --git a/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h b/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h index 05f87e5465a..7defe4e5793 100644 --- a/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h +++ b/paddle/fluid/operators/elementwise/test_elementwise_op_grad_grad.h @@ -21,6 +21,7 @@ #include #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include 
"paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/empty_op.cc b/paddle/fluid/operators/empty_op.cc index 9e0e4e7fe1c..0f6c308b211 100644 --- a/paddle/fluid/operators/empty_op.cc +++ b/paddle/fluid/operators/empty_op.cc @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/op_registry.h" - #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/infermeta/nullary.h" namespace paddle { diff --git a/paddle/fluid/operators/expand_as_op.cc b/paddle/fluid/operators/expand_as_op.cc index 093c4d8f793..cace8b5fdff 100644 --- a/paddle/fluid/operators/expand_as_op.cc +++ b/paddle/fluid/operators/expand_as_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/expand_as_op.h" + #include #include diff --git a/paddle/fluid/operators/expand_as_v2_op.cc b/paddle/fluid/operators/expand_as_v2_op.cc old mode 100755 new mode 100644 index 9361edd43bf..8cdab4c5e1a --- a/paddle/fluid/operators/expand_as_v2_op.cc +++ b/paddle/fluid/operators/expand_as_v2_op.cc @@ -10,8 +10,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/expand_as_v2_op.h" + #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/infermeta/binary.h" @@ -107,7 +109,6 @@ REGISTER_OPERATOR(expand_as_v2_grad, ops::ExpandAsV2GradOp, ops::ExpandAsV2GradNoNeedBufVarsInferer); REGISTER_OP_VERSION(expand_as_v2) - .AddCheckpoint( - R"ROC(fix expand_as_v2 and add new input [Y])ROC", - paddle::framework::compatible::OpVersionDesc().NewInput( - "Y", "Expand X according to the shape of Y")); + .AddCheckpoint(R"ROC(fix expand_as_v2 and add new input [Y])ROC", + paddle::framework::compatible::OpVersionDesc().NewInput( + "Y", "Expand X according to the shape of Y")); diff --git a/paddle/fluid/operators/expand_as_v2_op_npu.cc b/paddle/fluid/operators/expand_as_v2_op_npu.cc index 67d95e12400..28fd922d77b 100644 --- a/paddle/fluid/operators/expand_as_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_as_v2_op_npu.cc @@ -30,10 +30,11 @@ class ExpandAsV2NPUKernel : public framework::OpKernel { "expand_as_v2 op must be greater than or equal to " "the rank (%d) of the input 'x'.", target_rank, rank)); - PADDLE_ENFORCE_GE(rank, 1, platform::errors::InvalidArgument( - "The rank (%d) of the input 'x' for " - "expand_as_v2 op must be positive.", - rank)); + PADDLE_ENFORCE_GE( + rank, 1, + platform::errors::InvalidArgument("The rank (%d) of the input 'x' for " + "expand_as_v2 op must be positive.", + rank)); PADDLE_ENFORCE_LE(target_rank, MAX_RANK_SUPPORTED, platform::errors::InvalidArgument( "The rank (%d) of the input 'target_tensor' for " diff --git a/paddle/fluid/operators/expand_as_v2_op_xpu.cc b/paddle/fluid/operators/expand_as_v2_op_xpu.cc index 0912b280aa6..fc3d77f3cc8 100644 --- a/paddle/fluid/operators/expand_as_v2_op_xpu.cc +++ b/paddle/fluid/operators/expand_as_v2_op_xpu.cc @@ -33,10 +33,11 @@ class ExpandAsV2XPUKernel : public framework::OpKernel { "expand_as_v2 op must be greater than or equal to " "the rank (%d) of the input 'x'.", target_rank, rank)); - 
PADDLE_ENFORCE_GE(rank, 1, platform::errors::InvalidArgument( - "The rank (%d) of the input 'x' for " - "expand_as_v2 op must be positive.", - rank)); + PADDLE_ENFORCE_GE( + rank, 1, + platform::errors::InvalidArgument("The rank (%d) of the input 'x' for " + "expand_as_v2 op must be positive.", + rank)); PADDLE_ENFORCE_LE(target_rank, MAX_RANK_SUPPORTED, platform::errors::InvalidArgument( "The rank (%d) of the input 'target_tensor' for " diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index e45761112d4..04cdbd5a606 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/expand_op.h" + #include #include #include diff --git a/paddle/fluid/operators/expand_op.h b/paddle/fluid/operators/expand_op.h index 05cd893b057..880adad743f 100644 --- a/paddle/fluid/operators/expand_op.h +++ b/paddle/fluid/operators/expand_op.h @@ -213,12 +213,13 @@ class ExpandGradKernel : public framework::OpKernel { framework::TensorCopy(*in0, context.GetPlace(), context.device_context(), out0); } else { - PADDLE_ENFORCE_GE(dims, 1, platform::errors::InvalidArgument( - "The number of dimensions of the input " - "'Out@GRAD' for Op(expand_grad)" - " must be greater than or equal to 1, but " - "the value received is %d.", - dims)); + PADDLE_ENFORCE_GE(dims, 1, + platform::errors::InvalidArgument( + "The number of dimensions of the input " + "'Out@GRAD' for Op(expand_grad)" + " must be greater than or equal to 1, but " + "the value received is %d.", + dims)); PADDLE_ENFORCE_LE(dims, MAX_RANK_SUPPORTED, platform::errors::InvalidArgument( "The number of dimensions of the input 'Out@GRAD' " diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc index 292f706cb18..6aeea745911 100644 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/expand_v2_op.h" + #include #include #include diff --git a/paddle/fluid/operators/expand_v2_op_npu.cc b/paddle/fluid/operators/expand_v2_op_npu.cc index c9fe19fd091..c64bdabf599 100644 --- a/paddle/fluid/operators/expand_v2_op_npu.cc +++ b/paddle/fluid/operators/expand_v2_op_npu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/expand_v2_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/expand_v2_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/expand_v2_op_xpu.cc b/paddle/fluid/operators/expand_v2_op_xpu.cc index cb2165c4e92..3d010c964bc 100644 --- a/paddle/fluid/operators/expand_v2_op_xpu.cc +++ b/paddle/fluid/operators/expand_v2_op_xpu.cc @@ -13,8 +13,8 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/expand_v2_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/expand_v2_op.h" namespace paddle { namespace operators { @@ -110,10 +110,11 @@ class ExpandV2XPUKernel : public framework::OpKernel { r = xpu::broadcast(dev_ctx.x_context(), x_data, out_data, x_shape, out_shape); } - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(broadcast) return wrong " - "value[%d %s] in ExpandV2XPUKernel.", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(broadcast) return wrong " + "value[%d %s] in ExpandV2XPUKernel.", + r, XPUAPIErrorMsg[r])); } }; diff --git a/paddle/fluid/operators/fake_dequantize_op.cc b/paddle/fluid/operators/fake_dequantize_op.cc index 8172f441e64..5a3a1cf53de 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cc +++ b/paddle/fluid/operators/fake_dequantize_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fake_dequantize_op.h" + #include #include + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fake_dequantize_op.cu.h b/paddle/fluid/operators/fake_dequantize_op.cu.h index 9859dd4607c..50f772ec45d 100644 --- a/paddle/fluid/operators/fake_dequantize_op.cu.h +++ b/paddle/fluid/operators/fake_dequantize_op.cu.h @@ -119,10 +119,10 @@ struct ChannelDequantizeFunctor { quant_stride *= in_dims[i]; } - DequantizeOneScaleQuantAxisN< - T><<>>( - in_data, scale_factor, max_range, num, in_dims[quant_axis], - quant_stride, out_data); + DequantizeOneScaleQuantAxisN + <<>>( + in_data, scale_factor, max_range, num, in_dims[quant_axis], + quant_stride, out_data); } else if (scale_num == 2) { // Not need to consider quant_axis int num = in->numel(); diff --git a/paddle/fluid/operators/fake_dequantize_op.h b/paddle/fluid/operators/fake_dequantize_op.h index aad2c2c7d98..e623a638922 100644 --- a/paddle/fluid/operators/fake_dequantize_op.h +++ b/paddle/fluid/operators/fake_dequantize_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/ddim.h" diff --git a/paddle/fluid/operators/fake_quantize_op.cc b/paddle/fluid/operators/fake_quantize_op.cc index ac72f23d46e..855c78d2998 100644 --- a/paddle/fluid/operators/fake_quantize_op.cc +++ b/paddle/fluid/operators/fake_quantize_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fake_quantize_op.h" + #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/transform.h" @@ -832,7 +834,7 @@ REGISTER_OP_VERSION(moving_average_abs_max_scale) "Delete output in order to make the inference model not " "save moving_average_abs_max_scale operator. 
This will " "make the quantitative model be correctly applied in inference.")) - .AddCheckpoint( - R"ROC(Incompatible upgrade of output [Out])ROC", - paddle::framework::compatible::OpVersionDesc().NewOutput( - "Out", "In order to support dygraph qat, add output again.")); + .AddCheckpoint(R"ROC(Incompatible upgrade of output [Out])ROC", + paddle::framework::compatible::OpVersionDesc().NewOutput( + "Out", + "In order to support dygraph qat, add output again.")); diff --git a/paddle/fluid/operators/fake_quantize_op.cu.h b/paddle/fluid/operators/fake_quantize_op.cu.h index a6130c272d7..580521183cb 100644 --- a/paddle/fluid/operators/fake_quantize_op.cu.h +++ b/paddle/fluid/operators/fake_quantize_op.cu.h @@ -17,6 +17,7 @@ limitations under the License. */ #endif // PADDLE_FLUID_OPERATORS_FAKE_QUANTIZE_OP_CU_H_ #include + #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/fake_quantize_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" @@ -80,10 +81,10 @@ struct FindAbsMaxFunctor { framework::Tensor max; T* max_data = max.mutable_data(phi::make_ddim({grid}), ctx.GetPlace()); - FindAbsMaxKernel<<>>( - in, num, max_data); - FindAbsMaxKernel<<<1, block, 1024 * sizeof(T), ctx.stream()>>>( - max_data, grid, out); + FindAbsMaxKernel + <<>>(in, num, max_data); + FindAbsMaxKernel + <<<1, block, 1024 * sizeof(T), ctx.stream()>>>(max_data, grid, out); } }; @@ -176,9 +177,9 @@ struct FindChannelAbsMaxFunctor { int cout = in_dims[0]; int grid = cout; int block = 1024; - FindChannelAbsMaxKernelQuantAxis0< - T><<>>( - in_data, num, cout, out_abs_max); + FindChannelAbsMaxKernelQuantAxis0 + <<>>(in_data, num, cout, + out_abs_max); } else if (quant_axis == 1) { int cin = in_dims[0]; int cout = in_dims[1]; @@ -193,17 +194,17 @@ struct FindChannelAbsMaxFunctor { for (int i = 0; i < cin / max_threads; i++) { int block = max_threads; - FindChannelAbsMaxKernelQuantAxis1< - T><<>>( - in_data, num, cin, cout, out_abs_max); + FindChannelAbsMaxKernelQuantAxis1 + <<>>( + in_data, num, cin, cout, out_abs_max); in_data += num / cin; } int block = cin % max_threads; if (block > 0) { - FindChannelAbsMaxKernelQuantAxis1< - T><<>>( - in_data, num, in_dims[0], in_dims[1], out_abs_max); + FindChannelAbsMaxKernelQuantAxis1 + <<>>( + in_data, num, in_dims[0], in_dims[1], out_abs_max); } } } @@ -549,16 +550,16 @@ struct ChannelClipFakeQuantDequantFunctor { if (quant_axis == 0) { int grid = in_dims[0]; int block = 1024; - ChannelClipAndQuantDequantKernelQuantAxis0< - T><<>>(in_data, scale_data, bin_cnt, - num, in_dims[0], out_data); + ChannelClipAndQuantDequantKernelQuantAxis0 + <<>>(in_data, scale_data, bin_cnt, num, + in_dims[0], out_data); } else if (quant_axis == 1) { int grid = in_dims[0] * in_dims[1]; int block = 1024; - ChannelClipAndQuantDequantKernelQuantAxis1< - T><<>>( - in_data, scale_data, bin_cnt, num, in_dims[0], in_dims[1], out_data); + ChannelClipAndQuantDequantKernelQuantAxis1 + <<>>(in_data, scale_data, bin_cnt, num, + in_dims[0], in_dims[1], out_data); } } }; diff --git a/paddle/fluid/operators/fake_quantize_op.h b/paddle/fluid/operators/fake_quantize_op.h index dc3f081cc9e..182db11ed84 100644 --- a/paddle/fluid/operators/fake_quantize_op.h +++ b/paddle/fluid/operators/fake_quantize_op.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" diff --git a/paddle/fluid/operators/fc_op.cc b/paddle/fluid/operators/fc_op.cc index 6e646f0d4bf..68ef8f3c2be 100644 --- a/paddle/fluid/operators/fc_op.cc +++ b/paddle/fluid/operators/fc_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fc_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/fc_op.h b/paddle/fluid/operators/fc_op.h index 47c71286035..1c76c2c36b8 100644 --- a/paddle/fluid/operators/fc_op.h +++ b/paddle/fluid/operators/fc_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/fc_functor.h" diff --git a/paddle/fluid/operators/fill_any_like_op_xpu.cc b/paddle/fluid/operators/fill_any_like_op_xpu.cc index ec4ba6e926c..a07fbe5a7a5 100644 --- a/paddle/fluid/operators/fill_any_like_op_xpu.cc +++ b/paddle/fluid/operators/fill_any_like_op_xpu.cc @@ -15,7 +15,6 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/framework/op_registry.h" - #include "paddle/phi/kernels/full_kernel.h" namespace paddle { diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index 07593a70f05..d6726b99813 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fill_constant_op.h" + #include + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/fill_constant_op_npu.cc b/paddle/fluid/operators/fill_constant_op_npu.cc index edd8613ba52..a121eb8cc84 100644 --- a/paddle/fluid/operators/fill_constant_op_npu.cc +++ b/paddle/fluid/operators/fill_constant_op_npu.cc @@ -84,9 +84,10 @@ class FillConstantNPUKernel : public framework::OpKernel { const auto &dev_ctx = ctx.template device_context(); auto op_func = [&shape, &value]( - const std::vector &inputs, const std::vector &outputs, - const NPUAttributeMap &attrs, - const platform::NPUDeviceContext &dev_ctx) { + const std::vector &inputs, + const std::vector &outputs, + const NPUAttributeMap &attrs, + const platform::NPUDeviceContext &dev_ctx) { Tensor tensor_value; tensor_value.mutable_data({1}, dev_ctx.GetPlace()); FillNpuTensorWithConstant(&tensor_value, diff --git a/paddle/fluid/operators/fill_diagonal_tensor_op.h b/paddle/fluid/operators/fill_diagonal_tensor_op.h index ebb980b66af..5bee72f5268 100644 --- a/paddle/fluid/operators/fill_diagonal_tensor_op.h +++ b/paddle/fluid/operators/fill_diagonal_tensor_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fill_op.cc b/paddle/fluid/operators/fill_op.cc index 521ddd4ec12..e934b794f8b 100644 --- a/paddle/fluid/operators/fill_op.cc +++ b/paddle/fluid/operators/fill_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/fill_op.h" + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fill_op.h b/paddle/fluid/operators/fill_op.h index c5cbffbf5c6..7f7e0f2b31a 100644 --- a/paddle/fluid/operators/fill_op.h +++ b/paddle/fluid/operators/fill_op.h @@ -14,9 +14,9 @@ limitations under the License. */ #pragma once +#include #include -#include #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/fill_zeros_like_op.cc b/paddle/fluid/operators/fill_zeros_like_op.cc index 2d340829332..518d8414c50 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.cc +++ b/paddle/fluid/operators/fill_zeros_like_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fill_zeros_like_op.h" + #include "paddle/fluid/platform/complex.h" namespace paddle { diff --git a/paddle/fluid/operators/fill_zeros_like_op.cu.cc b/paddle/fluid/operators/fill_zeros_like_op.cu.cc index 4cb0887c1f3..91809b8cd11 100644 --- a/paddle/fluid/operators/fill_zeros_like_op.cu.cc +++ b/paddle/fluid/operators/fill_zeros_like_op.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fill_zeros_like_op.h" + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/filter_by_instag_op.cc b/paddle/fluid/operators/filter_by_instag_op.cc index 02ea2d59ae3..cb1e3083320 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cc +++ b/paddle/fluid/operators/filter_by_instag_op.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/operators/filter_by_instag_op.h" #include + #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/var_type_inference.h" diff --git a/paddle/fluid/operators/filter_by_instag_op.cu b/paddle/fluid/operators/filter_by_instag_op.cu index 7870efba4e7..75680a61b30 100644 --- a/paddle/fluid/operators/filter_by_instag_op.cu +++ b/paddle/fluid/operators/filter_by_instag_op.cu @@ -20,6 +20,7 @@ #include #include + #include #include #include @@ -30,11 +31,10 @@ #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" +#include "paddle/fluid/operators/filter_by_instag_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/operators/filter_by_instag_op.h" - #if defined(PADDLE_WITH_CUDA) namespace cg = cooperative_groups; #endif @@ -277,7 +277,7 @@ __global__ void filter_copy_fuse_kernel( T* dst = out_data + output_start_idx * x1_embed_size; const T* src_start = x1_data + x1_lods_data[p] * x1_embed_size; const T* src_end = x1_data + x1_lods_data[p + 1] * x1_embed_size; - for (const T *j = src_start; j != src_end; dst++, j++) { + for (const T* j = src_start; j != src_end; dst++, j++) { *dst = *j; } } @@ -306,7 +306,7 @@ __global__ void copy_grad_kernel(const size_t N, const int ins_per_thread, const T* src_end = out_grad_data + (map_data[p * 3] + map_data[p * 3 + 2]) * x1_embed_size; - for (const T *j = src_start; j != src_end; dst++, j++) { + for (const T* j = src_start; j != src_end; dst++, j++) { *dst = *j; } } diff --git 
a/paddle/fluid/operators/filter_by_instag_op.h b/paddle/fluid/operators/filter_by_instag_op.h index 3abc980ceaa..6172fef9b4b 100644 --- a/paddle/fluid/operators/filter_by_instag_op.h +++ b/paddle/fluid/operators/filter_by_instag_op.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/mixed_vector.h" diff --git a/paddle/fluid/operators/flatten_op.cc b/paddle/fluid/operators/flatten_op.cc index d1ac573b844..2e767c37051 100644 --- a/paddle/fluid/operators/flatten_op.cc +++ b/paddle/fluid/operators/flatten_op.cc @@ -13,10 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/flatten_op.h" + #include #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/flatten_op.h b/paddle/fluid/operators/flatten_op.h index cacd30cad8a..6a91cd8b941 100644 --- a/paddle/fluid/operators/flatten_op.h +++ b/paddle/fluid/operators/flatten_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/flip_op.cc b/paddle/fluid/operators/flip_op.cc index e1ee1a86a2f..b00cbf5c4fc 100644 --- a/paddle/fluid/operators/flip_op.cc +++ b/paddle/fluid/operators/flip_op.cc @@ -93,10 +93,9 @@ REGISTER_OPERATOR(flip, ops::FlipOp, ops::FlipOpMaker, ops::FlipOpInferVarType, FlipInferShapeFunctor); /* ========================== register checkpoint ===========================*/ -REGISTER_OP_VERSION(flip) - .AddCheckpoint( - R"ROC(Upgrade flip, add new attr [axis] and delete attr [dims].)ROC", - paddle::framework::compatible::OpVersionDesc() - .NewAttr("axis", "The added attr 'axis' doesn't set default value.", - paddle::none) - .DeleteAttr("dims", "The attr 'dims' is deleted.")); +REGISTER_OP_VERSION(flip).AddCheckpoint( + R"ROC(Upgrade flip, add new attr [axis] and delete attr [dims].)ROC", + paddle::framework::compatible::OpVersionDesc() + .NewAttr("axis", "The added attr 'axis' doesn't set default value.", + paddle::none) + .DeleteAttr("dims", "The attr 'dims' is deleted.")); diff --git a/paddle/fluid/operators/fold_op.h b/paddle/fluid/operators/fold_op.h index c0aa47a0b4f..fd1a7558b71 100644 --- a/paddle/fluid/operators/fold_op.h +++ b/paddle/fluid/operators/fold_op.h @@ -16,6 +16,7 @@ limitations under the License. 
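The flip_op.cc hunk above illustrates the chained-call rule: a registration macro followed by a single method call now stays on the macro's line, with the long arguments wrapped underneath rather than breaking before .AddCheckpoint. Same shape for a hypothetical op (only "my_op" and the attr text are invented; the AddCheckpoint/NewAttr calls mirror the ones in the hunk):

    REGISTER_OP_VERSION(my_op).AddCheckpoint(
        R"ROC(Upgrade my_op, add new attr [axis].)ROC",
        paddle::framework::compatible::OpVersionDesc().NewAttr(
            "axis", "The added attr 'axis' doesn't set default value.",
            paddle::none));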
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/im2col.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/frame_op.cc b/paddle/fluid/operators/frame_op.cc index 2ff9beb36f2..00c98cae10e 100644 --- a/paddle/fluid/operators/frame_op.cc +++ b/paddle/fluid/operators/frame_op.cc @@ -33,10 +33,11 @@ class FrameOp : public framework::OperatorWithKernel { const int x_rank = x_dims.size(); PADDLE_ENFORCE_GE( - x_rank, 1, platform::errors::InvalidArgument( - "Input(X) of FrameOp should be a tensor which contains " - "at least 1 dimension, but got rank %s.", - x_rank)); + x_rank, 1, + platform::errors::InvalidArgument( + "Input(X) of FrameOp should be a tensor which contains " + "at least 1 dimension, but got rank %s.", + x_rank)); PADDLE_ENFORCE_GT(hop_length, 0, platform::errors::InvalidArgument( "Attribute(hop_length) of FrameOp should be greater " diff --git a/paddle/fluid/operators/fsp_op.cc b/paddle/fluid/operators/fsp_op.cc index f00ec6a1e14..16ce2b43bf4 100644 --- a/paddle/fluid/operators/fsp_op.cc +++ b/paddle/fluid/operators/fsp_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fsp_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/fused/attention_layer_norm.h b/paddle/fluid/operators/fused/attention_layer_norm.h index 43491a9faf1..b960b835979 100644 --- a/paddle/fluid/operators/fused/attention_layer_norm.h +++ b/paddle/fluid/operators/fused/attention_layer_norm.h @@ -38,11 +38,10 @@ class AttnLayerNorm { auto stream = dev_ctx_.stream(); switch (GetDesiredBlockDim(feature_size_)) { - FIXED_BLOCK_DIM_CASE( - LayerNormForward, - kBlockDim><<>>( - x_data, scale_data, bias_data, y_data, mean_data, var_data, - epsilon_, feature_size_)); + FIXED_BLOCK_DIM_CASE(LayerNormForward, kBlockDim> + <<>>( + x_data, scale_data, bias_data, y_data, mean_data, + var_data, epsilon_, feature_size_)); default: PADDLE_THROW(platform::errors::InvalidArgument( "Feature_size must be larger than 1")); diff --git a/paddle/fluid/operators/fused/attn_bias_add.cu.h b/paddle/fluid/operators/fused/attn_bias_add.cu.h index b059223eaf6..feac0f79530 100644 --- a/paddle/fluid/operators/fused/attn_bias_add.cu.h +++ b/paddle/fluid/operators/fused/attn_bias_add.cu.h @@ -120,24 +120,24 @@ void LaunchBiasAddFwKernel(const platform::CUDADeviceContext& ctx, int m, int n, auto stream = ctx.stream(); switch (vec_size) { case 4: { - BroadcastKernelBinary<<>>( - in0, in1, out, use_broadcast, numel, configlists, main_tid, tail_tid, - func); + BroadcastKernelBinary + <<>>(in0, in1, out, use_broadcast, numel, + configlists, main_tid, tail_tid, + func); break; } case 2: { - BroadcastKernelBinary<<>>( - in0, in1, out, use_broadcast, numel, configlists, main_tid, tail_tid, - func); + BroadcastKernelBinary + <<>>(in0, in1, out, use_broadcast, numel, + configlists, main_tid, tail_tid, + func); break; } case 1: { - BroadcastKernelBinary<<>>( - in0, in1, out, use_broadcast, numel, configlists, main_tid, tail_tid, - func); + BroadcastKernelBinary + <<>>(in0, in1, out, use_broadcast, numel, + configlists, main_tid, tail_tid, + func); break; } default: { @@ -176,8 +176,8 @@ void Launch1DColumnReduce(gpuStream_t stream, const int max_threads, const int block = 256; const int max_blocks = std::max(max_threads / block, 1); const int grid = std::min(left_num, max_blocks); - Compute1DColumnReduceKernel<<>>( - reduce_num, 
left_num, d_out, d_bias); + Compute1DColumnReduceKernel + <<>>(reduce_num, left_num, d_out, d_bias); } void SetConfigForColumnReduce(const int max_threads, const int reduce_num, @@ -273,8 +273,8 @@ void Launch2DColumnReduce(const platform::CUDADeviceContext& dev_ctx, const auto& stream = dev_ctx.stream(); if (!should_reduce_again) { - BiasAddBwSinglePassKernel<<>>(d_out, reduce_num, - left_num, d_bias); + BiasAddBwSinglePassKernel + <<>>(d_out, reduce_num, left_num, d_bias); } else { framework::Tensor tmp_sum; tmp_sum.Resize({grid.y, left_num}); diff --git a/paddle/fluid/operators/fused/attn_gemm.h b/paddle/fluid/operators/fused/attn_gemm.h index 304aad16ad0..a85b2f99bb1 100644 --- a/paddle/fluid/operators/fused/attn_gemm.h +++ b/paddle/fluid/operators/fused/attn_gemm.h @@ -14,12 +14,10 @@ limitations under the License. */ #pragma once -#include "paddle/fluid/platform/float16.h" -#include "paddle/phi/kernels/funcs/blas/blas.h" -#include "paddle/phi/kernels/funcs/elementwise_functor.h" - #include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" +#include "paddle/fluid/platform/float16.h" +#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cc b/paddle/fluid/operators/fused/conv_fusion_op.cc index 671e94061cb..490d92880c9 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cc +++ b/paddle/fluid/operators/fused/conv_fusion_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/operators/conv_op.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" diff --git a/paddle/fluid/operators/fused/conv_fusion_op.cu b/paddle/fluid/operators/fused/conv_fusion_op.cu index 8191c85f2a1..9ca9f8aaf74 100644 --- a/paddle/fluid/operators/fused/conv_fusion_op.cu +++ b/paddle/fluid/operators/fused/conv_fusion_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
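The attn_bias_add.cu.h hunks show the single most frequent CUDA change in this series: when a launch overflows, the break now falls between the kernel name (with its template arguments) and the <<<...>>> configuration, instead of inside the argument list. A self-contained sketch with a made-up kernel, not one from the patch:

    template <typename T>
    __global__ void AddBiasSketch(const T* in, const T* bias, T* out, int n) {
      // bias is assumed to have n elements in this simplified sketch.
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) out[i] = in[i] + bias[i];
    }

    void LaunchAddBias(const float* in, const float* bias, float* out, int n,
                       cudaStream_t stream) {
      dim3 block(256);
      dim3 grid((n + block.x - 1) / block.x);
      AddBiasSketch<float>               // name and template args stay put
          <<<grid, block, 0, stream>>>(  // launch config opens the next line
              in, bias, out, n);
    }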
*/ #include + #include "paddle/fluid/framework/conv_search_cache.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/conv_cudnn_op_cache.h" diff --git a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc index 516b10fa021..09fa3a247e6 100644 --- a/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc +++ b/paddle/fluid/operators/fused/cudnn_bn_add_relu_test.cc @@ -182,19 +182,20 @@ void ComputeBatchNormForward(const platform::CUDADeviceContext &ctx, std::string data_layout = "NHWC"; attrs.insert({"data_layout", data_layout}); - auto op = framework::OpRegistry::CreateOp( - "batch_norm", {{"X", {"X"}}, - {"Scale", {"Scale"}}, - {"Bias", {"Bias"}}, - {"Mean", {"Mean"}}, - {"Variance", {"Variance"}}}, - {{"Y", {"Y"}}, - {"MeanOut", {"Mean"}}, - {"VarianceOut", {"Variance"}}, - {"SavedMean", {"SavedMean"}}, - {"SavedVariance", {"SavedVariance"}}, - {"ReserveSpace", {"ReserveSpace"}}}, - attrs); + auto op = + framework::OpRegistry::CreateOp("batch_norm", + {{"X", {"X"}}, + {"Scale", {"Scale"}}, + {"Bias", {"Bias"}}, + {"Mean", {"Mean"}}, + {"Variance", {"Variance"}}}, + {{"Y", {"Y"}}, + {"MeanOut", {"Mean"}}, + {"VarianceOut", {"Variance"}}, + {"SavedMean", {"SavedMean"}}, + {"SavedVariance", {"SavedVariance"}}, + {"ReserveSpace", {"ReserveSpace"}}}, + attrs); op->Run(scope, ctx.GetPlace()); paddle::framework::TensorCopySync(*y, platform::CPUPlace(), cpu_y); @@ -314,8 +315,9 @@ void ComputeFusedBNAddReluBackward( attrs.insert({"epsilon", epsilon}); attrs.insert({"act_type", act_type}); - auto op = framework::OpRegistry::CreateOp( - "fused_bn_add_activation_grad", {{"X", {"X"}}, + auto op = + framework::OpRegistry::CreateOp("fused_bn_add_activation_grad", + {{"X", {"X"}}, {"Y", {"Y"}}, {"Y@GRAD", {"Y@GRAD"}}, {"Scale", {"Scale"}}, @@ -323,11 +325,11 @@ void ComputeFusedBNAddReluBackward( {"SavedMean", {"SavedMean"}}, {"SavedVariance", {"SavedVariance"}}, {"ReserveSpace", {"ReserveSpace"}}}, - {{"X@GRAD", {"X@GRAD"}}, - {"Z@GRAD", {"Z@GRAD"}}, - {"Scale@GRAD", {"Scale@GRAD"}}, - {"Bias@GRAD", {"Bias@GRAD"}}}, - attrs); + {{"X@GRAD", {"X@GRAD"}}, + {"Z@GRAD", {"Z@GRAD"}}, + {"Scale@GRAD", {"Scale@GRAD"}}, + {"Bias@GRAD", {"Bias@GRAD"}}}, + attrs); op->Run(scope, ctx.GetPlace()); paddle::framework::TensorCopySync(*dx, platform::CPUPlace(), cpu_dx); diff --git a/paddle/fluid/operators/fused/cudnn_fusion_helper.h b/paddle/fluid/operators/fused/cudnn_fusion_helper.h index 13fad0b7cbb..a8f700c2119 100644 --- a/paddle/fluid/operators/fused/cudnn_fusion_helper.h +++ b/paddle/fluid/operators/fused/cudnn_fusion_helper.h @@ -15,6 +15,7 @@ limitations under the License. 
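The cudnn_bn_add_relu_test.cc hunks show how a call mixing a string literal with nested braced initializers is handled once it overflows: the assignment breaks after '=', and every {name, {vars}} entry lands on its own aligned line. A reduced sketch using standard containers; CreateOp and VarNameMap here are local stand-ins so the snippet compiles on its own, not the framework's types:

    #include <map>
    #include <string>
    #include <vector>

    using VarNameMap = std::map<std::string, std::vector<std::string>>;

    // Local stand-in for the real OpRegistry::CreateOp.
    int CreateOp(const std::string& type, const VarNameMap& in,
                 const VarNameMap& out) {
      return static_cast<int>(type.size() + in.size() + out.size());
    }

    int Sketch() {
      auto op =
          CreateOp("batch_norm",
                   {{"X", {"X"}}, {"Scale", {"Scale"}}, {"Bias", {"Bias"}}},
                   {{"Y", {"Y"}}, {"MeanOut", {"Mean"}}});
      return op;
    }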
*/ #pragma once #include + #include "paddle/fluid/framework/operator_kernel_configs.h" #include "paddle/fluid/platform/dynload/cudnn.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc index 5881322007a..f4443bba3fd 100644 --- a/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc +++ b/paddle/fluid/operators/fused/cudnn_norm_conv_test.cc @@ -167,9 +167,10 @@ void ComputeConv2DBackward(const platform::CUDADeviceContext &ctx, attrs.insert({"workspace_size_MB", 512}); auto op = framework::OpRegistry::CreateOp( - "conv2d_grad", {{"Input", {"Input"}}, - {"Filter", {"Filter"}}, - {"Output@GRAD", {"Output@GRAD"}}}, + "conv2d_grad", + {{"Input", {"Input"}}, + {"Filter", {"Filter"}}, + {"Output@GRAD", {"Output@GRAD"}}}, {{"Input@GRAD", {"Input@GRAD"}}, {"Filter@GRAD", {"Filter@GRAD"}}}, attrs); op->Run(scope, ctx.GetPlace()); diff --git a/paddle/fluid/operators/fused/fmha_ref.h b/paddle/fluid/operators/fused/fmha_ref.h index 38f9aff226e..ce95b0a320c 100644 --- a/paddle/fluid/operators/fused/fmha_ref.h +++ b/paddle/fluid/operators/fused/fmha_ref.h @@ -186,8 +186,9 @@ class FMHARef { if (dropout_param_.dropout_prob_) { DropoutFwGPUKernelDriver( static_cast(dev_ctx_), - dropout_param_.is_test_, static_cast( - dropout_param_.dropout_implementation_), + dropout_param_.is_test_, + static_cast( + dropout_param_.dropout_implementation_), dropout_param_.dropout_prob_, dropout_param_.is_upscale_in_train_, dropout_param_.is_fix_seed_, dropout_param_.seed_val_, static_cast(*softmax_out_tensor), dropout_param_.seed_, diff --git a/paddle/fluid/operators/fused/fused_attention_op.cc b/paddle/fluid/operators/fused/fused_attention_op.cc index a1adec9641a..06ede8e2c7b 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_attention_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -88,12 +89,13 @@ class FusedAttentionOp : public framework::OperatorWithKernel { // y: qkv's weight: [3, num_head, dim_head, dim_embed] auto x_dim = ctx->GetInputDim("X"); auto y_dim = ctx->GetInputDim("QKVW"); - PADDLE_ENFORCE_EQ(x_dim.size(), 3, platform::errors::InvalidArgument( - "The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); + PADDLE_ENFORCE_EQ( + x_dim.size(), 3, + platform::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); PADDLE_ENFORCE_EQ(y_dim.size(), 4, platform::errors::InvalidArgument( "The dimensions of qkv_weight must be 4" diff --git a/paddle/fluid/operators/fused/fused_attention_op.cu b/paddle/fluid/operators/fused/fused_attention_op.cu index f25bd539928..73fdd29fd62 100644 --- a/paddle/fluid/operators/fused/fused_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_attention_op.cu @@ -13,21 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. 
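frame_op.cc and fused_attention_op.cc above repeat the PADDLE_ENFORCE_* pattern: the break moves to just after the macro's opening parenthesis, the compared operands share one line, and the platform::errors factory gets its own indented block. The shape, with illustrative operands and message text:

    PADDLE_ENFORCE_EQ(
        x_dims.size(), 3,
        platform::errors::InvalidArgument(
            "The rank of Input(X) must be 3, but received [%d].",
            x_dims.size()));

One caveat the reflow makes easier to spot: the split messages are adjacent string literals, so pieces like "must be 3" "(batch_size, ..." concatenate with no space in between; this patch only reindents them and leaves the wording as is.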
*/ #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/operators/fused/attention_layer_norm.h" +#include "paddle/fluid/operators/fused/attn_gemm.h" +#include "paddle/fluid/operators/fused/fmha_ref.h" +#include "paddle/fluid/operators/fused/fused_dropout_helper.h" #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" - #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/fluid/operators/fused/attention_layer_norm.h" -#include "paddle/fluid/operators/fused/attn_gemm.h" -#include "paddle/fluid/operators/fused/fmha_ref.h" -#include "paddle/fluid/operators/fused/fused_dropout_helper.h" - #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/gpu/nccl_helper.h" @@ -463,11 +463,13 @@ class FusedAttentionGradKernel : public framework::OpKernel { auto *bias_dropout_residual_out_data = bias_dropout_residual_out->data(); auto *d_ln_2_scale_data = - (d_ln_2_scale == nullptr ? nullptr : d_ln_2_scale->mutable_data( - ctx.GetPlace())); + (d_ln_2_scale == nullptr + ? nullptr + : d_ln_2_scale->mutable_data(ctx.GetPlace())); auto *d_ln_2_bias_data = - (d_ln_2_bias == nullptr ? nullptr : d_ln_2_bias->mutable_data( - ctx.GetPlace())); + (d_ln_2_bias == nullptr + ? nullptr + : d_ln_2_bias->mutable_data(ctx.GetPlace())); auto *d_bias_dropout_residual_out_data = d_bias_dropout_residual_out->mutable_data(ctx.GetPlace()); diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc index 781f51d70ec..56f9afdbe90 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu index 71a2c9728cc..35a48611a74 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/fused/fused_dropout_helper.h" diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_activation_op.cc index 1b3521f1496..464856003f0 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
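The d_ln_2_scale/d_ln_2_bias hunk in fused_attention_op.cu shows how long conditional expressions now wrap, with '?' and ':' starting the continuation lines instead of trailing them. A reduced, compilable stand-in (TensorSketch and its mutable_data are simplified placeholders for the real framework API):

    struct TensorSketch {
      float buf[16];
      float* mutable_data() { return buf; }  // placeholder for the real API
    };

    float* GradOrNull(TensorSketch* d_scale) {
      return (d_scale == nullptr
                  ? nullptr
                  : d_scale->mutable_data());
    }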
*/ #include "paddle/fluid/operators/fused/fused_bn_activation_op.h" + #include #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" @@ -70,20 +72,22 @@ void FusedBatchNormActOp::InferShape(framework::InferShapeContext *ctx) const { const auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_GE(x_dims.size(), 2, platform::errors::PreconditionNotMet( - "ShapeError: the dimension of input " - "X must greater than or equal to 2." - "But received: the shape of input X " - "= [%s], the dimension of input X =" - "[%d]", - x_dims, x_dims.size())); - PADDLE_ENFORCE_LE(x_dims.size(), 5, platform::errors::PreconditionNotMet( - "ShapeError: the dimension of input " - "X must smaller than or equal to 5." - "But received: the shape of input X " - "= [%s], the dimension of input X =" - "[%d]", - x_dims, x_dims.size())); + PADDLE_ENFORCE_GE( + x_dims.size(), 2, + platform::errors::PreconditionNotMet("ShapeError: the dimension of input " + "X must greater than or equal to 2." + "But received: the shape of input X " + "= [%s], the dimension of input X =" + "[%d]", + x_dims, x_dims.size())); + PADDLE_ENFORCE_LE( + x_dims.size(), 5, + platform::errors::PreconditionNotMet("ShapeError: the dimension of input " + "X must smaller than or equal to 5." + "But received: the shape of input X " + "= [%s], the dimension of input X =" + "[%d]", + x_dims, x_dims.size())); const int64_t C = x_dims[x_dims.size() - 1]; @@ -140,22 +144,26 @@ framework::OpKernelType FusedBatchNormActOp::GetExpectedKernelType( if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::PreconditionNotMet( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::PreconditionNotMet( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Mean")->dtype()), - platform::errors::PreconditionNotMet( - "Mean input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Variance")->dtype()), - platform::errors::PreconditionNotMet( - "Variance input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), + platform::errors::PreconditionNotMet( + "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), + platform::errors::PreconditionNotMet( + "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), + platform::errors::PreconditionNotMet( + "Mean input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), + platform::errors::PreconditionNotMet( + "Variance input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_activation_op.cu index 9e709c9a01a..0ebe21dfc60 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.cu +++ 
b/paddle/fluid/operators/fused/fused_bn_activation_op.cu @@ -16,6 +16,7 @@ #include #include #include + #include "cub/cub.cuh" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/activation_op.h" @@ -181,8 +182,9 @@ class FusedBatchNormActKernel ctx.GetPlace()), variance_out->template mutable_data>( ctx.GetPlace()), - epsilon, saved_mean->template mutable_data>( - ctx.GetPlace()), + epsilon, + saved_mean->template mutable_data>( + ctx.GetPlace()), saved_variance->template mutable_data>( ctx.GetPlace()), activation_desc_, workspace_ptr, workspace_size, reserve_space_ptr, @@ -343,10 +345,12 @@ class FusedBatchNormActGradKernel /*dBnScaleBiasDesc=*/bn_param_desc_, /*bnScaleData=*/scale->template data>(), /*bnBiasData=*/bias->template data>(), - /*dBnScaleData=*/d_scale - ->template mutable_data>(ctx.GetPlace()), - /*dBnBiasData=*/d_bias - ->template mutable_data>(ctx.GetPlace()), + /*dBnScaleData=*/ + d_scale->template mutable_data>( + ctx.GetPlace()), + /*dBnBiasData=*/ + d_bias->template mutable_data>( + ctx.GetPlace()), /*epsilon=*/epsilon, /*savedMean=*/saved_mean_data, /*savedInvVariance=*/saved_var_data, diff --git a/paddle/fluid/operators/fused/fused_bn_activation_op.h b/paddle/fluid/operators/fused/fused_bn_activation_op.h index b8404e4c655..da9bca4fc22 100644 --- a/paddle/fluid/operators/fused/fused_bn_activation_op.h +++ b/paddle/fluid/operators/fused/fused_bn_activation_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/grad_op_desc_maker.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc index d667fafb835..5d06ac19f9e 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" + #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -52,20 +54,22 @@ void FusedBatchNormAddActOp::InferShape( "of input X = [%s], and the shape of " "input Y = [%s]", x_dims, z_dims)); - PADDLE_ENFORCE_GE(x_dims.size(), 2, platform::errors::InvalidArgument( - "ShapeError: the dimensions of input " - "must greater than or equal to 2." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, x_dims.size())); - PADDLE_ENFORCE_LE(x_dims.size(), 5, platform::errors::InvalidArgument( - "ShapeError: the dimensions of input " - "must smaller than or equal to 5." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, x_dims.size())); + PADDLE_ENFORCE_GE( + x_dims.size(), 2, + platform::errors::InvalidArgument("ShapeError: the dimensions of input " + "must greater than or equal to 2." + "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); + PADDLE_ENFORCE_LE( + x_dims.size(), 5, + platform::errors::InvalidArgument("ShapeError: the dimensions of input " + "must smaller than or equal to 5." 
+ "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); const int64_t C = x_dims[x_dims.size() - 1]; diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu index 421c1bacb66..2f7fc616012 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.cu @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/fluid/operators/fused/fused_bn_add_activation_op.h" @@ -160,8 +161,9 @@ class FusedBatchNormAddActKernel ctx.GetPlace()), variance_out->template mutable_data>( ctx.GetPlace()), - epsilon, saved_mean->template mutable_data>( - ctx.GetPlace()), + epsilon, + saved_mean->template mutable_data>( + ctx.GetPlace()), saved_variance->template mutable_data>( ctx.GetPlace()), activation_desc_, workspace_ptr, workspace_size, reserve_space_ptr, diff --git a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h index d5e5ae9bda6..07d2e4564b6 100644 --- a/paddle/fluid/operators/fused/fused_bn_add_activation_op.h +++ b/paddle/fluid/operators/fused/fused_bn_add_activation_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/grad_op_desc_maker.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/fused/fused_dropout_act_bias.h b/paddle/fluid/operators/fused/fused_dropout_act_bias.h old mode 100755 new mode 100644 index 9f5a1bad047..f7af7deff53 --- a/paddle/fluid/operators/fused/fused_dropout_act_bias.h +++ b/paddle/fluid/operators/fused/fused_dropout_act_bias.h @@ -109,15 +109,15 @@ void LaunchDropoutActBias(Functor act_functor, const uint64_t seed, const int real_vec_size = cols % VecSize == 0 ? 
VecSize : 1; const auto config = Get1DBlocksAnd2DGrids(ctx, rows, cols, real_vec_size); if (cols % VecSize == 0) { - FusedDropoutActBias<<< - config.block_per_grid, config.thread_per_block, 0, ctx.stream()>>>( - act_functor, seed, rows, cols, increment, dropout_prob, - is_upscale_in_train, is_test, src, bias, dst, mask_data); + FusedDropoutActBias + <<>>( + act_functor, seed, rows, cols, increment, dropout_prob, + is_upscale_in_train, is_test, src, bias, dst, mask_data); } else { - FusedDropoutActBias<<< - config.block_per_grid, config.thread_per_block, 0, ctx.stream()>>>( - act_functor, seed, rows, cols, increment, dropout_prob, - is_upscale_in_train, is_test, src, bias, dst, mask_data); + FusedDropoutActBias + <<>>( + act_functor, seed, rows, cols, increment, dropout_prob, + is_upscale_in_train, is_test, src, bias, dst, mask_data); } } @@ -231,28 +231,28 @@ void LaunchDropoutActBiasGrad(Functor act_functor, const T *dout, dim3 block_dim(threads, 128, 1); dim3 grid_dim(blocks, 1, 1); if (cols % VecSize == 0) { - FusedDropoutActBiasGrad< - T, MaskType, 8, 128, VecSize, - Functor><<>>( - act_functor, dout, mask, src, bias, factor, rows, cols, dx, dbias); + FusedDropoutActBiasGrad + <<>>(act_functor, dout, mask, + src, bias, factor, rows, + cols, dx, dbias); } else { - FusedDropoutActBiasGrad< - T, MaskType, 8, 128, 1, - Functor><<>>( - act_functor, dout, mask, src, bias, factor, rows, cols, dx, dbias); + FusedDropoutActBiasGrad + <<>>(act_functor, dout, mask, + src, bias, factor, rows, + cols, dx, dbias); } } else { const uint64_t n = rows * cols; platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(ctx, n / real_vec_size); if (n % VecSize == 0) { - FusedDropoutActGrad<<< - config.block_per_grid, config.thread_per_block, 0, ctx.stream()>>>( - act_functor, dout, mask, src, factor, n, dx); + FusedDropoutActGrad + <<>>( + act_functor, dout, mask, src, factor, n, dx); } else { - FusedDropoutActGrad<<< - config.block_per_grid, config.thread_per_block, 0, ctx.stream()>>>( - act_functor, dout, mask, src, factor, n, dx); + FusedDropoutActGrad + <<>>( + act_functor, dout, mask, src, factor, n, dx); } } } diff --git a/paddle/fluid/operators/fused/fused_dropout_helper.h b/paddle/fluid/operators/fused/fused_dropout_helper.h index c352f08ec2b..6dc1c446bd7 100644 --- a/paddle/fluid/operators/fused/fused_dropout_helper.h +++ b/paddle/fluid/operators/fused/fused_dropout_helper.h @@ -30,7 +30,7 @@ namespace operators { * The DropoutParam will be used in the fused_dropout_act_bias, * fused_residual_dropout_bias(pre_layer_norm=ture) or * fused_layernorm_residual_dropout_bias(pre_layer_norm=false). -*/ + */ struct DropoutParam { uint64_t seed; float dropout_prob; @@ -232,8 +232,8 @@ class FusedDropoutLayerNormHelper : public FusedDropoutHelper { using U = LayerNormParamType; switch (GetDesiredBlockDim(this->cols_)) { FIXED_BLOCK_DIM_CASE( - LayerNormForward< - T, U, kBlockDim><<rows_, kBlockDim, 0, ctx.stream()>>>( + LayerNormForward + <<rows_, kBlockDim, 0, ctx.stream()>>>( src, gamma, beta, out, mean, variance, epsilon_, this->cols_)); } } diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc index 3e69bf08067..a43562b2972 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
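attention_layer_norm.h earlier and fused_dropout_helper.h here both route LayerNormForward through FIXED_BLOCK_DIM_CASE, a switch that turns a runtime block size into a compile-time template argument before launching. A simplified, self-contained version of that dispatch; the row-copy body and the inline block-size choice are stand-ins for the real layer-norm reduction and GetDesiredBlockDim:

    template <int kBlockDim>
    __global__ void RowKernelSketch(const float* x, float* y, int cols) {
      const float* row_in = x + blockIdx.x * cols;
      float* row_out = y + blockIdx.x * cols;
      for (int c = threadIdx.x; c < cols; c += kBlockDim) {
        row_out[c] = row_in[c];  // real kernel computes mean/var per row
      }
    }

    void DispatchSketch(const float* x, float* y, int rows, int cols,
                        cudaStream_t stream) {
      switch (cols <= 512 ? 512 : 1024) {  // stand-in for GetDesiredBlockDim
        case 512:
          RowKernelSketch<512><<<rows, 512, 0, stream>>>(x, y, cols);
          break;
        case 1024:
          RowKernelSketch<1024><<<rows, 1024, 0, stream>>>(x, y, cols);
          break;
      }
    }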
*/ #include "paddle/fluid/operators/fused/fused_elemwise_activation_op.h" + #include #include diff --git a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h index 5404cdeab01..3ce54968355 100644 --- a/paddle/fluid/operators/fused/fused_elemwise_activation_op.h +++ b/paddle/fluid/operators/fused/fused_elemwise_activation_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" @@ -412,8 +413,9 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext &ctx) const override { auto in_y = ctx.Input("Y"); - PADDLE_ENFORCE_NE(in_y, nullptr, platform::errors::InvalidArgument( - "Input(Y) should not be nullptr.")); + PADDLE_ENFORCE_NE( + in_y, nullptr, + platform::errors::InvalidArgument("Input(Y) should not be nullptr.")); auto in_out = ctx.Input("Out"); PADDLE_ENFORCE_NE( in_out, nullptr, @@ -449,15 +451,17 @@ class FusedElemwiseActivationGradKernel : public framework::OpKernel { " so the number of 'Out' should be two.")); } else { if (!InputXCanBeAbsent(functor_list)) { - PADDLE_ENFORCE_NE(in_x, nullptr, platform::errors::InvalidArgument( - "Input(X) should not be null.")); + PADDLE_ENFORCE_NE( + in_x, nullptr, + platform::errors::InvalidArgument("Input(X) should not be null.")); } } // Get in_x if (ctx.HasInput("X")) { - PADDLE_ENFORCE_NE(in_x, nullptr, platform::errors::InvalidArgument( - "Input(X) should not be null.")); + PADDLE_ENFORCE_NE( + in_x, nullptr, + platform::errors::InvalidArgument("Input(X) should not be null.")); } else { // If functor_list contains elementwise_add, the backward doesn't use // in_x, in_y and in_out. diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc index 6746b3b8e84..951189269c7 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu index 13f1c6808ae..f0cb2edb670 100644 --- a/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu +++ b/paddle/fluid/operators/fused/fused_embedding_eltwise_layernorm_op.cu @@ -13,7 +13,9 @@ // limitations under the License. #include + #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" diff --git a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc index 7308f307792..625bfe36e38 100644 --- a/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.h" + #include + #include "paddle/fluid/platform/cpu_info.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/cpu_vec.h" @@ -100,10 +102,11 @@ void FusedEmbeddingFCLSTMOp::InferShape( platform::errors::InvalidArgument( "The rank of Input(Bias) should be 2, but received value is:%d.", b_dims.size())); - PADDLE_ENFORCE_EQ(b_dims[0], 1, platform::errors::InvalidArgument( - "The first dimension of Input(Bias) " - "should be 1, but received value is:%d.", - b_dims[0])); + PADDLE_ENFORCE_EQ(b_dims[0], 1, + platform::errors::InvalidArgument( + "The first dimension of Input(Bias) " + "should be 1, but received value is:%d.", + b_dims[0])); PADDLE_ENFORCE_EQ( b_dims[1], (ctx->Attrs().Get("use_peepholes") ? 7 : 4) * frame_size, platform::errors::InvalidArgument( @@ -237,21 +240,21 @@ This operator fuse the X into LSTM, more details can refer to LSTM op. template class FusedEmbeddingFCLSTMKernel : public framework::OpKernel { public: -#define INIT_VEC_FUNC \ - std::function act_gate, act_cell, act_cand; \ - auto& act_gate_str = ctx.Attr("gate_activation"); \ - auto& act_cell_str = ctx.Attr("cell_activation"); \ - auto& act_cand_str = ctx.Attr("candidate_activation"); \ - if (platform::MayIUse(platform::avx)) { \ - phi::funcs::VecActivations act_functor; \ - act_gate = act_functor(act_gate_str); \ - act_cell = act_functor(act_cell_str); \ - act_cand = act_functor(act_cand_str); \ - } else { \ - phi::funcs::VecActivations act_functor; \ - act_gate = act_functor(act_gate_str); \ - act_cell = act_functor(act_cell_str); \ - act_cand = act_functor(act_cand_str); \ +#define INIT_VEC_FUNC \ + std::function act_gate, act_cell, act_cand; \ + auto& act_gate_str = ctx.Attr("gate_activation"); \ + auto& act_cell_str = ctx.Attr("cell_activation"); \ + auto& act_cand_str = ctx.Attr("candidate_activation"); \ + if (platform::MayIUse(platform::avx)) { \ + phi::funcs::VecActivations act_functor; \ + act_gate = act_functor(act_gate_str); \ + act_cell = act_functor(act_cell_str); \ + act_cand = act_functor(act_cand_str); \ + } else { \ + phi::funcs::VecActivations act_functor; \ + act_gate = act_functor(act_gate_str); \ + act_cell = act_functor(act_cell_str); \ + act_cand = act_functor(act_cand_str); \ } #define INIT_BASE_INPUT_OUTPUT \ diff --git a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc index ec3a76e316e..cb3bf585775 100644 --- a/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc +++ b/paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fused_embedding_seq_pool_op.h" + #include + #include "paddle/fluid/framework/var_type_inference.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu b/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu index 04d3730a77d..2c0184fea46 100644 --- a/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu +++ b/paddle/fluid/operators/fused/fused_fc_elementwise_layernorm_op.cu @@ -179,22 +179,20 @@ class FusedFCElementwiseLayerNormOpKernel : public framework::OpKernel { if (with_relu) { switch (platform::RoundToPowerOfTwo(N)) { CUDA_LAUNCH_KERNEL_HELPER( - InplaceAddReluAddLayerNormKernel< - T, true, - kPowerOfTwoDim><<>>( - y_data, bias_0_data, bias_1_data, scale_data, out_data, - mean_data, variance_data, M, N, epsilon)); + InplaceAddReluAddLayerNormKernel + <<>>(y_data, bias_0_data, bias_1_data, scale_data, + out_data, mean_data, variance_data, M, N, + epsilon)); } } else { switch (platform::RoundToPowerOfTwo(N)) { CUDA_LAUNCH_KERNEL_HELPER( - InplaceAddReluAddLayerNormKernel< - T, false, - kPowerOfTwoDim><<>>( - y_data, bias_0_data, bias_1_data, scale_data, out_data, - mean_data, variance_data, M, N, epsilon)); + InplaceAddReluAddLayerNormKernel + <<>>(y_data, bias_0_data, bias_1_data, scale_data, + out_data, mean_data, variance_data, M, N, + epsilon)); } } } diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cc b/paddle/fluid/operators/fused/fused_feedforward_op.cc index 8e15232acda..d3cc1b91276 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cc +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/matmul_v2_op.h" diff --git a/paddle/fluid/operators/fused/fused_feedforward_op.cu b/paddle/fluid/operators/fused/fused_feedforward_op.cu index 2eb9885286d..675ec29da67 100644 --- a/paddle/fluid/operators/fused/fused_feedforward_op.cu +++ b/paddle/fluid/operators/fused/fused_feedforward_op.cu @@ -14,11 +14,10 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" -#include "paddle/fluid/operators/matmul_v2_op.h" -#include "paddle/phi/kernels/funcs/blas/blas.h" - #include "paddle/fluid/operators/fused/fused_dropout_helper.h" #include "paddle/fluid/operators/layer_norm_kernel.cu.h" +#include "paddle/fluid/operators/matmul_v2_op.h" +#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/elementwise_functor.h" @@ -387,20 +386,19 @@ class FusedFeedForwardGradKernel : public framework::OpKernel { !pre_layer_norm ? context.Input("Ln2Bias") : nullptr; auto* d_x = context.Output(framework::GradVarName("X")); - auto* d_ln1_scale = pre_layer_norm - ? context.Output( - framework::GradVarName("Ln1Scale")) - : nullptr; - auto* d_ln1_bias = pre_layer_norm - ? context.Output( - framework::GradVarName("Ln1Bias")) - : nullptr; - auto* d_ln2_scale = - pre_layer_norm ? nullptr : context.Output( - framework::GradVarName("Ln2Scale")); - auto* d_ln2_bias = - pre_layer_norm ? nullptr : context.Output( - framework::GradVarName("Ln2Bias")); + auto* d_ln1_scale = pre_layer_norm ? context.Output( + framework::GradVarName("Ln1Scale")) + : nullptr; + auto* d_ln1_bias = pre_layer_norm ? 
context.Output( + framework::GradVarName("Ln1Bias")) + : nullptr; + auto* d_ln2_scale = pre_layer_norm + ? nullptr + : context.Output( + framework::GradVarName("Ln2Scale")); + auto* d_ln2_bias = pre_layer_norm ? nullptr + : context.Output( + framework::GradVarName("Ln2Bias")); auto* d_linear1_weight = context.Output( framework::GradVarName("Linear1Weight")); auto* d_linear1_bias = context.Output( diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cc b/paddle/fluid/operators/fused/fused_gate_attention_op.cc index ba9dbd82e3d..0bbeabd5fc9 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cc +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/fused_gate_attention_op.cu b/paddle/fluid/operators/fused/fused_gate_attention_op.cu index b1badf72557..8f375a22cc0 100644 --- a/paddle/fluid/operators/fused/fused_gate_attention_op.cu +++ b/paddle/fluid/operators/fused/fused_gate_attention_op.cu @@ -374,9 +374,9 @@ class FusedGateAttentionOpKernel : public framework::OpKernel { v_transpose_out, qkv_transpose_out, softmax_out, fmha_out, &config); // 3. Gating Linear - Tensor *fmha_or_gate_out = - !has_gating ? fmha_out : ComputeGatingLinearForward(ctx, config, - query, fmha_out); + Tensor *fmha_or_gate_out = !has_gating ? fmha_out + : ComputeGatingLinearForward( + ctx, config, query, fmha_out); // 4. Output Linear ComputeOutputLinearForward(ctx, config, fmha_or_gate_out); diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc index 7cb6777e5a7..978daa3be85 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.cc @@ -14,6 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fused_gemm_epilogue_op.h" + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -369,8 +370,9 @@ class FusedGemmEpilogueOpGradMaker : public framework::SingleGradOpMaker { protected: void Apply(GradOpPtr op) const override { const auto& act_type = this->template Attr("activation"); - PADDLE_ENFORCE_EQ(act_type, "none", phi::errors::InvalidArgument( - "The activation should be none.")); + PADDLE_ENFORCE_EQ( + act_type, "none", + phi::errors::InvalidArgument("The activation should be none.")); op->SetType(this->ForwardOpType() + "_grad"); op->SetInput("X", this->Input("X")); diff --git a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.h b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.h index 8ff41b2c961..b00bdfe5660 100644 --- a/paddle/fluid/operators/fused/fused_gemm_epilogue_op.h +++ b/paddle/fluid/operators/fused/fused_gemm_epilogue_op.h @@ -16,9 +16,11 @@ limitations under the License. 
*/ #pragma once #include + #include #include #include + #include "gflags/gflags.h" #include "paddle/fluid/platform/dynload/cublasLt.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h index 9d7d34ebdc9..f72f73438c0 100644 --- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h @@ -441,11 +441,10 @@ void LaunchLayernormResidualDropoutBias( // call layernorm forward switch (GetDesiredBlockDim(cols)) { FIXED_BLOCK_DIM_CASE( - LayerNormForward< - T, U, kBlockDim, - ScaleBiasWithSameTypeX><<>>( - dst, scale, layernorm_bias, layernorm_dst, mean, var, epsilon, - cols)); + LayerNormForward + <<>>(dst, scale, layernorm_bias, + layernorm_dst, mean, var, + epsilon, cols)); default: PADDLE_THROW(platform::errors::InvalidArgument( "Product from begin_norm_axis to end must be larger than 1")); @@ -468,11 +467,11 @@ void LaunchLayernormResidualDropoutBias( static_cast(std::ceil(rows / static_cast(ROWS_PER_CTA))); \ fused_fast_ln_fwd_kernel< \ T, U, LayerNormScaleBiasT, uint8_t, \ - VecSize, WARPS_M, WARPS_N, BYTES_PER_LDG, \ - cols><<>>( \ - rows, cols, seed, dropout_prob, is_upscale_in_train, is_test, \ - increment, epsilon, src, residual, bias, scale, layernorm_bias, \ - mask_data, mean, var, dst, layernorm_dst); \ + VecSize, WARPS_M, WARPS_N, BYTES_PER_LDG, cols> \ + <<>>( \ + rows, cols, seed, dropout_prob, is_upscale_in_train, is_test, \ + increment, epsilon, src, residual, bias, scale, layernorm_bias, \ + mask_data, mean, var, dst, layernorm_dst); \ } break #define LAUNCH_FUSED_FAST_LN_KERNEL \ @@ -494,12 +493,11 @@ void LaunchLayernormResidualDropoutBias( const int VecSize = MAX_CACHE_BYTES / sizeof(T); if (cols % VecSize != 0) { int blockDim = GetDesiredBlockDim(cols); - FusedLayernormResidualDropoutBias< - T, uint8_t, 1, U, - ScaleBiasWithSameTypeX><<>>( - rows, cols, seed, dropout_prob, is_upscale_in_train, is_test, increment, - epsilon, src, residual, bias, scale, layernorm_bias, mask_data, dst, - layernorm_dst, mean, var); + FusedLayernormResidualDropoutBias + <<>>( + rows, cols, seed, dropout_prob, is_upscale_in_train, is_test, + increment, epsilon, src, residual, bias, scale, layernorm_bias, + mask_data, dst, layernorm_dst, mean, var); } else { if (can_call_fast_ln_kernel) { switch (cols) { @@ -512,12 +510,12 @@ void LaunchLayernormResidualDropoutBias( } } else { int blockDim = GetDesiredBlockDim(cols / VecSize); - FusedLayernormResidualDropoutBias< - T, uint8_t, VecSize, U, - ScaleBiasWithSameTypeX><<>>( - rows, cols, seed, dropout_prob, is_upscale_in_train, is_test, - increment, epsilon, src, residual, bias, scale, layernorm_bias, - mask_data, dst, layernorm_dst, mean, var); + FusedLayernormResidualDropoutBias + <<>>( + rows, cols, seed, dropout_prob, is_upscale_in_train, is_test, + increment, epsilon, src, residual, bias, scale, layernorm_bias, + mask_data, dst, layernorm_dst, mean, var); } } } diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc index 98602e4edd0..63627db49d6 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -62,12 +63,13 @@ class FusedMultiTransformerOp : public framework::OperatorWithKernel { // y: qkv's weight: [3, num_head, dim_head, dim_embed] auto x_dim = ctx->GetInputDim("X"); auto y_dim = ctx->GetInputsDim("QKVW")[0]; - PADDLE_ENFORCE_EQ(x_dim.size(), 3, platform::errors::InvalidArgument( - "The dimensions of x must be 3" - "(batch_size, seq_len, dim_embed)," - "but received dimensions of" - "Input is [%d]", - x_dim.size())); + PADDLE_ENFORCE_EQ( + x_dim.size(), 3, + platform::errors::InvalidArgument("The dimensions of x must be 3" + "(batch_size, seq_len, dim_embed)," + "but received dimensions of" + "Input is [%d]", + x_dim.size())); PADDLE_ENFORCE_EQ(y_dim.size(), 4, platform::errors::InvalidArgument( "The dimensions of qkv_weight must be 4" diff --git a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu index c13c287f4af..01c5b79fff1 100644 --- a/paddle/fluid/operators/fused/fused_multi_transformer_op.cu +++ b/paddle/fluid/operators/fused/fused_multi_transformer_op.cu @@ -18,18 +18,18 @@ limitations under the License. */ #include #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/platform/device/gpu/gpu_device_function.h" -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" - -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/fluid/operators/fused/attention_layer_norm.h" #include "paddle/fluid/operators/fused/attn_gemm.h" #include "paddle/fluid/operators/fused/fmha_ref.h" #include "paddle/fluid/operators/fused/fused_dropout_helper.h" +#include "paddle/fluid/platform/device/gpu/gpu_device_function.h" +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" +#include "paddle/phi/kernels/funcs/math_function.h" #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include "paddle/fluid/platform/collective_helper.h" @@ -861,10 +861,9 @@ inline size_t smem_size_in_bytes( size_t smem_sz = smem_size_in_bytes(params, Dh, THDS_PER_VALUE, \ THDS_PER_BLOCK, pad_active_groups); \ dim3 grid(params.num_head, params.batch_size); \ - masked_multihead_attention_kernel< \ - T, Dh, Dh_MAX, THDS_PER_KEY, THDS_PER_VALUE, \ - THDS_PER_BLOCK><<>>( \ - params, pad_active_groups) + masked_multihead_attention_kernel \ + <<>>(params, pad_active_groups) template void fmha_launch_kernel(const Masked_multihead_attention_params ¶ms, diff --git a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h index 1d3085a013f..0cc31e6fc32 100644 --- a/paddle/fluid/operators/fused/fused_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_residual_dropout_bias.h @@ -153,16 +153,15 @@ void LaunchResidualDropoutBias(const uint32_t rows, const uint32_t cols, const int real_vec_size = cols % VecSize == 0 ? 
VecSize : 1; auto config = Get1DBlocksAnd2DGrids(ctx, rows, cols, real_vec_size); if (cols % VecSize == 0) { - FusedResidualDropoutBias<<< - config.block_per_grid, config.thread_per_block, 0, ctx.stream()>>>( - rows, cols, seed, dropout_prob, is_upscale_in_train, src, residual, - bias, mask_data, dst, increment, is_test); + FusedResidualDropoutBias + <<>>( + rows, cols, seed, dropout_prob, is_upscale_in_train, src, residual, + bias, mask_data, dst, increment, is_test); } else { - FusedResidualDropoutBias< - T, uint8_t, - 1><<>>( - rows, cols, seed, dropout_prob, is_upscale_in_train, src, residual, - bias, mask_data, dst, increment, is_test); + FusedResidualDropoutBias + <<>>( + rows, cols, seed, dropout_prob, is_upscale_in_train, src, residual, + bias, mask_data, dst, increment, is_test); } } @@ -263,27 +262,26 @@ void LaunchResidualDropoutBiasGrad(const T *dout, const MaskType *mask, dim3 block_dim(threads, 128, 1); dim3 grid_dim(blocks, 1, 1); if (cols % VecSize == 0) { - FusedResidualDropoutBiasGrad< - T, MaskType, 8, 128, - VecSize><<>>( - dout, mask, factor, rows, cols, dx, dbias); + FusedResidualDropoutBiasGrad + <<>>(dout, mask, factor, rows, + cols, dx, dbias); } else { - FusedResidualDropoutBiasGrad<<>>( - dout, mask, factor, rows, cols, dx, dbias); + FusedResidualDropoutBiasGrad + <<>>(dout, mask, factor, rows, + cols, dx, dbias); } } else { const uint64_t n = rows * cols; platform::GpuLaunchConfig config = platform::GetGpuLaunchConfig1D(ctx, n / real_vec_size); if (n % VecSize == 0) { - FusedResidualDropoutGrad<<< - config.block_per_grid, config.thread_per_block, 0, ctx.stream()>>>( - dout, mask, factor, n, dx); + FusedResidualDropoutGrad + <<>>( + dout, mask, factor, n, dx); } else { - FusedResidualDropoutGrad<<< - config.block_per_grid, config.thread_per_block, 0, ctx.stream()>>>( - dout, mask, factor, n, dx); + FusedResidualDropoutGrad + <<>>( + dout, mask, factor, n, dx); } } } diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc index 23b82ac5d96..e316f58b3f7 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fused_seqpool_cvm_op.h" + #include namespace paddle { namespace operators { @@ -34,9 +35,10 @@ class FusedSeqpoolCVMOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( cvm_dims.size(), 2UL, platform::errors::InvalidArgument("Input(CVM)'s rank should be 2.")); - PADDLE_ENFORCE_EQ(cvm_dims[1], 2UL, platform::errors::InvalidArgument( - "The 2nd dimension of " - "Input(CVM) should be 2.")); + PADDLE_ENFORCE_EQ( + cvm_dims[1], 2UL, + platform::errors::InvalidArgument("The 2nd dimension of " + "Input(CVM) should be 2.")); auto ins_dims = ctx->GetInputsDim("X"); const int cvm_offset = ctx->Attrs().Get("cvm_offset"); diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu index 3770a536a8f..2b6b7d49345 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.cu @@ -13,6 +13,7 @@ // limitations under the License. 
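LaunchResidualDropoutBias above, like LaunchDropoutActBias earlier, uses one dispatch idiom throughout: check whether cols divides by the vector width, then instantiate the kernel with VecSize or fall back to a scalar VecSize of 1. A reduced, self-contained version of the idiom; dropout, bias, and the Get1DBlocksAnd2DGrids helper are dropped to keep the sketch short:

    template <typename T, int VecSize>
    __global__ void ResidualAddSketch(const T* src, const T* residual, T* dst,
                                      int n) {
      int i = (blockIdx.x * blockDim.x + threadIdx.x) * VecSize;
      if (i + VecSize <= n) {
    #pragma unroll
        for (int k = 0; k < VecSize; ++k) {
          dst[i + k] = src[i + k] + residual[i + k];
        }
      }
    }

    void LaunchSketch(const float* src, const float* residual, float* dst,
                      int rows, int cols, cudaStream_t stream) {
      const int n = rows * cols;
      const int kVecSize = 4;
      const int real_vec = (cols % kVecSize == 0) ? kVecSize : 1;
      dim3 block(256);
      dim3 grid((n / real_vec + block.x - 1) / block.x);
      if (cols % kVecSize == 0) {
        ResidualAddSketch<float, kVecSize>
            <<<grid, block, 0, stream>>>(src, residual, dst, n);
      } else {
        ResidualAddSketch<float, 1>
            <<<grid, block, 0, stream>>>(src, residual, dst, n);
      }
    }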
#include + #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/operators/fused/fused_seqpool_cvm_op.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" diff --git a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h index 6042772adb0..e3bc424f259 100644 --- a/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h +++ b/paddle/fluid/operators/fused/fused_seqpool_cvm_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h index 11f1011dec3..4c00f778ced 100644 --- a/paddle/fluid/operators/fused/fused_softmax_mask.cu.h +++ b/paddle/fluid/operators/fused/fused_softmax_mask.cu.h @@ -114,10 +114,9 @@ __global__ void FusedSoftmaxMaskVecKernel(T* dst, const T* src, const T* mask, } } -#define SOFTMAX_MASK_KERNEL(VEC_SIZE, ELEMENTS) \ - FusedSoftmaxMaskVecKernel<<>>( \ - dst, src, mask, seq_len) +#define SOFTMAX_MASK_KERNEL(VEC_SIZE, ELEMENTS) \ + FusedSoftmaxMaskVecKernel \ + <<>>(dst, src, mask, seq_len) // FIXME(wangxi): It is found that the performance of VEC_SIZE=2 is better // than that of =4 and =8. Further analysis of the kernel is needed later. diff --git a/paddle/fluid/operators/fused/fused_transformer_op.cc b/paddle/fluid/operators/fused/fused_transformer_op.cc index 9e5fc42fc76..d11171eb2d0 100644 --- a/paddle/fluid/operators/fused/fused_transformer_op.cc +++ b/paddle/fluid/operators/fused/fused_transformer_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fused_transformer_op.h" + #include namespace paddle { @@ -157,5 +158,5 @@ void FusedMHA::ComputeForward(T* output, T* softmax_mask) {} template void FusedMHA::ComputeBackward(const T* grad_output, T* softmax_mask, T* grad_x) {} -} -} \ No newline at end of file +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/fused/fused_transformer_op.h b/paddle/fluid/operators/fused/fused_transformer_op.h index 2d2d390d243..a2d5862abf0 100644 --- a/paddle/fluid/operators/fused/fused_transformer_op.h +++ b/paddle/fluid/operators/fused/fused_transformer_op.h @@ -151,5 +151,5 @@ class FusedTransformerEncoderLayer { std::string act_method; }; -} -} +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc index eeeb004003c..802cd18e1db 100644 --- a/paddle/fluid/operators/fused/fusion_conv_inception_op.cc +++ b/paddle/fluid/operators/fused/fusion_conv_inception_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
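The fused_transformer_op hunks fix two things at once: the missing end-of-file newline and the bare closing braces, which now carry namespace comments. The resulting shape:

    namespace paddle {
    namespace operators {
    // ... definitions ...
    }  // namespace operators
    }  // namespace paddle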
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" @@ -35,8 +36,9 @@ class ConvInceptionFusionOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_dims.size(), 4, platform::errors::InvalidArgument("Conv intput should be 4-D tensor.")); - PADDLE_ENFORCE_EQ(w_dims.size(), 4, platform::errors::InvalidArgument( - "There should be 4 filters.")); + PADDLE_ENFORCE_EQ( + w_dims.size(), 4, + platform::errors::InvalidArgument("There should be 4 filters.")); PADDLE_ENFORCE_EQ(w_dims[0][1], in_dims[1], platform::errors::InvalidArgument( "Invalid fileter channel number %d, which should be " diff --git a/paddle/fluid/operators/fused/fusion_group_op.cu.cc b/paddle/fluid/operators/fused/fusion_group_op.cu.cc index 94949f56331..c592bbe7d3e 100644 --- a/paddle/fluid/operators/fused/fusion_group_op.cu.cc +++ b/paddle/fluid/operators/fused/fusion_group_op.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_group_op.h" + #include "paddle/fluid/platform/float16.h" namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/fused/fusion_group_op.h b/paddle/fluid/operators/fused/fusion_group_op.h index 5e5f2c60ffb..f71355b85d9 100644 --- a/paddle/fluid/operators/fused/fusion_group_op.h +++ b/paddle/fluid/operators/fused/fusion_group_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_code.h" diff --git a/paddle/fluid/operators/fused/fusion_gru_op.cc b/paddle/fluid/operators/fused/fusion_gru_op.cc index afbd5380a83..fd05155bc2c 100644 --- a/paddle/fluid/operators/fused/fusion_gru_op.cc +++ b/paddle/fluid/operators/fused/fusion_gru_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_gru_op.h" + #include // for memcpy #include #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/fluid/operators/fused/fusion_lstm_op.cc b/paddle/fluid/operators/fused/fusion_lstm_op.cc index 3dada660aef..f2e6f099b4b 100644 --- a/paddle/fluid/operators/fused/fusion_lstm_op.cc +++ b/paddle/fluid/operators/fused/fusion_lstm_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_lstm_op.h" + #include + #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/fc_functor.h" diff --git a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc index bed5125b995..c9d6d42efac 100644 --- a/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc @@ -13,8 +13,10 @@ * limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.h" + #include #include + #include "paddle/fluid/operators/jit/kernels.h" namespace paddle { @@ -24,10 +26,11 @@ void FusionRepeatedFCReluOp::InferShape( framework::InferShapeContext* ctx) const { OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "FusionRepeatedFCRelu"); auto sz = ctx->Inputs("W").size(); - PADDLE_ENFORCE_GT(sz, 1UL, platform::errors::InvalidArgument( - "Inputs(W) of FusionRepeatedFCReluOp should " - "be greater than 1, but received value is %d.", - sz)); + PADDLE_ENFORCE_GT(sz, 1UL, + platform::errors::InvalidArgument( + "Inputs(W) of FusionRepeatedFCReluOp should " + "be greater than 1, but received value is %d.", + sz)); PADDLE_ENFORCE_EQ( ctx->Inputs("Bias").size(), sz, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc index ee28a548056..b99b53de9c4 100644 --- a/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.h" + #include // for min, max #include + #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/fc_functor.h" diff --git a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc index 58613173ad2..7341d1f864d 100644 --- a/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.h" + #include + #include "paddle/fluid/platform/cpu_info.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/cpu_vec.h" @@ -48,8 +50,9 @@ void FusionSeqExpandConcatFCOp::InferShape( for (size_t i = 1; i < ins_dims.size(); ++i) { sum += ins_dims[i][1]; } - PADDLE_ENFORCE_EQ(sum, w_dims[0], platform::errors::InvalidArgument( - "FC height should be sum of all inputs " + PADDLE_ENFORCE_EQ( + sum, w_dims[0], + platform::errors::InvalidArgument("FC height should be sum of all inputs " "width, but received FC height is: %d, " "sum of all inputs width is: %d.", w_dims[0], sum)); diff --git a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc index e574d67e398..1d487ef3dab 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc @@ -13,8 +13,10 @@ * limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fusion_seqpool_concat_op.h" + #include #include + #include "paddle/fluid/operators/jit/kernels.h" namespace paddle { @@ -29,17 +31,19 @@ void FusionSeqPoolConcatOp::InferShape( ctx->Inputs("X").size())); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "FusionSeqPoolConcat"); int axis = ctx->Attrs().Get("axis"); - PADDLE_ENFORCE_EQ(axis, 1, platform::errors::InvalidArgument( - "FusionSeqPoolConcatOp only supports concat " - "axis=1 yet, but received axis value is %d", - axis)); + PADDLE_ENFORCE_EQ(axis, 1, + platform::errors::InvalidArgument( + "FusionSeqPoolConcatOp only supports concat " + "axis=1 yet, but received axis value is %d", + axis)); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); - PADDLE_ENFORCE_GT(n, 0UL, platform::errors::InvalidArgument( - "Input tensors count should be greater than 0, " - "but received value is %d.", - n)); + PADDLE_ENFORCE_GT(n, 0UL, + platform::errors::InvalidArgument( + "Input tensors count should be greater than 0, " + "but received value is %d.", + n)); if (n == 1) { LOG(WARNING) << "Only have one input, may waste memory"; } diff --git a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc index c74cc504840..d29bc00b545 100644 --- a/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.cc @@ -13,8 +13,10 @@ * limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_seqpool_cvm_concat_op.h" + #include #include + #include "paddle/fluid/operators/jit/kernels.h" namespace paddle { @@ -31,20 +33,23 @@ void FusionSeqPoolCVMConcatOp::InferShape( paddle::platform::errors::InvalidArgument( "Output(Out) of FusionSeqPoolCVMConcatOp should not be null.")); int axis = ctx->Attrs().Get("axis"); - PADDLE_ENFORCE_EQ(axis, 1, paddle::platform::errors::InvalidArgument( - "FusionSeqPoolCVMConcatOp only supports " - "concat axis=1 yet, but received %d.", - axis)); + PADDLE_ENFORCE_EQ(axis, 1, + paddle::platform::errors::InvalidArgument( + "FusionSeqPoolCVMConcatOp only supports " + "concat axis=1 yet, but received %d.", + axis)); bool use_cvm = ctx->Attrs().Get("use_cvm"); - PADDLE_ENFORCE_EQ(use_cvm, true, paddle::platform::errors::InvalidArgument( - "FusionSeqPoolCVMConcatOp only supports " - "use_cvm is true yet, but received %d.", - use_cvm)); + PADDLE_ENFORCE_EQ(use_cvm, true, + paddle::platform::errors::InvalidArgument( + "FusionSeqPoolCVMConcatOp only supports " + "use_cvm is true yet, but received %d.", + use_cvm)); auto ins_dims = ctx->GetInputsDim("X"); const size_t n = ins_dims.size(); - PADDLE_ENFORCE_GT(n, 0UL, paddle::platform::errors::InvalidArgument( - "Input tensors count should > 0.")); + PADDLE_ENFORCE_GT(n, 0UL, + paddle::platform::errors::InvalidArgument( + "Input tensors count should > 0.")); if (n == 1) { LOG(WARNING) << "Only have one input, may waste memory"; } diff --git a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc index 870f72b8c7f..047fefc1eeb 100644 --- a/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc +++ b/paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc @@ -13,8 +13,10 @@ * limitations under the License. 
*/ #include "paddle/fluid/operators/fused/fusion_squared_mat_sub_op.h" + #include #include + #include "paddle/fluid/operators/jit/kernels.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc index 954cd7cc7a4..bf8e9818e54 100644 --- a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc +++ b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.h" + #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc index 786f5b4e077..eb29859d8d1 100644 --- a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc +++ b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.h" + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.h b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.h index 66e6c00da2d..52140c0ca46 100644 --- a/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.h +++ b/paddle/fluid/operators/fused/fusion_transpose_flatten_concat_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/phi/core/ddim.h" namespace paddle { diff --git a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc index 0ffc4c91b85..c9956dcdd20 100644 --- a/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc +++ b/paddle/fluid/operators/fused/mkldnn/multi_gru_mkldnn_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "dnnl.hpp" #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/operator.h" @@ -31,8 +32,8 @@ using paddle::platform::CPUDeviceContext; using paddle::platform::CreateKey; using paddle::platform::MKLDNNGetDataType; using paddle::platform::MKLDNNMemDesc; -using platform::to_void_cast; using phi::vectorize; +using platform::to_void_cast; using Direction = dnnl::rnn_direction; namespace { diff --git a/paddle/fluid/operators/fused/multi_gru_op.cc b/paddle/fluid/operators/fused/multi_gru_op.cc index e7d697767fc..ad0cc0bd1cf 100644 --- a/paddle/fluid/operators/fused/multi_gru_op.cc +++ b/paddle/fluid/operators/fused/multi_gru_op.cc @@ -17,6 +17,7 @@ limitations under the License. */ #include // for memcpy #include #include + #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/fc_functor.h" diff --git a/paddle/fluid/operators/fused/multi_gru_op.h b/paddle/fluid/operators/fused/multi_gru_op.h index ebd3faf44a8..8b064c8754f 100644 --- a/paddle/fluid/operators/fused/multi_gru_op.h +++ b/paddle/fluid/operators/fused/multi_gru_op.h @@ -19,9 +19,9 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +using framework::ExecutionContext; using framework::LoDTensor; using framework::Tensor; -using framework::ExecutionContext; class MultiGRUOp : public framework::OperatorWithKernel { public: diff --git a/paddle/fluid/operators/fused/multihead_matmul_op.cc b/paddle/fluid/operators/fused/multihead_matmul_op.cc index 8f2c04d5afe..79b886c3729 100644 --- a/paddle/fluid/operators/fused/multihead_matmul_op.cc +++ b/paddle/fluid/operators/fused/multihead_matmul_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/operators/fused/multihead_matmul_op.cu b/paddle/fluid/operators/fused/multihead_matmul_op.cu index f0e05659c92..30155346716 100644 --- a/paddle/fluid/operators/fused/multihead_matmul_op.cu +++ b/paddle/fluid/operators/fused/multihead_matmul_op.cu @@ -13,7 +13,9 @@ // limitations under the License. #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/math/bert_encoder_functor.h" @@ -105,8 +107,8 @@ void TransQKVWithBias(const int batch, const int seq_len, const int head_size, platform::errors::InvalidArgument( "head_num (%d) * head_size (%d) should <= %d", head_num, head_size, 1024 * 4)); - TransposeQkvKernel<<>>(h, input4, bias4, - output4); + TransposeQkvKernel + <<>>(h, input4, bias4, output4); } else if (head_size % 2 == 0 && scratch_size % 2 == 0) { const int h = head_size / 2; const float2 *input2 = reinterpret_cast(input); @@ -118,8 +120,8 @@ void TransQKVWithBias(const int batch, const int seq_len, const int head_size, platform::errors::InvalidArgument( "head_num (%d) * head_size (%d) should <= %d", head_num, head_size, 1024 * 2)); - TransposeQkvKernel<<>>(h, input2, bias2, - output2); + TransposeQkvKernel + <<>>(h, input2, bias2, output2); } else { const dim3 block(head_size, head_num, 1); // limit head_size * head_num to max block size(1024). @@ -127,8 +129,8 @@ void TransQKVWithBias(const int batch, const int seq_len, const int head_size, platform::errors::InvalidArgument( "head_num (%d) * head_size (%d) should <= %d", head_num, head_size, 1024)); - TransposeQkvKernel<<>>(head_size, input, - bias, output); + TransposeQkvKernel + <<>>(head_size, input, bias, output); } } diff --git a/paddle/fluid/operators/fused/resnet_unit_op.cc b/paddle/fluid/operators/fused/resnet_unit_op.cc index 6f4246aadd9..d5860fe9cf1 100644 --- a/paddle/fluid/operators/fused/resnet_unit_op.cc +++ b/paddle/fluid/operators/fused/resnet_unit_op.cc @@ -115,13 +115,14 @@ class ResNetUnitOp : public framework::OperatorWithKernel { bn_param_shape = {1, 1, 1, bn_param_shape[0]}; } framework::DDim bn_param_dims = phi::make_ddim(bn_param_shape); - PADDLE_ENFORCE_EQ(x_dims.size(), 4, platform::errors::InvalidArgument( - "The dimensions of input " - "must equal to 4." - "But received: the shape of input " - "= [%s], the dimension of input = " - "[%d]", - x_dims, x_dims.size())); + PADDLE_ENFORCE_EQ( + x_dims.size(), 4, + platform::errors::InvalidArgument("The dimensions of input " + "must equal to 4." 
+ "But received: the shape of input " + "= [%s], the dimension of input = " + "[%d]", + x_dims, x_dims.size())); PADDLE_ENFORCE_EQ(w_dims.size(), 4, platform::errors::InvalidArgument( "The dimensions of filter " @@ -180,14 +181,16 @@ class ResNetUnitOp : public framework::OperatorWithKernel { // and var tensors should be float when input tensor's dtype is float16. auto bn_param_type = framework::proto::VarType::FP32; - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("ScaleX")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("BiasX")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("ScaleX")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("BiasX")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); framework::LibraryType library = framework::LibraryType::kPlain; framework::DataLayout layout = framework::DataLayout::kAnyLayout; return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout, diff --git a/paddle/fluid/operators/fused/skip_layernorm_op.cc b/paddle/fluid/operators/fused/skip_layernorm_op.cc index 442f359c0da..6ac6f51e4ce 100644 --- a/paddle/fluid/operators/fused/skip_layernorm_op.cc +++ b/paddle/fluid/operators/fused/skip_layernorm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/errors.h" diff --git a/paddle/fluid/operators/fused/skip_layernorm_op.cu b/paddle/fluid/operators/fused/skip_layernorm_op.cu index e755ea33755..66a164ff31b 100644 --- a/paddle/fluid/operators/fused/skip_layernorm_op.cu +++ b/paddle/fluid/operators/fused/skip_layernorm_op.cu @@ -13,7 +13,9 @@ // limitations under the License. #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/math/bert_encoder_functor.h" diff --git a/paddle/fluid/operators/fused_softmax_mask_op.cc b/paddle/fluid/operators/fused_softmax_mask_op.cc index a4138002833..a33070d94b9 100644 --- a/paddle/fluid/operators/fused_softmax_mask_op.cc +++ b/paddle/fluid/operators/fused_softmax_mask_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused_softmax_mask_op.h" + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fused_softmax_mask_op.cu b/paddle/fluid/operators/fused_softmax_mask_op.cu index c4ab4de8a64..b68a6907d7a 100644 --- a/paddle/fluid/operators/fused_softmax_mask_op.cu +++ b/paddle/fluid/operators/fused_softmax_mask_op.cu @@ -40,6 +40,7 @@ limitations under the License. 
*/ #include #include #include + #include #include diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc index c737ba361e0..eefca7b6ab5 100644 --- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc +++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cc @@ -11,6 +11,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.h" + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu index d4c5b887705..4ee90eb3184 100644 --- a/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu +++ b/paddle/fluid/operators/fused_softmax_mask_upper_triangle_op.cu @@ -39,6 +39,7 @@ limitations under the License. */ #include #include #include + #include #include @@ -395,49 +396,49 @@ class SoftmaxMaskFuseUpperTriangleKernel : public framework::OpKernel { switch (pow2_index) { case 5: // 32 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 5><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 6: // 64 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 6><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 7: // 128 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 7><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 8: // 256 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 8><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 9: // 512 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 9><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 10: // 1024 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 10><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 11: // 2048 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 11><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 12: // 4096 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 12><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; case 13: // 8192 - SoftmaxMaskFuseUpperTriangleGPUKernel< - T, 13><<>>(x_data, y_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGPUKernel + <<>>(x_data, y_data, batch_count, + key_seq_len); break; default: break; @@ -483,58 +484,58 @@ class SoftmaxMaskFuseUpperTriangleGradKernel : public framework::OpKernel { switch (pow2_index) { case 5: // 32 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 5><<>>(grad_y_data, grad_x_data, - softmax_rst_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, 
grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 6: // 64 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 6><<>>(grad_y_data, grad_x_data, - softmax_rst_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 7: // 128 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 7><<>>(grad_y_data, grad_x_data, - softmax_rst_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 8: // 256 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 8><<>>(grad_y_data, grad_x_data, - softmax_rst_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 9: // 512 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 9><<>>(grad_y_data, grad_x_data, - softmax_rst_data, batch_count, - key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 10: // 1024 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 10><<>>(grad_y_data, grad_x_data, - softmax_rst_data, - batch_count, key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 11: // 2048 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 11><<>>(grad_y_data, grad_x_data, - softmax_rst_data, - batch_count, key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 12: // 4096 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 12><<>>(grad_y_data, grad_x_data, - softmax_rst_data, - batch_count, key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; case 13: // 8192 - SoftmaxMaskFuseUpperTriangleGradGPUKernel< - T, 13><<>>(grad_y_data, grad_x_data, - softmax_rst_data, - batch_count, key_seq_len); + SoftmaxMaskFuseUpperTriangleGradGPUKernel + <<>>(grad_y_data, grad_x_data, + softmax_rst_data, batch_count, + key_seq_len); break; default: break; diff --git a/paddle/fluid/operators/gather_op.cc b/paddle/fluid/operators/gather_op.cc index 9f2b48a24b4..d44dd324d6c 100644 --- a/paddle/fluid/operators/gather_op.cc +++ b/paddle/fluid/operators/gather_op.cc @@ -153,7 +153,7 @@ REGISTER_OPERATOR(gather_grad, ops::GatherGradOp, ops::GatherGradNoNeedBufferVarInferer, GatherGradInferShapeFunctor); -REGISTER_OP_VERSION(gather) - .AddCheckpoint(R"ROC(upgrad gather, add a new input [Axis])ROC", - paddle::framework::compatible::OpVersionDesc().NewInput( - "Axis", "Specify the axis of gather operation.")); +REGISTER_OP_VERSION(gather).AddCheckpoint( + R"ROC(upgrad gather, add a new input [Axis])ROC", + paddle::framework::compatible::OpVersionDesc().NewInput( + "Axis", "Specify the axis of gather operation.")); diff --git a/paddle/fluid/operators/gather_op_xpu.cc b/paddle/fluid/operators/gather_op_xpu.cc index 9dd8f58d242..327eec2a6ca 100644 --- a/paddle/fluid/operators/gather_op_xpu.cc +++ b/paddle/fluid/operators/gather_op_xpu.cc @@ -168,10 +168,11 @@ class GatherGradOpXPUKernel : public framework::OpKernel { r = xpu::cast_v2(dev_ctx.x_context(), index->data(), index_int_ptr_l3, index->numel()); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, 
platform::errors::External( - "XPU API(cast_v2) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(cast_v2) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); r = xpu::gather_grad( dev_ctx.x_context(), diff --git a/paddle/fluid/operators/gather_scatter_kernel.cu b/paddle/fluid/operators/gather_scatter_kernel.cu index f97eb3d5e9d..6c4a7a01f3f 100644 --- a/paddle/fluid/operators/gather_scatter_kernel.cu +++ b/paddle/fluid/operators/gather_scatter_kernel.cu @@ -132,10 +132,11 @@ struct gpu_gather_scatter_functor { int64_t grid = (n + block - 1) / block; auto stream = reinterpret_cast(ctx).stream(); - GatherScatterGPUKernel<<>>( - self_data, dim, index_data, src_data, inner_dim_size, select_dim_size, - replaced_select_dim_size, outer_dim_size, index_size, reduce_op); + GatherScatterGPUKernel + <<>>(self_data, dim, index_data, src_data, + inner_dim_size, select_dim_size, + replaced_select_dim_size, outer_dim_size, + index_size, reduce_op); } }; // struct gpu_gather_scatter_functor diff --git a/paddle/fluid/operators/gather_test.cc b/paddle/fluid/operators/gather_test.cc index c962dd06523..676143bf011 100644 --- a/paddle/fluid/operators/gather_test.cc +++ b/paddle/fluid/operators/gather_test.cc @@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/phi/kernels/funcs/gather.h" + #include #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/place.h" -#include "paddle/phi/kernels/funcs/gather.h" TEST(Gather, GatherData) { paddle::framework::Tensor* src = new paddle::framework::Tensor(); diff --git a/paddle/fluid/operators/gaussian_random_op.cu b/paddle/fluid/operators/gaussian_random_op.cu index deac932d59b..1e89091b202 100644 --- a/paddle/fluid/operators/gaussian_random_op.cu +++ b/paddle/fluid/operators/gaussian_random_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/gaussian_random_op_xpu.cc b/paddle/fluid/operators/gaussian_random_op_xpu.cc index 5a1ac46f615..2ffc90fbd8c 100644 --- a/paddle/fluid/operators/gaussian_random_op_xpu.cc +++ b/paddle/fluid/operators/gaussian_random_op_xpu.cc @@ -15,6 +15,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/gelu_op.cc b/paddle/fluid/operators/gelu_op.cc index 3be2606bfc9..080ceaa45e3 100644 --- a/paddle/fluid/operators/gelu_op.cc +++ b/paddle/fluid/operators/gelu_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/operators/gelu_op_xpu.cc b/paddle/fluid/operators/gelu_op_xpu.cc index 559d2448ad9..408638f7d2c 100644 --- a/paddle/fluid/operators/gelu_op_xpu.cc +++ b/paddle/fluid/operators/gelu_op_xpu.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/graph_khop_sampler_op.cc b/paddle/fluid/operators/graph_khop_sampler_op.cc index c83ee258406..edf7d20c6d5 100644 --- a/paddle/fluid/operators/graph_khop_sampler_op.cc +++ b/paddle/fluid/operators/graph_khop_sampler_op.cc @@ -19,10 +19,11 @@ namespace operators { void InputShapeCheck(const framework::DDim& dims, std::string tensor_name) { if (dims.size() == 2) { - PADDLE_ENFORCE_EQ(dims[1], 1, platform::errors::InvalidArgument( - "The last dim of %s should be 1 when it " - "is 2D, but we get %d", - tensor_name, dims[1])); + PADDLE_ENFORCE_EQ(dims[1], 1, + platform::errors::InvalidArgument( + "The last dim of %s should be 1 when it " + "is 2D, but we get %d", + tensor_name, dims[1])); } else { PADDLE_ENFORCE_EQ( dims.size(), 1, diff --git a/paddle/fluid/operators/graph_khop_sampler_op.cu b/paddle/fluid/operators/graph_khop_sampler_op.cu index df977b43512..a63fdc89e24 100644 --- a/paddle/fluid/operators/graph_khop_sampler_op.cu +++ b/paddle/fluid/operators/graph_khop_sampler_op.cu @@ -26,6 +26,7 @@ limitations under the License. */ #include #include #include + #include #ifdef PADDLE_WITH_HIP @@ -217,15 +218,16 @@ void SampleNeighbors(const framework::ExecutionContext& ctx, const T* src, constexpr int TILE_SIZE = BLOCK_WARPS * 16; const dim3 block(WARP_SIZE, BLOCK_WARPS); const dim3 grid((bs + TILE_SIZE - 1) / TILE_SIZE); - GraphSampleNeighborsCUDAKernel<<< - grid, block, 0, - reinterpret_cast(ctx.device_context()) - .stream()>>>( - 0, k, bs, thrust::raw_pointer_cast(inputs->data()), src, dst_count, - src_eids, thrust::raw_pointer_cast(outputs->data()), - thrust::raw_pointer_cast(outputs_eids->data()), - thrust::raw_pointer_cast(output_ptr.data()), - thrust::raw_pointer_cast(output_idxs.data()), return_eids); + GraphSampleNeighborsCUDAKernel + <<( + ctx.device_context()) + .stream()>>>( + 0, k, bs, thrust::raw_pointer_cast(inputs->data()), src, dst_count, + src_eids, thrust::raw_pointer_cast(outputs->data()), + thrust::raw_pointer_cast(outputs_eids->data()), + thrust::raw_pointer_cast(output_ptr.data()), + thrust::raw_pointer_cast(output_idxs.data()), return_eids); // 5. Get inputs = outputs - inputs: if (!is_last_layer) { @@ -264,19 +266,19 @@ void FillHashTable(const framework::ExecutionContext& ctx, const T* input, int grid_tmp = (num_input + block - 1) / block; int grid = grid_tmp < max_grid_dimx ? grid_tmp : max_grid_dimx; // 1. Insert data into keys and values. - BuildHashTable< - T><<( - ctx.device_context()) - .stream()>>>( + BuildHashTable<<( + ctx.device_context()) + .stream()>>>( input, num_input, len_hashtable, thrust::raw_pointer_cast(keys->data()), thrust::raw_pointer_cast(key_index->data())); // 2. Get item index count. thrust::device_vector item_count(num_input + 1, 0); - GetItemIndexCount< - T><<( - ctx.device_context()) - .stream()>>>( + GetItemIndexCount<<( + ctx.device_context()) + .stream()>>>( input, thrust::raw_pointer_cast(item_count.data()), num_input, len_hashtable, thrust::raw_pointer_cast(keys->data()), thrust::raw_pointer_cast(key_index->data())); @@ -287,16 +289,16 @@ void FillHashTable(const framework::ExecutionContext& ctx, const T* input, unique_items->resize(total_unique_items); // 3. Get unique items. 
- FillUniqueItems< - T><<( - ctx.device_context()) - .stream()>>>( - input, num_input, len_hashtable, - thrust::raw_pointer_cast(unique_items->data()), - thrust::raw_pointer_cast(item_count.data()), - thrust::raw_pointer_cast(keys->data()), - thrust::raw_pointer_cast(values->data()), - thrust::raw_pointer_cast(key_index->data())); + FillUniqueItems + <<( + ctx.device_context()) + .stream()>>>(input, num_input, len_hashtable, + thrust::raw_pointer_cast(unique_items->data()), + thrust::raw_pointer_cast(item_count.data()), + thrust::raw_pointer_cast(keys->data()), + thrust::raw_pointer_cast(values->data()), + thrust::raw_pointer_cast(key_index->data())); } template @@ -337,23 +339,23 @@ void ReindexFunc(const framework::ExecutionContext& ctx, int64_t max_grid_dimx = dev_ctx.GetCUDAMaxGridDimSize()[0]; int64_t grid_tmp = (outputs->size() + block - 1) / block; int64_t grid = grid_tmp < max_grid_dimx ? grid_tmp : max_grid_dimx; - ReindexSrcOutput< - T><<( - ctx.device_context()) - .stream()>>>( + ReindexSrcOutput<<( + ctx.device_context()) + .stream()>>>( thrust::raw_pointer_cast(outputs->data()), outputs->size(), size, thrust::raw_pointer_cast(keys.data()), thrust::raw_pointer_cast(values.data())); int grid_ = (bs + block - 1) / block; - ReindexInputNodes<<( - ctx.device_context()) - .stream()>>>( - thrust::raw_pointer_cast(orig_nodes->data()), bs, - thrust::raw_pointer_cast(reindex_nodes->data()), size, - thrust::raw_pointer_cast(keys.data()), - thrust::raw_pointer_cast(values.data())); + ReindexInputNodes + <<( + ctx.device_context()) + .stream()>>>(thrust::raw_pointer_cast(orig_nodes->data()), bs, + thrust::raw_pointer_cast(reindex_nodes->data()), size, + thrust::raw_pointer_cast(keys.data()), + thrust::raw_pointer_cast(values.data())); } template @@ -532,15 +534,16 @@ class GraphKhopSamplerOpCUDAKernel : public framework::OpKernel { const dim3 block(WARP_SIZE, BLOCK_WARPS); const dim3 grid((unique_dst_size + TILE_SIZE - 1) / TILE_SIZE); - GetDstEdgeCUDAKernel<<< - grid, block, 0, reinterpret_cast( - ctx.device_context()) - .stream()>>>( - unique_dst_size, - thrust::raw_pointer_cast(unique_dst_merge_reindex.data()), - thrust::raw_pointer_cast(dst_sample_counts_merge.data()), - thrust::raw_pointer_cast(dst_ptr.data()), - thrust::raw_pointer_cast(dst_merge.data())); + GetDstEdgeCUDAKernel + <<( + ctx.device_context()) + .stream()>>>( + unique_dst_size, + thrust::raw_pointer_cast(unique_dst_merge_reindex.data()), + thrust::raw_pointer_cast(dst_sample_counts_merge.data()), + thrust::raw_pointer_cast(dst_ptr.data()), + thrust::raw_pointer_cast(dst_merge.data())); // 8. Give operator's outputs. auto* out_src = ctx.Output("Out_Src"); diff --git a/paddle/fluid/operators/graph_khop_sampler_op.h b/paddle/fluid/operators/graph_khop_sampler_op.h index d7121cb5493..1005a6ab11c 100644 --- a/paddle/fluid/operators/graph_khop_sampler_op.h +++ b/paddle/fluid/operators/graph_khop_sampler_op.h @@ -15,10 +15,12 @@ limitations under the License. */ #pragma once #include + #include #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/group_norm_op.cc b/paddle/fluid/operators/group_norm_op.cc index f6a1e20a1a1..4d989ed1f2e 100644 --- a/paddle/fluid/operators/group_norm_op.cc +++ b/paddle/fluid/operators/group_norm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/group_norm_op.h" + #include #include #include diff --git a/paddle/fluid/operators/group_norm_op.cu b/paddle/fluid/operators/group_norm_op.cu index bb8031b0cc4..84eb2fbc7d3 100644 --- a/paddle/fluid/operators/group_norm_op.cu +++ b/paddle/fluid/operators/group_norm_op.cu @@ -322,9 +322,9 @@ class GroupNormKernel ScalarGetMeanAndVarNCHW<<>>( x_data, mean_data, temp_var_data, size); } else { - VectorizedGetMeanAndVarNCHW< - T, AccT, vec_size><<>>( - x_data, mean_data, temp_var_data, size); + VectorizedGetMeanAndVarNCHW + <<>>(x_data, mean_data, + temp_var_data, size); } } else { set_zero(dev_ctx, mean, static_cast(0)); @@ -613,16 +613,16 @@ class GroupNormGradKernel } block_size_nchw = std::max(block_size_nchw, kps::details::kWarpSize); dim3 blocks(block_size_nchw); - ScalarGetDsDbCUDAKernel< - T><<>>( - imsize, x_data, dy_data, ds_data, db_data); + ScalarGetDsDbCUDAKernel + <<>>( + imsize, x_data, dy_data, ds_data, db_data); if (d_scale || d_bias) { const int block = 256; - GetScaleBiasGradientCUDAKernel< - T><<<(C + block - 1) / block, block, 0, dev_ctx.stream()>>>( - x_dims[0], C, groups, epsilon, mean_data, var_data, ds_data, - db_data, d_scale_data, d_bias_data); + GetScaleBiasGradientCUDAKernel + <<<(C + block - 1) / block, block, 0, dev_ctx.stream()>>>( + x_dims[0], C, groups, epsilon, mean_data, var_data, ds_data, + db_data, d_scale_data, d_bias_data); } if (d_x_data != nullptr) { @@ -639,10 +639,10 @@ class GroupNormGradKernel T* p2_data = p2.data(); T* p3_data = p3.data(); - GetBackwardParamsCUDAKernel<<< - dim3(x_dims[0], groups), block_dims, 0, dev_ctx.stream()>>>( - imsize, groups, group_size, epsilon, mean_data, var_data, - scale_data, ds_data, db_data, p1_data, p2_data, p3_data); + GetBackwardParamsCUDAKernel + <<>>( + imsize, groups, group_size, epsilon, mean_data, var_data, + scale_data, ds_data, db_data, p1_data, p2_data, p3_data); GetXGradientCUDAKernel<<>>( imsize, C, group_size, groups, p1_data, p2_data, p3_data, x_data, dy_data, d_x_data); diff --git a/paddle/fluid/operators/group_norm_op.h b/paddle/fluid/operators/group_norm_op.h index 2d80ab89471..28a3ad2a8e1 100644 --- a/paddle/fluid/operators/group_norm_op.h +++ b/paddle/fluid/operators/group_norm_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/group_norm_op_npu.cc b/paddle/fluid/operators/group_norm_op_npu.cc index 8de8647186e..dfc509941bc 100644 --- a/paddle/fluid/operators/group_norm_op_npu.cc +++ b/paddle/fluid/operators/group_norm_op_npu.cc @@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/group_norm_op.h" #include + +#include "paddle/fluid/operators/group_norm_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/gru_op.cc b/paddle/fluid/operators/gru_op.cc index 58cbdfda347..21ad5914c5d 100644 --- a/paddle/fluid/operators/gru_op.cc +++ b/paddle/fluid/operators/gru_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/gru_op.h" + #include #include + #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h" #include "paddle/phi/kernels/funcs/detail/gru_kernel.h" diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index 852655034c8..4cc6c65983f 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/detail/activation_functions.h" diff --git a/paddle/fluid/operators/gru_unit_op.cc b/paddle/fluid/operators/gru_unit_op.cc index 8998c51f0df..b6d9ef50f83 100644 --- a/paddle/fluid/operators/gru_unit_op.cc +++ b/paddle/fluid/operators/gru_unit_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/gru_unit_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/gru_unit_op.h b/paddle/fluid/operators/gru_unit_op.h index 291f5f4ad26..2dd1515919b 100644 --- a/paddle/fluid/operators/gru_unit_op.h +++ b/paddle/fluid/operators/gru_unit_op.h @@ -77,9 +77,9 @@ class GRUUnitKernel : public framework::OpKernel { // calculate unactivated gate outputs if (bias) { auto b = framework::EigenMatrix::From(*bias); - g.device(place) = x + - b.reshape(Eigen::array({{1, frame_size * 3}})) - .broadcast(Eigen::array({{batch_size, 1}})); + g.device(place) = + x + b.reshape(Eigen::array({{1, frame_size * 3}})) + .broadcast(Eigen::array({{batch_size, 1}})); } else { g.device(place) = x; } diff --git a/paddle/fluid/operators/hinge_loss_op.cc b/paddle/fluid/operators/hinge_loss_op.cc index cce80518354..f72fe9282ab 100644 --- a/paddle/fluid/operators/hinge_loss_op.cc +++ b/paddle/fluid/operators/hinge_loss_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/hinge_loss_op.h" + #include #include #include diff --git a/paddle/fluid/operators/huber_loss_op_xpu.cc b/paddle/fluid/operators/huber_loss_op_xpu.cc index ccddec27795..2fafd186215 100644 --- a/paddle/fluid/operators/huber_loss_op_xpu.cc +++ b/paddle/fluid/operators/huber_loss_op_xpu.cc @@ -39,10 +39,11 @@ class HuberLossXPUKernel : public framework::OpKernel { ctx.template device_context(); int r = xpu::huber_loss(dev_ctx.x_context(), in0_data, in1_data, residual_data, out_data, in0->numel(), 1, delta); - PADDLE_ENFORCE_EQ(r, XPU_SUCCESS, platform::errors::External( - "XPU API(huber_loss) return wrong " - "value[%d %s]", - r, XPUAPIErrorMsg[r])); + PADDLE_ENFORCE_EQ( + r, XPU_SUCCESS, + platform::errors::External("XPU API(huber_loss) return wrong " + "value[%d %s]", + r, XPUAPIErrorMsg[r])); } }; diff --git a/paddle/fluid/operators/im2sequence_op.cc b/paddle/fluid/operators/im2sequence_op.cc index d248857b8f4..107384742bb 100644 --- a/paddle/fluid/operators/im2sequence_op.cc +++ b/paddle/fluid/operators/im2sequence_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/im2sequence_op.h" + #include #include #include diff --git a/paddle/fluid/operators/im2sequence_op.h b/paddle/fluid/operators/im2sequence_op.h index b0c4b9b4a99..218161fd00a 100644 --- a/paddle/fluid/operators/im2sequence_op.h +++ b/paddle/fluid/operators/im2sequence_op.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/index_impl.cu.h b/paddle/fluid/operators/index_impl.cu.h index bb26e2f445e..d8417e42e1b 100644 --- a/paddle/fluid/operators/index_impl.cu.h +++ b/paddle/fluid/operators/index_impl.cu.h @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -73,16 +74,16 @@ void IndexKernel(const KPDevice &dev_ctx, Tensor *out, Functor func) { size_t main_offset = (numel / (vec_size * block)) * vec_size * block; switch (vec_size) { case 4: - VectorizedIndexKernel<<>>( - out_data, numel, main_offset, func); + VectorizedIndexKernel + <<>>(out_data, numel, main_offset, func); break; case 2: - VectorizedIndexKernel<<>>( - out_data, numel, main_offset, func); + VectorizedIndexKernel + <<>>(out_data, numel, main_offset, func); break; case 1: - VectorizedIndexKernel<<>>( - out_data, numel, main_offset, func); + VectorizedIndexKernel + <<>>(out_data, numel, main_offset, func); break; default: { PADDLE_THROW(paddle::platform::errors::Unimplemented( diff --git a/paddle/fluid/operators/index_sample_op.cc b/paddle/fluid/operators/index_sample_op.cc index d17c6368c75..15fc0f6d14f 100644 --- a/paddle/fluid/operators/index_sample_op.cc +++ b/paddle/fluid/operators/index_sample_op.cc @@ -13,11 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include -#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" -#include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/binary.h" namespace paddle { diff --git a/paddle/fluid/operators/index_select_op.h b/paddle/fluid/operators/index_select_op.h index 684829be269..c82aaab0fe1 100644 --- a/paddle/fluid/operators/index_select_op.h +++ b/paddle/fluid/operators/index_select_op.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/inplace_abn_op.cc b/paddle/fluid/operators/inplace_abn_op.cc index d420d0319bf..6cb8d664d80 100644 --- a/paddle/fluid/operators/inplace_abn_op.cc +++ b/paddle/fluid/operators/inplace_abn_op.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "paddle/fluid/operators/inplace_abn_op.h" + #include #include #include + #include "paddle/fluid/operators/batch_norm_op.h" #include "paddle/phi/kernels/batch_norm_grad_kernel.h" #include "paddle/phi/kernels/batch_norm_kernel.h" @@ -38,18 +40,21 @@ class InplaceABNOp : public paddle::operators::BatchNormOp { if (input_data_type == framework::proto::VarType::FP64) { bn_param_type = framework::proto::VarType::FP64; } - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); - PADDLE_ENFORCE_EQ(bn_param_type, framework::TransToProtoVarType( - ctx.Input("Mean")->dtype()), - platform::errors::InvalidArgument( - "Mean input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + bn_param_type, + framework::TransToProtoVarType(ctx.Input("Mean")->dtype()), + platform::errors::InvalidArgument( + "Mean input should be of float type")); PADDLE_ENFORCE_EQ( bn_param_type, framework::TransToProtoVarType(ctx.Input("Variance")->dtype()), @@ -209,8 +214,9 @@ class InplaceABNKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* x = ctx.Input("X"); auto* y = ctx.Output("Y"); - PADDLE_ENFORCE_EQ(x, y, platform::errors::InvalidArgument( - "X and Y not inplaced in inplace mode")); + PADDLE_ENFORCE_EQ(x, y, + platform::errors::InvalidArgument( + "X and Y not inplaced in inplace mode")); auto activation = GetInplaceABNActivationType(ctx.Attr("activation")); auto& place = *ctx.template device_context().eigen_device(); diff --git a/paddle/fluid/operators/inplace_abn_op.cu b/paddle/fluid/operators/inplace_abn_op.cu index 6476023fcd2..7245629e565 100644 --- a/paddle/fluid/operators/inplace_abn_op.cu +++ b/paddle/fluid/operators/inplace_abn_op.cu @@ -28,8 +28,9 @@ class InplaceABNKernel void Compute(const framework::ExecutionContext& ctx) const override { auto* y = ctx.Output("Y"); auto* x = ctx.Input("X"); - PADDLE_ENFORCE_EQ(x, y, platform::errors::InvalidArgument( - "X and Y not inplaced in inplace mode")); + PADDLE_ENFORCE_EQ(x, y, + platform::errors::InvalidArgument( + "X and Y not inplaced in inplace mode")); auto activation = GetInplaceABNActivationType(ctx.Attr("activation")); auto& place = *ctx.template device_context().eigen_device(); diff --git a/paddle/fluid/operators/inplace_abn_op.h b/paddle/fluid/operators/inplace_abn_op.h index 94240497858..275209911d1 100644 --- a/paddle/fluid/operators/inplace_abn_op.h +++ b/paddle/fluid/operators/inplace_abn_op.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/activation_op.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/instance_norm_op.cc b/paddle/fluid/operators/instance_norm_op.cc index de92de453a3..21ccf777051 100644 --- a/paddle/fluid/operators/instance_norm_op.cc +++ b/paddle/fluid/operators/instance_norm_op.cc @@ -13,9 +13,11 @@ See 
the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/instance_norm_op.h" + #include #include #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -38,16 +40,18 @@ framework::OpKernelType InstanceNormOp::GetExpectedKernelType( in_param_type = framework::proto::VarType::FP64; } if (ctx.HasInput("Scale")) { - PADDLE_ENFORCE_EQ(in_param_type, framework::TransToProtoVarType( - ctx.Input("Scale")->dtype()), - platform::errors::InvalidArgument( - "Scale input should be of float type")); + PADDLE_ENFORCE_EQ( + in_param_type, + framework::TransToProtoVarType(ctx.Input("Scale")->dtype()), + platform::errors::InvalidArgument( + "Scale input should be of float type")); } if (ctx.HasInput("Bias")) { - PADDLE_ENFORCE_EQ(in_param_type, framework::TransToProtoVarType( - ctx.Input("Bias")->dtype()), - platform::errors::InvalidArgument( - "Bias input should be of float type")); + PADDLE_ENFORCE_EQ( + in_param_type, + framework::TransToProtoVarType(ctx.Input("Bias")->dtype()), + platform::errors::InvalidArgument( + "Bias input should be of float type")); } return framework::OpKernelType(input_data_type, ctx.GetPlace()); diff --git a/paddle/fluid/operators/instance_norm_op.h b/paddle/fluid/operators/instance_norm_op.h index 265e4acef0d..3f99cdf10c6 100644 --- a/paddle/fluid/operators/instance_norm_op.h +++ b/paddle/fluid/operators/instance_norm_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/interpolate_op.cc b/paddle/fluid/operators/interpolate_op.cc index fda168c94e1..3c746d7c08a 100644 --- a/paddle/fluid/operators/interpolate_op.cc +++ b/paddle/fluid/operators/interpolate_op.cc @@ -10,9 +10,11 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/interpolate_op.h" + #include #include #include + #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -112,11 +114,12 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { PADDLE_ENFORCE_EQ("bilinear" == interp_method || "nearest" == interp_method || "bicubic" == interp_method, - true, platform::errors::InvalidArgument( - "Interpolation method can only be \"bilinear\" " - "or \"nearest\" or \"bicubic\" when " - "Input(X) dimension is 4, but got method is %s.", - interp_method)); + true, + platform::errors::InvalidArgument( + "Interpolation method can only be \"bilinear\" " + "or \"nearest\" or \"bicubic\" when " + "Input(X) dimension is 4, but got method is %s.", + interp_method)); const DataLayout data_layout = framework::StringToDataLayout( ctx->Attrs().Get("data_layout")); diff --git a/paddle/fluid/operators/interpolate_op.cu b/paddle/fluid/operators/interpolate_op.cu index 8a63c9a3946..729eba43d72 100644 --- a/paddle/fluid/operators/interpolate_op.cu +++ b/paddle/fluid/operators/interpolate_op.cu @@ -11,6 +11,7 @@ #include #include + #include "paddle/fluid/operators/interpolate_op.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" @@ -860,9 +861,10 @@ static void Interpolate1DCUDAFwd(const framework::ExecutionContext& ctx, out_w = size_data[0]; } } - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_w, 0, + platform::errors::InvalidArgument( + "out_w in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { dim_out = {n, c, out_w}; @@ -942,12 +944,14 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, out_w = size_data[1]; } } - PADDLE_ENFORCE_GT(out_h, 0, platform::errors::InvalidArgument( - "out_h in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_h, 0, + platform::errors::InvalidArgument( + "out_h in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_w, 0, + platform::errors::InvalidArgument( + "out_w in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { @@ -984,21 +988,21 @@ static void Interpolate2DCUDAFwd(const framework::ExecutionContext& ctx, platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), pixelNum); if ("nearest" == interp_method) { - KeNearestNeighborInterpFw< - T><<>>( - input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w, align_corners, data_layout); + KeNearestNeighborInterpFw + <<>>( + input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, + out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } else if ("bilinear" == interp_method) { KeBilinearInterpFw<<>>( input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode, data_layout); } else if ("bicubic" == interp_method) { - KeBicubicInterpFw<<>>( - input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, - out_chw, c, ratio_h, ratio_w, 
align_corners, data_layout); + KeBicubicInterpFw + <<>>( + input_data, in_h, in_w, n, in_chw, output_data, out_h, out_w, n, + out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } } @@ -1051,15 +1055,18 @@ static void Interpolate3DCUDAFwd(const framework::ExecutionContext& ctx, out_w = size_data[2]; } } - PADDLE_ENFORCE_GT(out_d, 0, platform::errors::InvalidArgument( - "out_d in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_h, 0, platform::errors::InvalidArgument( - "out_h in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_d, 0, + platform::errors::InvalidArgument( + "out_d in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_h, 0, + platform::errors::InvalidArgument( + "out_h in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_w, 0, + platform::errors::InvalidArgument( + "out_w in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { @@ -1271,11 +1278,11 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, platform::GetGpuLaunchConfig1D(ctx.cuda_device_context(), pixelNum); if ("nearest" == interp_method) { - KeNearestNeighborInterpBw< - T><<>>( - input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, out_w, - n, out_chw, c, ratio_h, ratio_w, align_corners, data_layout); + KeNearestNeighborInterpBw + <<>>( + input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, + out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } else if ("bilinear" == interp_method) { KeBilinearInterpBw<<>>( @@ -1283,10 +1290,10 @@ static void Interpolate2DCUDABwd(const framework::ExecutionContext& ctx, n, out_chw, c, ratio_h, ratio_w, align_corners, align_mode, data_layout); } else if ("bicubic" == interp_method) { - KeBicubicInterpBw<<>>( - input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, out_w, - n, out_chw, c, ratio_h, ratio_w, align_corners, data_layout); + KeBicubicInterpBw + <<>>( + input_grad_data, in_h, in_w, n, in_chw, output_grad_data, out_h, + out_w, n, out_chw, c, ratio_h, ratio_w, align_corners, data_layout); } } diff --git a/paddle/fluid/operators/interpolate_op.h b/paddle/fluid/operators/interpolate_op.h index 57b5eb553cc..18caed22b48 100644 --- a/paddle/fluid/operators/interpolate_op.h +++ b/paddle/fluid/operators/interpolate_op.h @@ -13,6 +13,7 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/hostdevice.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -808,9 +809,10 @@ static void Interpolate1DCPUFwd(const framework::ExecutionContext& ctx, out_w = out_size_data[0]; } } - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_w, 0, + platform::errors::InvalidArgument( + "out_w in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { dim_out = {n, c, out_w}; @@ -876,12 +878,14 @@ static void Interpolate2DCPUFwd(const framework::ExecutionContext& ctx, out_w = out_size_data[1]; } } - PADDLE_ENFORCE_GT(out_h, 0, platform::errors::InvalidArgument( - "out_h in 
Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_h, 0, + platform::errors::InvalidArgument( + "out_h in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_w, 0, + platform::errors::InvalidArgument( + "out_w in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { dim_out = {n, c, out_h, out_w}; @@ -964,15 +968,18 @@ static void Interpolate3DCPUFwd(const framework::ExecutionContext& ctx, out_w = out_size_data[2]; } } - PADDLE_ENFORCE_GT(out_d, 0, platform::errors::InvalidArgument( - "out_d in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_h, 0, platform::errors::InvalidArgument( - "out_h in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_d, 0, + platform::errors::InvalidArgument( + "out_d in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_h, 0, + platform::errors::InvalidArgument( + "out_h in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT(out_w, 0, + platform::errors::InvalidArgument( + "out_w in Attr(out_shape) of Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { diff --git a/paddle/fluid/operators/interpolate_op_npu.cc b/paddle/fluid/operators/interpolate_op_npu.cc old mode 100755 new mode 100644 index f83f149b87c..0cbac393af5 --- a/paddle/fluid/operators/interpolate_op_npu.cc +++ b/paddle/fluid/operators/interpolate_op_npu.cc @@ -12,9 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/interpolate_op.h" #include #include + +#include "paddle/fluid/operators/interpolate_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/interpolate_op_xpu.cc b/paddle/fluid/operators/interpolate_op_xpu.cc index 9576dc84524..09780505ac2 100644 --- a/paddle/fluid/operators/interpolate_op_xpu.cc +++ b/paddle/fluid/operators/interpolate_op_xpu.cc @@ -111,14 +111,16 @@ class InterpolateXPUKernel : public framework::OpKernel { out_w = out_size_data[1]; } } - PADDLE_ENFORCE_GT(out_h, 0, platform::errors::InvalidArgument( - "out_h in Attr(out_shape) of " - "Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of " - "Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT( + out_h, 0, + platform::errors::InvalidArgument("out_h in Attr(out_shape) of " + "Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT( + out_w, 0, + platform::errors::InvalidArgument("out_w in Attr(out_shape) of " + "Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { dim_out = {n, c, out_h, out_w}; diff --git a/paddle/fluid/operators/interpolate_v2_op.cc b/paddle/fluid/operators/interpolate_v2_op.cc index d0d7b7694fc..6bac35ee1d4 100644 --- a/paddle/fluid/operators/interpolate_v2_op.cc +++ b/paddle/fluid/operators/interpolate_v2_op.cc @@ -40,10 +40,11 @@ static void Interpolate1DInferShapeCheck(framework::InferShapeContext* ctx) { const DataLayout data_layout = framework::StringToDataLayout( ctx->Attrs().Get("data_layout")); for (int i = 0; i < dim_x.size(); ++i) { - PADDLE_ENFORCE_NE(dim_x[i], 0, platform::errors::InvalidArgument( - "The shape of input(x) should be larged " - "than 0, bug received shape[%d] is %d ", - i, dim_x[i])); + PADDLE_ENFORCE_NE(dim_x[i], 0, + platform::errors::InvalidArgument( + "The shape of input(x) should be larged " + "than 0, bug received shape[%d] is %d ", + i, dim_x[i])); } if (ctx->HasInputs("SizeTensor")) { // top prority size @@ -144,10 +145,11 @@ static void Interpolate2DInferShapeCheck(framework::InferShapeContext* ctx) { ctx->Attrs().Get("data_layout")); for (int i = 0; i < dim_x.size(); ++i) { - PADDLE_ENFORCE_NE(dim_x[i], 0, platform::errors::InvalidArgument( - "The shape of input(x) should be larged " - "than 0, bug received shape[%d] is %d ", - i, dim_x[i])); + PADDLE_ENFORCE_NE(dim_x[i], 0, + platform::errors::InvalidArgument( + "The shape of input(x) should be larged " + "than 0, bug received shape[%d] is %d ", + i, dim_x[i])); } if (ctx->HasInputs("SizeTensor")) { @@ -263,10 +265,11 @@ static void Interpolate3DInferShapeCheck(framework::InferShapeContext* ctx) { ctx->Attrs().Get("data_layout")); for (int i = 0; i < dim_x.size(); ++i) { - PADDLE_ENFORCE_NE(dim_x[i], 0, platform::errors::InvalidArgument( - "The shape of input(x) should be larged " - "than 0, bug received shape[%d] is %d ", - i, dim_x[i])); + PADDLE_ENFORCE_NE(dim_x[i], 0, + platform::errors::InvalidArgument( + "The shape of input(x) should be larged " + "than 0, bug received shape[%d] is %d ", + i, dim_x[i])); } if (ctx->HasInputs("SizeTensor")) { diff --git a/paddle/fluid/operators/interpolate_v2_op_npu.cc b/paddle/fluid/operators/interpolate_v2_op_npu.cc index 615b5ea142b..97f39aa4902 100644 --- a/paddle/fluid/operators/interpolate_v2_op_npu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_npu.cc @@ -12,9 +12,8 @@ WITHOUT 
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/device/npu/npu_op_runner.h" - #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/phi/kernels/funcs/interpolate_function.h" namespace paddle { diff --git a/paddle/fluid/operators/interpolate_v2_op_xpu.cc b/paddle/fluid/operators/interpolate_v2_op_xpu.cc index 9cbfc951583..9d52c9a865e 100644 --- a/paddle/fluid/operators/interpolate_v2_op_xpu.cc +++ b/paddle/fluid/operators/interpolate_v2_op_xpu.cc @@ -114,14 +114,16 @@ class InterpolateV2XPUKernel : public framework::OpKernel { out_w = out_size_data[1]; } } - PADDLE_ENFORCE_GT(out_h, 0, platform::errors::InvalidArgument( - "out_h in Attr(out_shape) of " - "Op(interpolate) " - "should be greater than 0.")); - PADDLE_ENFORCE_GT(out_w, 0, platform::errors::InvalidArgument( - "out_w in Attr(out_shape) of " - "Op(interpolate) " - "should be greater than 0.")); + PADDLE_ENFORCE_GT( + out_h, 0, + platform::errors::InvalidArgument("out_h in Attr(out_shape) of " + "Op(interpolate) " + "should be greater than 0.")); + PADDLE_ENFORCE_GT( + out_w, 0, + platform::errors::InvalidArgument("out_w in Attr(out_shape) of " + "Op(interpolate) " + "should be greater than 0.")); framework::DDim dim_out; if (data_layout == DataLayout::kNCHW) { dim_out = {n, c, out_h, out_w}; diff --git a/paddle/fluid/operators/inverse_op.cc b/paddle/fluid/operators/inverse_op.cc index f5b817a0e11..c4f3fbb2ca7 100644 --- a/paddle/fluid/operators/inverse_op.cc +++ b/paddle/fluid/operators/inverse_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/inverse_op.h" + #include #include diff --git a/paddle/fluid/operators/isfinite_op.cc b/paddle/fluid/operators/isfinite_op.cc index 2e770f98525..456c1c2d44f 100644 --- a/paddle/fluid/operators/isfinite_op.cc +++ b/paddle/fluid/operators/isfinite_op.cc @@ -120,15 +120,16 @@ namespace ops = paddle::operators; paddle::framework::EmptyGradOpMaker, \ paddle::framework::EmptyGradOpMaker) -#define REGISTER_OVERFLOW_CPU_KERNEL(op_type, functor) \ - REGISTER_OP_CPU_KERNEL( \ - op_type, ops::OverflowKernel, \ - ops::OverflowKernel, \ - ops::OverflowKernel, \ - ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel, \ + ops::OverflowKernel); REGISTER_OP_MAKER(isinf, "isinf(X)"); diff --git a/paddle/fluid/operators/isfinite_op.cu b/paddle/fluid/operators/isfinite_op.cu index e233e371364..d1437d5b44d 100644 --- a/paddle/fluid/operators/isfinite_op.cu +++ b/paddle/fluid/operators/isfinite_op.cu @@ -18,8 +18,9 @@ namespace ops = paddle::operators; namespace plat = paddle::platform; REGISTER_OP_CUDA_KERNEL( - isinf, ops::OverflowKernel, + isinf, + ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel); REGISTER_OP_CUDA_KERNEL( - isfinite, ops::OverflowKernel, + isfinite, + ops::OverflowKernel, ops::OverflowKernel, ops::OverflowKernel(upper - lower), (n - 1))); PADDLE_ENFORCE_GT( - n, 0, paddle::platform::errors::InvalidArgument( - "The Sgd size should be larger than 0. But the n is %d.", n)); + n, 0, + paddle::platform::errors::InvalidArgument( + "The Sgd size should be larger than 0. 
But the n is %d.", n)); std::vector all, out; for (int i = 0; i < n; ++i) { all.push_back(i); diff --git a/paddle/fluid/operators/jit/gen/act.cc b/paddle/fluid/operators/jit/gen/act.cc index 677e9979399..5a73e3c56d5 100644 --- a/paddle/fluid/operators/jit/gen/act.cc +++ b/paddle/fluid/operators/jit/gen/act.cc @@ -122,9 +122,8 @@ bool VTanhCreator::CanBeUsed(const int& d) const { } size_t VReluCreator::CodeSize(const int& d) const { - return 96 /* init size */ + - (d / YMM_FLOAT_BLOCK + 3) * 4 /* instructions */ * - 8 /* average bytes for each instruction */; + return 96 /* init size */ + (d / YMM_FLOAT_BLOCK + 3) * 4 /* instructions */ * + 8 /* average bytes for each instruction */; } size_t VSquareCreator::CodeSize(const int& d) const { diff --git a/paddle/fluid/operators/jit/gen/jitcode.h b/paddle/fluid/operators/jit/gen/jitcode.h index bd84368a573..24434c5993b 100644 --- a/paddle/fluid/operators/jit/gen/jitcode.h +++ b/paddle/fluid/operators/jit/gen/jitcode.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/operators/jit/gen_base.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/gen/matmul.cc b/paddle/fluid/operators/jit/gen/matmul.cc index 3b2139c9ed0..9c859229c5a 100644 --- a/paddle/fluid/operators/jit/gen/matmul.cc +++ b/paddle/fluid/operators/jit/gen/matmul.cc @@ -122,20 +122,23 @@ class MatMulCreator : public JitCodeCreator { std::unique_ptr CreateJitCode( const matmul_attr_t& attr) const override { PADDLE_ENFORCE_GT( - attr.m, 0, platform::errors::InvalidArgument( - "The attribute m (first matrix's row) of MatMul should " - "be larger than 0. But it is %d.", - attr.m)); + attr.m, 0, + platform::errors::InvalidArgument( + "The attribute m (first matrix's row) of MatMul should " + "be larger than 0. But it is %d.", + attr.m)); PADDLE_ENFORCE_GT( - attr.n, 0, platform::errors::InvalidArgument( - "The attribute n (first matrix's col) of MatMul should " - "be larger than 0. But it is %d.", - attr.n)); + attr.n, 0, + platform::errors::InvalidArgument( + "The attribute n (first matrix's col) of MatMul should " + "be larger than 0. But it is %d.", + attr.n)); PADDLE_ENFORCE_GT( - attr.k, 0, platform::errors::InvalidArgument( - "The attribute k (second matrix's col) of MatMul should " - "be larger than 0. But it is %d.", - attr.k)); + attr.k, 0, + platform::errors::InvalidArgument( + "The attribute k (second matrix's col) of MatMul should " + "be larger than 0. But it is %d.", + attr.k)); return make_unique(attr, CodeSize(attr)); } }; diff --git a/paddle/fluid/operators/jit/gen/matmul.h b/paddle/fluid/operators/jit/gen/matmul.h index eb7328d7e06..af626326340 100644 --- a/paddle/fluid/operators/jit/gen/matmul.h +++ b/paddle/fluid/operators/jit/gen/matmul.h @@ -15,6 +15,7 @@ #pragma once #include // for malloc and free + #include #include @@ -33,10 +34,11 @@ class MatMulJitCode : public JitCode { size_t code_size = 256 * 1024, void* code_ptr = nullptr) : JitCode(code_size, code_ptr), m_(attr.m), n_(attr.n), k_(attr.k) { - PADDLE_ENFORCE_EQ(m_, 1, platform::errors::Unimplemented( - "Jitcode of matmul only support m==1 (first " - "matrix's row) now. But m is %d.", - m_)); + PADDLE_ENFORCE_EQ(m_, 1, + platform::errors::Unimplemented( + "Jitcode of matmul only support m==1 (first " + "matrix's row) now. 
But m is %d.", + m_)); this->genCode(); } diff --git a/paddle/fluid/operators/jit/gen/seqpool.cc b/paddle/fluid/operators/jit/gen/seqpool.cc index 52fdf04f3f6..4788050a14c 100644 --- a/paddle/fluid/operators/jit/gen/seqpool.cc +++ b/paddle/fluid/operators/jit/gen/seqpool.cc @@ -62,22 +62,23 @@ class SeqPoolCreator : public JitCodeCreator { return platform::MayIUse(platform::avx); } size_t CodeSize(const seq_pool_attr_t& attr) const override { - return 96 + - ((attr.w / YMM_FLOAT_BLOCK + 4 /* for rest */) * - 4 /* load, mul and save */ + - 256) * - 16; + return 96 + ((attr.w / YMM_FLOAT_BLOCK + 4 /* for rest */) * + 4 /* load, mul and save */ + + 256) * + 16; } std::unique_ptr CreateJitCode( const seq_pool_attr_t& attr) const override { - PADDLE_ENFORCE_GT(attr.w, 0, platform::errors::InvalidArgument( - "The attribute width of SeqPool should " - "be larger than 0. But it is %d.", - attr.w)); - PADDLE_ENFORCE_GT(attr.h, 0, platform::errors::InvalidArgument( - "The attribute height of SeqPool should " - "be larger than 0. But it is %d.", - attr.h)); + PADDLE_ENFORCE_GT(attr.w, 0, + platform::errors::InvalidArgument( + "The attribute width of SeqPool should " + "be larger than 0. But it is %d.", + attr.w)); + PADDLE_ENFORCE_GT(attr.h, 0, + platform::errors::InvalidArgument( + "The attribute height of SeqPool should " + "be larger than 0. But it is %d.", + attr.h)); return make_unique(attr, CodeSize(attr)); } }; diff --git a/paddle/fluid/operators/jit/gen_base.cc b/paddle/fluid/operators/jit/gen_base.cc index 5baafa11cfe..2a3c347c16a 100644 --- a/paddle/fluid/operators/jit/gen_base.cc +++ b/paddle/fluid/operators/jit/gen_base.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/operators/jit/gen_base.h" #include + #include "paddle/fluid/memory/allocation/cpu_allocator.h" // for posix_memalign #include "paddle/fluid/platform/cpu_info.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/jit/gen_base.h b/paddle/fluid/operators/jit/gen_base.h index c22a7f3ec92..761c52b7d7c 100644 --- a/paddle/fluid/operators/jit/gen_base.h +++ b/paddle/fluid/operators/jit/gen_base.h @@ -17,8 +17,8 @@ #include // for unique_ptr #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/operators/jit/kernel_base.h" DECLARE_bool(dump_jitcode); diff --git a/paddle/fluid/operators/jit/helper.cc b/paddle/fluid/operators/jit/helper.cc index 46da6fba2e9..07d69658632 100644 --- a/paddle/fluid/operators/jit/helper.cc +++ b/paddle/fluid/operators/jit/helper.cc @@ -13,7 +13,9 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/helper.h" + #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -112,10 +114,11 @@ void pack_weights(const float* src, float* dst, int n, int k) { int block, rest; const auto groups = packed_groups(n, k, &block, &rest); std::for_each(groups.begin(), groups.end(), [&](int i) { - PADDLE_ENFORCE_GT(i, 0, platform::errors::InvalidArgument( - "Each element of groups should be larger than " - "0. However the element: %d doesn't satify.", - i)); + PADDLE_ENFORCE_GT(i, 0, + platform::errors::InvalidArgument( + "Each element of groups should be larger than " + "0. 
However the element: %d doesn't satify.", + i)); }); int sum = std::accumulate(groups.begin(), groups.end(), 0); std::memset(dst, 0, k * sum * block * sizeof(float)); diff --git a/paddle/fluid/operators/jit/kernel_base.h b/paddle/fluid/operators/jit/kernel_base.h index 9a48d9c3c8d..0389828b495 100644 --- a/paddle/fluid/operators/jit/kernel_base.h +++ b/paddle/fluid/operators/jit/kernel_base.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/operators/jit/macro.h" #include "paddle/fluid/platform/macros.h" diff --git a/paddle/fluid/operators/jit/kernel_key.cc b/paddle/fluid/operators/jit/kernel_key.cc index 4f652002bc7..528aec9ace1 100644 --- a/paddle/fluid/operators/jit/kernel_key.cc +++ b/paddle/fluid/operators/jit/kernel_key.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/kernel_key.h" + #include // XXH64: 13.8 GB/s namespace paddle { diff --git a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc index 7e1f7ab8bf8..f11a690523b 100644 --- a/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc +++ b/paddle/fluid/operators/jit/more/intrinsic/crf_decoding.cc @@ -13,7 +13,9 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/more/intrinsic/crf_decoding.h" + #include + #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc b/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc index 61d8c50c568..ef8fe6963c0 100644 --- a/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc +++ b/paddle/fluid/operators/jit/more/intrinsic/layer_norm.cc @@ -13,7 +13,9 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/more/intrinsic/layer_norm.h" + #include + #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/more/mix/mix.cc b/paddle/fluid/operators/jit/more/mix/mix.cc index a4459cee5b8..f0008d4152f 100644 --- a/paddle/fluid/operators/jit/more/mix/mix.cc +++ b/paddle/fluid/operators/jit/more/mix/mix.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/more/mix/mix.h" + #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/fluid/operators/jit/registry.h" diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.cc b/paddle/fluid/operators/jit/more/mkl/mkl.cc index 75ebddb1259..16bf045aa66 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.cc +++ b/paddle/fluid/operators/jit/more/mkl/mkl.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/more/mkl/mkl.h" + #include "paddle/fluid/operators/jit/refer/refer.h" #include "paddle/fluid/operators/jit/registry.h" #include "paddle/fluid/platform/cpu_info.h" diff --git a/paddle/fluid/operators/jit/more/mkl/mkl.h b/paddle/fluid/operators/jit/more/mkl/mkl.h index 5f3c29ad5ef..ad04b4618cb 100644 --- a/paddle/fluid/operators/jit/more/mkl/mkl.h +++ b/paddle/fluid/operators/jit/more/mkl/mkl.h @@ -117,10 +117,11 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out, "The idx shoud be lower than the attribute table_height of " "EmbSeqPool. But %dth of idx is %d and table_height is %d.", i, idx[i], attr->table_height)); - PADDLE_ENFORCE_GE(idx[i], 0, platform::errors::InvalidArgument( - "The idx shoud be equal to or larger than " - "the 0. 
But %dth of idx is %d.", - i, idx[i])); + PADDLE_ENFORCE_GE(idx[i], 0, + platform::errors::InvalidArgument( + "The idx shoud be equal to or larger than " + "the 0. But %dth of idx is %d.", + i, idx[i])); }; for (int64_t w = 0; w != attr->index_width; ++w) { @@ -204,11 +205,12 @@ void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows, "less than the attribute. But %dth of rows " "is %d and grad_width is %d.", i, h_idx, attr->param_height)); - PADDLE_ENFORCE_GE(h_idx, 0, platform::errors::InvalidArgument( - "The rows of Sgd should be " - "larger than 0. But %dth of rows " - "is %d.", - i, h_idx)); + PADDLE_ENFORCE_GE( + h_idx, 0, + platform::errors::InvalidArgument("The rows of Sgd should be " + "larger than 0. But %dth of rows " + "is %d.", + i, h_idx)); VAXPY(scalar, grad + i * width, out + h_idx * width, width); } } else { @@ -220,11 +222,12 @@ void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows, "less than the attribute. But %dth of rows " "is %d and grad_width is %d.", i, h_idx, attr->param_height)); - PADDLE_ENFORCE_GE(h_idx, 0, platform::errors::InvalidArgument( - "The rows of Sgd should be " - "larger than 0. But %dth of rows " - "is %d.", - i, h_idx)); + PADDLE_ENFORCE_GE( + h_idx, 0, + platform::errors::InvalidArgument("The rows of Sgd should be " + "larger than 0. But %dth of rows " + "is %d.", + i, h_idx)); VScal(&scalar, grad + i * width, out + h_idx * width, width); VAdd(param + h_idx * width, out + h_idx * width, out + h_idx * width, width); diff --git a/paddle/fluid/operators/jit/refer/refer.cc b/paddle/fluid/operators/jit/refer/refer.cc index 779d4c172b8..9919f2d46dd 100644 --- a/paddle/fluid/operators/jit/refer/refer.cc +++ b/paddle/fluid/operators/jit/refer/refer.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/jit/refer/refer.h" + #include "paddle/fluid/operators/jit/registry.h" namespace refer = paddle::operators::jit::refer; diff --git a/paddle/fluid/operators/jit/refer/refer.h b/paddle/fluid/operators/jit/refer/refer.h index 79b2e174efc..3f1e5b3235b 100644 --- a/paddle/fluid/operators/jit/refer/refer.h +++ b/paddle/fluid/operators/jit/refer/refer.h @@ -481,10 +481,11 @@ void EmbSeqPool(const T* table, const int64_t* idx, T* out, "The idx shoud be lower than the attribute table_height of " "EmbSeqPool. But %dth of idx is %d and table_height is %d.", i, idx[i], attr->table_height)); - PADDLE_ENFORCE_GE(idx[i], 0, platform::errors::InvalidArgument( - "The idx shoud be equal to or larger than " - "the 0. But %dth of idx is %d.", - i, idx[i])); + PADDLE_ENFORCE_GE(idx[i], 0, + platform::errors::InvalidArgument( + "The idx shoud be equal to or larger than " + "the 0. But %dth of idx is %d.", + i, idx[i])); }; for (int64_t w = 0; w != attr->index_width; ++w) { @@ -539,11 +540,12 @@ void Sgd(const T* lr, const T* param, const T* grad, const int64_t* rows, "less than the attribute. But %dth of rows " "is %d and grad_width is %d.", i, h_idx, attr->param_height)); - PADDLE_ENFORCE_GE(h_idx, 0, platform::errors::InvalidArgument( - "The rows of Sgd should be " - "larger than 0. But %dth of rows " - "is %d.", - i, h_idx)); + PADDLE_ENFORCE_GE( + h_idx, 0, + platform::errors::InvalidArgument("The rows of Sgd should be " + "larger than 0. 
But %dth of rows " + "is %d.", + i, h_idx)); for (int64_t j = 0; j < attr->grad_width; ++j) { out[h_idx * attr->grad_width + j] = param[h_idx * attr->grad_width + j] - diff --git a/paddle/fluid/operators/jit/registry.h b/paddle/fluid/operators/jit/registry.h index 567a9032369..15d5e605b01 100644 --- a/paddle/fluid/operators/jit/registry.h +++ b/paddle/fluid/operators/jit/registry.h @@ -18,6 +18,7 @@ #include #include #include // for std::move + #include "paddle/fluid/operators/jit/kernel_base.h" #include "paddle/fluid/operators/jit/kernel_pool.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/jit/test.cc b/paddle/fluid/operators/jit/test.cc index 74f2d62c64d..27e816248ab 100644 --- a/paddle/fluid/operators/jit/test.cc +++ b/paddle/fluid/operators/jit/test.cc @@ -286,48 +286,48 @@ void TestKernelLSTM() { ref(&step, &attr); VLOG(10) << attr; - auto verifier = []( - const typename KernelTuple::func_type tgt, - const std::vector& xsrc, const std::vector& wp, - const std::vector& ct_1, const std::vector& ct_ref, - const std::vector& ht_ref, - const typename KernelTuple::attr_type& attr) { - EXPECT_TRUE(tgt != nullptr); - EXPECT_EQ(ct_ref.size(), ht_ref.size()); - EXPECT_EQ(ct_1.size(), ht_ref.size()); - EXPECT_EQ(xsrc.size(), 4 * ht_ref.size()); - EXPECT_EQ(wp.size(), 3 * ht_ref.size()); - - // x could be changed after compute, so copy to save src - int d = ht_ref.size(); - std::vector x(xsrc.size()), ct(ct_ref.size()), - ht(ht_ref.size()); - std::vector checked(2 * d); - std::copy(xsrc.begin(), xsrc.end(), x.begin()); - - const T* ct_1_data = ct_1.data(); - const T* wp_data = wp.data(); - const T* ct_ref_data = ct_ref.data(); - const T* ht_ref_data = ht_ref.data(); - T* x_data = x.data(); - T* ct_data = ct.data(); - T* ht_data = ht.data(); - T* checked_data = checked.data(); - - jit::lstm_t step; - step.gates = x_data; - step.ct_1 = ct_1_data; - step.ct = ct_data; - step.ht = ht_data; - if (attr.use_peephole) { - step.wp = wp_data; - step.checked = checked_data; - } - - tgt(&step, &attr); - ExpectEQ(ct_data, ct_ref_data, d); - ExpectEQ(ht_data, ht_ref_data, d); - }; + auto verifier = + [](const typename KernelTuple::func_type tgt, + const std::vector& xsrc, const std::vector& wp, + const std::vector& ct_1, const std::vector& ct_ref, + const std::vector& ht_ref, + const typename KernelTuple::attr_type& attr) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(ct_ref.size(), ht_ref.size()); + EXPECT_EQ(ct_1.size(), ht_ref.size()); + EXPECT_EQ(xsrc.size(), 4 * ht_ref.size()); + EXPECT_EQ(wp.size(), 3 * ht_ref.size()); + + // x could be changed after compute, so copy to save src + int d = ht_ref.size(); + std::vector x(xsrc.size()), ct(ct_ref.size()), + ht(ht_ref.size()); + std::vector checked(2 * d); + std::copy(xsrc.begin(), xsrc.end(), x.begin()); + + const T* ct_1_data = ct_1.data(); + const T* wp_data = wp.data(); + const T* ct_ref_data = ct_ref.data(); + const T* ht_ref_data = ht_ref.data(); + T* x_data = x.data(); + T* ct_data = ct.data(); + T* ht_data = ht.data(); + T* checked_data = checked.data(); + + jit::lstm_t step; + step.gates = x_data; + step.ct_1 = ct_1_data; + step.ct = ct_data; + step.ht = ht_data; + if (attr.use_peephole) { + step.wp = wp_data; + step.checked = checked_data; + } + + tgt(&step, &attr); + ExpectEQ(ct_data, ct_ref_data, d); + ExpectEQ(ht_data, ht_ref_data, d); + }; TestAllImpls(attr, verifier, xsrc, wp, ct_1, ct_ref, ht_ref, attr); } @@ -484,41 +484,42 @@ void TestKernelLayerNorm() { ref(x_data, outref_data, mean_data, var_data, scale_data, 
bias_data, left, epsilon, right); - auto verifier = []( - const typename KernelTuple::func_type tgt, const std::vector& x_, - const std::vector& outref_, const std::vector& mean_, - const std::vector& var_, const std::vector& scale, - const std::vector& bias, const int& left, const float& epsilon, - const typename KernelTuple::attr_type& right) { - EXPECT_TRUE(tgt != nullptr); - std::vector outtgt(outref_.size()); - std::vector x(x_.size()); - std::vector mean(mean_.size()); - std::vector var(var_.size()); - std::vector outref(outref_.size()); - std::copy(x_.begin(), x_.end(), x.begin()); - std::copy(mean_.begin(), mean_.end(), mean.begin()); - std::copy(var_.begin(), var_.end(), var.begin()); - std::copy(outref_.begin(), outref_.end(), outref.begin()); - - EXPECT_EQ(x.size(), static_cast(left * right)); - EXPECT_EQ(outref.size(), static_cast(left * right)); - EXPECT_EQ(mean.size(), static_cast(left)); - EXPECT_EQ(var.size(), static_cast(left)); - EXPECT_EQ(scale.size(), static_cast(right)); - EXPECT_EQ(bias.size(), static_cast(right)); - - const T* scale_data = scale.data(); - const T* bias_data = bias.data(); - T* x_data = x.data(); - T* mean_data = mean.data(); - T* var_data = var.data(); - T* outref_data = outref.data(); - T* outtgt_data = outtgt.data(); - tgt(x_data, outtgt_data, mean_data, var_data, scale_data, bias_data, - left, epsilon, right); - ExpectEQ(outtgt_data, outref_data, left * right); - }; + auto verifier = + [](const typename KernelTuple::func_type tgt, + const std::vector& x_, const std::vector& outref_, + const std::vector& mean_, const std::vector& var_, + const std::vector& scale, const std::vector& bias, + const int& left, const float& epsilon, + const typename KernelTuple::attr_type& right) { + EXPECT_TRUE(tgt != nullptr); + std::vector outtgt(outref_.size()); + std::vector x(x_.size()); + std::vector mean(mean_.size()); + std::vector var(var_.size()); + std::vector outref(outref_.size()); + std::copy(x_.begin(), x_.end(), x.begin()); + std::copy(mean_.begin(), mean_.end(), mean.begin()); + std::copy(var_.begin(), var_.end(), var.begin()); + std::copy(outref_.begin(), outref_.end(), outref.begin()); + + EXPECT_EQ(x.size(), static_cast(left * right)); + EXPECT_EQ(outref.size(), static_cast(left * right)); + EXPECT_EQ(mean.size(), static_cast(left)); + EXPECT_EQ(var.size(), static_cast(left)); + EXPECT_EQ(scale.size(), static_cast(right)); + EXPECT_EQ(bias.size(), static_cast(right)); + + const T* scale_data = scale.data(); + const T* bias_data = bias.data(); + T* x_data = x.data(); + T* mean_data = mean.data(); + T* var_data = var.data(); + T* outref_data = outref.data(); + T* outtgt_data = outtgt.data(); + tgt(x_data, outtgt_data, mean_data, var_data, scale_data, + bias_data, left, epsilon, right); + ExpectEQ(outtgt_data, outref_data, left * right); + }; TestAllImpls(right, verifier, x, outref, mean, var, scale, bias, left, epsilon, right); @@ -548,11 +549,12 @@ void TestKernelCRFDecoding() { ref(seq_len, (const T*)x.data(), (const T*)w.data(), alpharef.data(), trackref.data(), tag_num); - auto verifier = []( - const typename KernelTuple::func_type tgt, const int& seq_len, - const std::vector& x, const std::vector& w, - const std::vector& alpharef, const std::vector& trackref, - const typename KernelTuple::attr_type& tag_num) { + auto verifier = [](const typename KernelTuple::func_type tgt, + const int& seq_len, const std::vector& x, + const std::vector& w, + const std::vector& alpharef, + const std::vector& trackref, + const typename KernelTuple::attr_type& 
tag_num) { constexpr int state_trans_base_idx = 2; EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(x.size(), static_cast(seq_len * tag_num)); @@ -878,12 +880,13 @@ void TestKernelAdam() { mom2.data(), param.data(), mom1_out.data(), mom2_out.data(), param_out.data()); - auto verifier = []( - const typename KernelTuple::func_type tgt, T beta1, T beta2, T lr, T eps, - int64_t numel, const std::vector& grad, const std::vector& mom1, - const std::vector& mom2, const std::vector& param, - const std::vector& ref_mom1_out, const std::vector& ref_mom2_out, - const std::vector& ref_param_out) { + auto verifier = [](const typename KernelTuple::func_type tgt, T beta1, + T beta2, T lr, T eps, int64_t numel, + const std::vector& grad, const std::vector& mom1, + const std::vector& mom2, const std::vector& param, + const std::vector& ref_mom1_out, + const std::vector& ref_mom2_out, + const std::vector& ref_param_out) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(param.size(), static_cast(numel)); EXPECT_EQ(grad.size(), static_cast(numel)); @@ -944,30 +947,31 @@ void TestKernelAdamW() { grad.data(), mom1.data(), mom2.data(), param.data(), mom1_out.data(), mom2_out.data(), param_out.data()); - auto verifier = []( - const typename KernelTuple::func_type tgt, T beta1, T beta2, T lr, T eps, - T old_lr, T lr_ratio, T coeff, int64_t numel, const std::vector& grad, - const std::vector& mom1, const std::vector& mom2, - const std::vector& param, const std::vector& ref_mom1_out, - const std::vector& ref_mom2_out, const std::vector& ref_param_out) { - EXPECT_TRUE(tgt != nullptr); - EXPECT_EQ(param.size(), static_cast(numel)); - EXPECT_EQ(grad.size(), static_cast(numel)); - EXPECT_EQ(mom1.size(), static_cast(numel)); - EXPECT_EQ(mom2.size(), static_cast(numel)); - - std::vector jit_mom1_out(ref_mom1_out.size()); - std::vector jit_mom2_out(ref_mom2_out.size()); - std::vector jit_param_out(ref_param_out.size()); - - tgt(beta1, beta2, -lr, eps, old_lr, lr_ratio, coeff, numel, grad.data(), - mom1.data(), mom2.data(), param.data(), jit_mom1_out.data(), - jit_mom2_out.data(), jit_param_out.data()); - - ExpectEQ(ref_mom1_out.data(), jit_mom1_out.data(), numel); - ExpectEQ(ref_mom2_out.data(), jit_mom2_out.data(), numel); - ExpectEQ(ref_param_out.data(), jit_param_out.data(), numel); - }; + auto verifier = + [](const typename KernelTuple::func_type tgt, T beta1, T beta2, T lr, + T eps, T old_lr, T lr_ratio, T coeff, int64_t numel, + const std::vector& grad, const std::vector& mom1, + const std::vector& mom2, const std::vector& param, + const std::vector& ref_mom1_out, const std::vector& ref_mom2_out, + const std::vector& ref_param_out) { + EXPECT_TRUE(tgt != nullptr); + EXPECT_EQ(param.size(), static_cast(numel)); + EXPECT_EQ(grad.size(), static_cast(numel)); + EXPECT_EQ(mom1.size(), static_cast(numel)); + EXPECT_EQ(mom2.size(), static_cast(numel)); + + std::vector jit_mom1_out(ref_mom1_out.size()); + std::vector jit_mom2_out(ref_mom2_out.size()); + std::vector jit_param_out(ref_param_out.size()); + + tgt(beta1, beta2, -lr, eps, old_lr, lr_ratio, coeff, numel, grad.data(), + mom1.data(), mom2.data(), param.data(), jit_mom1_out.data(), + jit_mom2_out.data(), jit_param_out.data()); + + ExpectEQ(ref_mom1_out.data(), jit_mom1_out.data(), numel); + ExpectEQ(ref_mom2_out.data(), jit_mom2_out.data(), numel); + ExpectEQ(ref_param_out.data(), jit_param_out.data(), numel); + }; TestAllImpls( 1, verifier, beta1, beta2, learning_rate, eps, old_lr, lr_ratio, coeff, @@ -988,8 +992,9 @@ void TestKernelSgd() { "and n-1 is %d.", static_cast(upper - lower), 
n - 1)); PADDLE_ENFORCE_GT( - n, 0, paddle::platform::errors::InvalidArgument( - "The Sgd size should be larger than 0. But the n is %d.", n)); + n, 0, + paddle::platform::errors::InvalidArgument( + "The Sgd size should be larger than 0. But the n is %d.", n)); std::vector all, out; for (int i = 0; i < n; ++i) { all.push_back(i); @@ -1031,11 +1036,12 @@ void TestKernelSgd() { grad_w); } - auto verifier = []( - const typename KernelTuple::func_type tgt, const T lr, - const std::vector& param, const std::vector& grad, - const std::vector& rows, const std::vector& oref, - const typename KernelTuple::attr_type& attr) { + auto verifier = [](const typename KernelTuple::func_type tgt, + const T lr, const std::vector& param, + const std::vector& grad, + const std::vector& rows, + const std::vector& oref, + const typename KernelTuple::attr_type& attr) { EXPECT_TRUE(tgt != nullptr); EXPECT_EQ(param.size(), static_cast(attr.param_height * attr.param_width)); diff --git a/paddle/fluid/operators/kernel_primitives/kernel_primitives.h b/paddle/fluid/operators/kernel_primitives/kernel_primitives.h index 169befc88f2..82de4c82d11 100644 --- a/paddle/fluid/operators/kernel_primitives/kernel_primitives.h +++ b/paddle/fluid/operators/kernel_primitives/kernel_primitives.h @@ -19,4 +19,4 @@ namespace paddle { namespace operators { namespace kernel_primitives = phi::kps; } -} +} // namespace paddle diff --git a/paddle/fluid/operators/kldiv_loss_op.cc b/paddle/fluid/operators/kldiv_loss_op.cc index 67c1942ea0b..8597c21b3ec 100644 --- a/paddle/fluid/operators/kldiv_loss_op.cc +++ b/paddle/fluid/operators/kldiv_loss_op.cc @@ -11,6 +11,7 @@ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/infermeta/binary.h" diff --git a/paddle/fluid/operators/kldiv_loss_op_npu.cc b/paddle/fluid/operators/kldiv_loss_op_npu.cc index eac181489aa..41499f3f7bf 100644 --- a/paddle/fluid/operators/kldiv_loss_op_npu.cc +++ b/paddle/fluid/operators/kldiv_loss_op_npu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the Licnse. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/kthvalue_op.cc b/paddle/fluid/operators/kthvalue_op.cc index 4c679d30263..1ff9ab796e9 100644 --- a/paddle/fluid/operators/kthvalue_op.cc +++ b/paddle/fluid/operators/kthvalue_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/l1_norm_op.cc b/paddle/fluid/operators/l1_norm_op.cc index ddd0554add5..7a6a28a33c1 100644 --- a/paddle/fluid/operators/l1_norm_op.cc +++ b/paddle/fluid/operators/l1_norm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/l1_norm_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/label_smooth_op.cc b/paddle/fluid/operators/label_smooth_op.cc index 7e07610db28..e14e6100647 100644 --- a/paddle/fluid/operators/label_smooth_op.cc +++ b/paddle/fluid/operators/label_smooth_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
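// ---------------------------------------------------------------------------
// Illustrative sketch of the dominant rewrap in the PADDLE_ENFORCE_* hunks
// above: the checked value and bound now stay on the macro's first line, and
// the platform::errors::InvalidArgument(...) builder starts on its own
// continuation line instead of hanging behind the bound. ENFORCE_GT and
// InvalidArgument below are self-contained stand-ins, not Paddle's macros.
#include <stdexcept>
#include <string>

#define ENFORCE_GT(val, lower, err)     \
  do {                                  \
    if (!((val) > (lower))) throw(err); \
  } while (0)

inline std::invalid_argument InvalidArgument(const std::string& msg) {
  return std::invalid_argument(msg);
}

inline void CheckSgdSize(int n) {
  // Old wrapping:
  //   ENFORCE_GT(n, 0, InvalidArgument(
  //                        "The Sgd size should be larger than 0."));
  // New wrapping, as applied throughout this patch:
  ENFORCE_GT(n, 0,
             InvalidArgument("The Sgd size should be larger than 0."));
}
// ---------------------------------------------------------------------------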
*/ #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/layer_norm_kernel.cu.h b/paddle/fluid/operators/layer_norm_kernel.cu.h index 0c5946b4ae4..ac20a5962f3 100644 --- a/paddle/fluid/operators/layer_norm_kernel.cu.h +++ b/paddle/fluid/operators/layer_norm_kernel.cu.h @@ -661,7 +661,7 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void fused_ln_bwd_1024_kernel( * output is [1, 1024]. * #blocks: 32 * #threads: 512 -*/ + */ // todo(@limin29): to think if there are better impl strategies template < typename U, typename ScaleT = U, int VecSize = 1, int WARPS_M = 16, @@ -783,16 +783,16 @@ __global__ __launch_bounds__(THREADS_PER_CTA) void ln_bwd_1024_final_kernel( } /* This function support two kinds of computations (only for float and fp16 -* type): -* -* Case-1: compute layer_norm_grad for layernorm op by setting mask_ptr and -* d_dropout_src_ptr to nullptr. Here, d_x_ptr returns the grad of layernorm -* input. -* -* Case-2: compute layer_norm_grad + residual_grad + dropout_grad for -* fused_dropout_residual_layernorm op. Here, dx_ptr returns residual_grad. -* -*/ + * type): + * + * Case-1: compute layer_norm_grad for layernorm op by setting mask_ptr and + * d_dropout_src_ptr to nullptr. Here, d_x_ptr returns the grad of layernorm + * input. + * + * Case-2: compute layer_norm_grad + residual_grad + dropout_grad for + * fused_dropout_residual_layernorm op. Here, dx_ptr returns residual_grad. + * + */ template void ln_bwd_1024_kernel_driver(const phi::GPUContext &dev_ctx, const int rows, @@ -839,19 +839,19 @@ void ln_bwd_1024_kernel_driver(const phi::GPUContext &dev_ctx, const int rows, "To compute fused_dropout_residual_ln grad, d_dropout_src_ptr " "can't be null")); } - fused_ln_bwd_1024_kernel< - true, T, U, ScaleT, MaskType, VecSize, WARPS_M, WARPS_N, - BYTES_PER_LDG><<>>( - rows, epsilon, x_ptr, scale_ptr, mean_ptr, var_ptr, dout_ptr, - dscale_temp_ptr, dbias_temp_ptr, dx_ptr, mask_ptr, factor, - d_dropout_src_ptr); + fused_ln_bwd_1024_kernel + <<>>( + rows, epsilon, x_ptr, scale_ptr, mean_ptr, var_ptr, dout_ptr, + dscale_temp_ptr, dbias_temp_ptr, dx_ptr, mask_ptr, factor, + d_dropout_src_ptr); } else { - fused_ln_bwd_1024_kernel< - false, T, U, ScaleT, MaskType, VecSize, WARPS_M, WARPS_N, - BYTES_PER_LDG><<>>( - rows, epsilon, x_ptr, scale_ptr, mean_ptr, var_ptr, dout_ptr, - dscale_temp_ptr, dbias_temp_ptr, dx_ptr); + fused_ln_bwd_1024_kernel + <<>>( + rows, epsilon, x_ptr, scale_ptr, mean_ptr, var_ptr, dout_ptr, + dscale_temp_ptr, dbias_temp_ptr, dx_ptr); } const int WARPS_M_2 = 16; const int WARPS_N_2 = 1; @@ -873,10 +873,10 @@ void ln_bwd_1024_kernel_driver(const phi::GPUContext &dev_ctx, const int rows, PADDLE_THROW(platform::errors::InvalidArgument( "Only support float and fp16 type")); } else { - ln_bwd_1024_final_kernel< - U, ScaleT, VecSize_2, WARPS_M_2, WARPS_N_2, - BYTES_PER_LDG_2><<>>( - gridx, dscale_temp_ptr, dbias_temp_ptr, dscale_ptr, dbias_ptr); + ln_bwd_1024_final_kernel + <<>>( + gridx, dscale_temp_ptr, dbias_temp_ptr, dscale_ptr, dbias_ptr); } } else { PADDLE_THROW(platform::errors::InvalidArgument( @@ -1387,16 +1387,17 @@ static void LayerNormBackward( if (gradient_flag == 0) return; if (batch_size == 1) { - LayerNormBackwardWhenBatchSizeIsOne<<< - (feature_size + kMaxBlockDim - 1) / kMaxBlockDim, kMaxBlockDim, 0, - stream>>>(x, d_y, d_x, d_scale, d_bias, mean, var, scale, epsilon, - feature_size); + LayerNormBackwardWhenBatchSizeIsOne + <<<(feature_size + kMaxBlockDim - 1) / kMaxBlockDim, kMaxBlockDim, 0, 
+ stream>>>(x, d_y, d_x, d_scale, d_bias, mean, var, scale, epsilon, + feature_size); if (d_x != nullptr) { switch (GetDesiredBlockDim(feature_size)) { - FIXED_BLOCK_DIM_CASE(LayerNormBackwardPostProcessToCalculateDX< - T, U, kBlockDim><<<1, kBlockDim, 0, stream>>>( - x, d_x, mean, var, epsilon, feature_size)); + FIXED_BLOCK_DIM_CASE( + LayerNormBackwardPostProcessToCalculateDX + <<<1, kBlockDim, 0, stream>>>(x, d_x, mean, var, epsilon, + feature_size)); } } return; @@ -1408,9 +1409,9 @@ static void LayerNormBackward( switch (block_dim) { FIXED_BLOCK_DIM_FIXED_BLOCK_NUM_CASE( feature_size, kMaxBlockNum, - LayerNormBackwardGradientScaleOrBias< - T, U, kBlockDim, false, false, - ScaleBiasWithSameTypeX><<>>( + LayerNormBackwardGradientScaleOrBias + <<>>( x, d_y, d_scale, d_bias, d_x, mean, var, scale, epsilon, batch_size, feature_size, col_offset)); } @@ -1419,9 +1420,9 @@ static void LayerNormBackward( switch (block_dim) { FIXED_BLOCK_DIM_FIXED_BLOCK_NUM_CASE( feature_size, kMaxBlockNum, - LayerNormBackwardGradientScaleOrBias< - T, U, kBlockDim, false, true, - ScaleBiasWithSameTypeX><<>>( + LayerNormBackwardGradientScaleOrBias + <<>>( x, d_y, d_scale, d_bias, d_x, mean, var, scale, epsilon, batch_size, feature_size, col_offset)); } @@ -1430,9 +1431,9 @@ static void LayerNormBackward( switch (block_dim) { FIXED_BLOCK_DIM_FIXED_BLOCK_NUM_CASE( feature_size, kMaxBlockNum, - LayerNormBackwardGradientAll< - T, U, kBlockDim, false, - ScaleBiasWithSameTypeX><<>>( + LayerNormBackwardGradientAll + <<>>( x, d_y, d_scale, d_bias, d_x, mean, var, scale, epsilon, batch_size, feature_size, col_offset)); } @@ -1440,9 +1441,9 @@ static void LayerNormBackward( case 4: // d_x != nullptr, d_scale == nullptr, d_bias == nullptr switch (GetDesiredBlockDim(feature_size)) { FIXED_BLOCK_DIM_CASE( - LayerNormBackwardGradientOnlyDX< - T, U, kBlockDim, - ScaleBiasWithSameTypeX><<>>( + LayerNormBackwardGradientOnlyDX + <<>>( x, d_y, d_x, mean, var, scale, epsilon, feature_size)); } break; @@ -1450,34 +1451,34 @@ static void LayerNormBackward( switch (block_dim) { FIXED_BLOCK_DIM_FIXED_BLOCK_NUM_CASE( feature_size, kMaxBlockNum, - LayerNormBackwardGradientScaleOrBias< - T, U, kBlockDim, true, false, - ScaleBiasWithSameTypeX><<>>( + LayerNormBackwardGradientScaleOrBias + <<>>( x, d_y, d_scale, d_bias, d_x, mean, var, scale, epsilon, batch_size, feature_size, col_offset)); } switch (GetDesiredBlockDim(feature_size)) { FIXED_BLOCK_DIM_CASE( - LayerNormBackwardPostProcessToCalculateDX< - T, U, kBlockDim><<>>( - x, d_x, mean, var, epsilon, feature_size)); + LayerNormBackwardPostProcessToCalculateDX + <<>>(x, d_x, mean, var, epsilon, + feature_size)); } break; case 6: // d_x != nullptr, d_scale != nullptr, d_bias == nullptr switch (block_dim) { FIXED_BLOCK_DIM_FIXED_BLOCK_NUM_CASE( feature_size, kMaxBlockNum, - LayerNormBackwardGradientScaleOrBias< - T, U, kBlockDim, true, true, - ScaleBiasWithSameTypeX><<>>( + LayerNormBackwardGradientScaleOrBias + <<>>( x, d_y, d_scale, d_bias, d_x, mean, var, scale, epsilon, batch_size, feature_size, col_offset)); } switch (GetDesiredBlockDim(feature_size)) { FIXED_BLOCK_DIM_CASE( - LayerNormBackwardPostProcessToCalculateDX< - T, U, kBlockDim><<>>( - x, d_x, mean, var, epsilon, feature_size)); + LayerNormBackwardPostProcessToCalculateDX + <<>>(x, d_x, mean, var, epsilon, + feature_size)); } break; case 7: // d_x != nullptr, d_scale != nullptr, d_bias != nullptr @@ -1511,29 +1512,30 @@ static void LayerNormBackward( U *part_grad_gamma = reinterpret_cast(part_grad_gamma_ptr->ptr()); U 
*part_grad_beta = reinterpret_cast(part_grad_beta_ptr->ptr()); - LayerNormBackwardPartGradGammaBeta< - T, U, BDIMX2, BDIMY2, VPT><<>>( - d_y, x, batch_size, feature_size, mean, var, epsilon, - part_grad_gamma, - part_grad_beta); // compute part_grad_gamma, beta + LayerNormBackwardPartGradGammaBeta + <<>>( + d_y, x, batch_size, feature_size, mean, var, epsilon, + part_grad_gamma, + part_grad_beta); // compute part_grad_gamma, beta constexpr int BDIMX3 = 32; constexpr int BDIMY3 = 8; dim3 threads3(BDIMX3, BDIMY3, 1); const dim3 blocks3((feature_size + BDIMX2 - 1) / BDIMX2, 1, 1); - LayerNormBackwardSumGradGammaBeta< - T, U, BDIMX3, BDIMY3, - ScaleBiasWithSameTypeX><<>>( - part_grad_gamma, part_grad_beta, part_size, batch_size, - feature_size, d_scale, d_bias); + LayerNormBackwardSumGradGammaBeta + <<>>(part_grad_gamma, part_grad_beta, + part_size, batch_size, + feature_size, d_scale, d_bias); constexpr int BDIMX1 = 32; constexpr int BDIMY1 = 4; dim3 threads1(BDIMX1, BDIMY1, 1); - LayerNormBackwardComputeGradInput< - T, U, BDIMX1, BDIMY1, - ScaleBiasWithSameTypeX><<>>( - d_y, x, batch_size, feature_size, mean, var, epsilon, scale, d_x); + LayerNormBackwardComputeGradInput + <<>>(d_y, x, batch_size, + feature_size, mean, var, + epsilon, scale, d_x); #ifdef PADDLE_WITH_CUDA } #endif diff --git a/paddle/fluid/operators/layer_norm_op.cc b/paddle/fluid/operators/layer_norm_op.cc index 224ab748dab..3d1e563ef1a 100644 --- a/paddle/fluid/operators/layer_norm_op.cc +++ b/paddle/fluid/operators/layer_norm_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_MKLDNN diff --git a/paddle/fluid/operators/layer_norm_op_xpu.cc b/paddle/fluid/operators/layer_norm_op_xpu.cc index 3b21a55f8df..a27952c57f7 100644 --- a/paddle/fluid/operators/layer_norm_op_xpu.cc +++ b/paddle/fluid/operators/layer_norm_op_xpu.cc @@ -88,8 +88,9 @@ class LayerNormGradXPUKernel : public framework::OpKernel { auto* dscale_data = (dscale == nullptr ? nullptr : dscale->mutable_data(ctx.GetPlace())); - auto* dbias_data = (dbias == nullptr ? nullptr : dbias->mutable_data( - ctx.GetPlace())); + auto* dbias_data = + (dbias == nullptr ? nullptr + : dbias->mutable_data(ctx.GetPlace())); auto* dx_data = (dx == nullptr ? nullptr : dx->mutable_data(ctx.GetPlace())); auto& dev_ctx = ctx.template device_context(); diff --git a/paddle/fluid/operators/layout_utils.h b/paddle/fluid/operators/layout_utils.h index e304f33d045..f058afdb4ad 100644 --- a/paddle/fluid/operators/layout_utils.h +++ b/paddle/fluid/operators/layout_utils.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/linear_chain_crf_op.h b/paddle/fluid/operators/linear_chain_crf_op.h index 7308363b9fe..a6ef87d43e2 100644 --- a/paddle/fluid/operators/linear_chain_crf_op.h +++ b/paddle/fluid/operators/linear_chain_crf_op.h @@ -28,9 +28,10 @@ static inline T NormalizeL1(T* x, size_t len) { // Right now, we just bet that sum won't be zero. If this really happens, we // will figure out what should be done then. 
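// ---------------------------------------------------------------------------
// Illustrative CUDA sketch (kernel and variable names are hypothetical) of
// the launch-site rewrap in the layer_norm and interpolate hunks above: the
// upgraded clang-format keeps the template instantiation whole and breaks
// before the <<<...>>> launch configuration, instead of splitting the
// template argument list across lines.
#include <cuda_runtime.h>

template <typename T, int BlockDim>
__global__ void KeScale(T* data, T factor, int n) {
  int i = blockIdx.x * BlockDim + threadIdx.x;
  if (i < n) data[i] *= factor;
}

inline void LaunchScale(float* data, int n, cudaStream_t stream) {
  constexpr int kBlockDim = 256;
  const int grid = (n + kBlockDim - 1) / kBlockDim;
  // Old style:
  //   KeScale<
  //       float, kBlockDim><<<grid, kBlockDim, 0, stream>>>(data, 2.0f, n);
  // New style:
  KeScale<float, kBlockDim>
      <<<grid, kBlockDim, 0, stream>>>(data, 2.0f, n);
}
// ---------------------------------------------------------------------------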
PADDLE_ENFORCE_GT( - sum, 0., platform::errors::InvalidArgument( - "The unnormalized probabilities of all possible unfinished " - "sequences must be greater than 0.")); + sum, 0., + platform::errors::InvalidArgument( + "The unnormalized probabilities of all possible unfinished " + "sequences must be greater than 0.")); T s = 1. / sum; for (size_t i = 0; i < len; ++i) x[i] *= s; return sum; @@ -44,8 +45,8 @@ struct ScalarMul { T scalar; }; -using framework::LoDTensor; using framework::LoD; +using framework::LoDTensor; using framework::Tensor; template diff --git a/paddle/fluid/operators/linspace_op.cc b/paddle/fluid/operators/linspace_op.cc index e9375be1706..5e451d99dbc 100644 --- a/paddle/fluid/operators/linspace_op.cc +++ b/paddle/fluid/operators/linspace_op.cc @@ -77,10 +77,9 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, LinspaceInferShapeFunctor); -REGISTER_OP_VERSION(linspace) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(linspace).AddCheckpoint( + R"ROC( Upgrade linspace to add a new attribute [dtype]. )ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "dtype", "In order to change output data type ", 5)); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "dtype", "In order to change output data type ", 5)); diff --git a/paddle/fluid/operators/lite/lite_engine_op.cc b/paddle/fluid/operators/lite/lite_engine_op.cc index 7a879c1e216..0ec1c55f7ab 100644 --- a/paddle/fluid/operators/lite/lite_engine_op.cc +++ b/paddle/fluid/operators/lite/lite_engine_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lite/lite_engine_op.h" + #include #include diff --git a/paddle/fluid/operators/lite/lite_engine_op.h b/paddle/fluid/operators/lite/lite_engine_op.h index 5d2a1683d38..240f6b06325 100644 --- a/paddle/fluid/operators/lite/lite_engine_op.h +++ b/paddle/fluid/operators/lite/lite_engine_op.h @@ -26,11 +26,10 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/inference/analysis/helper.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" - #include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/inference/lite/tensor_utils.h" #include "paddle/fluid/inference/utils/singleton.h" +#include "paddle/fluid/platform/device/gpu/gpu_info.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lite/lite_engine_op_test.cc b/paddle/fluid/operators/lite/lite_engine_op_test.cc index 01583cea312..c38386365f3 100644 --- a/paddle/fluid/operators/lite/lite_engine_op_test.cc +++ b/paddle/fluid/operators/lite/lite_engine_op_test.cc @@ -12,6 +12,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
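// ---------------------------------------------------------------------------
// Illustrative sketch of the using-declaration sort visible in the
// linear_chain_crf_op.h hunk above (LoD now precedes LoDTensor): the
// upgraded clang-format orders the declarations lexicographically. The
// namespace and types here are simplified stand-ins.
#include <cstddef>
#include <vector>

namespace framework {
using LoD = std::vector<std::vector<std::size_t>>;
class LoDTensor {};
class Tensor {};
}  // namespace framework

// Sorted order after the patch: LoD < LoDTensor < Tensor.
using framework::LoD;
using framework::LoDTensor;
using framework::Tensor;
// ---------------------------------------------------------------------------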
*/ +#include "paddle/fluid/operators/lite/lite_engine_op.h" + #include #include "paddle/fluid/framework/block_desc.h" @@ -19,13 +21,12 @@ #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" #include "paddle/fluid/inference/utils/singleton.h" -#include "paddle/fluid/operators/lite/lite_engine_op.h" #include "paddle/fluid/operators/lite/ut_helper.h" USE_NO_KERNEL_OP(lite_engine) -using paddle::inference::lite::AddTensorToBlockDesc; using paddle::inference::lite::AddFetchListToBlockDesc; +using paddle::inference::lite::AddTensorToBlockDesc; using paddle::inference::lite::CreateTensor; using paddle::inference::lite::serialize_params; namespace paddle { diff --git a/paddle/fluid/operators/load_combine_op.cc b/paddle/fluid/operators/load_combine_op.cc index 374bfa73f21..94797b08ade 100644 --- a/paddle/fluid/operators/load_combine_op.cc +++ b/paddle/fluid/operators/load_combine_op.cc @@ -12,11 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/load_combine_op.h" + #include #include -#include "paddle/fluid/operators/load_combine_op.h" - namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc index ba19aee9b8d..196792707eb 100644 --- a/paddle/fluid/operators/load_op.cc +++ b/paddle/fluid/operators/load_op.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "paddle/fluid/operators/load_op.h" +#include + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/lod_reset_op.cc b/paddle/fluid/operators/lod_reset_op.cc index 56163096833..616aad2b976 100644 --- a/paddle/fluid/operators/lod_reset_op.cc +++ b/paddle/fluid/operators/lod_reset_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lod_reset_op.h" + #include #include diff --git a/paddle/fluid/operators/lod_reset_op.h b/paddle/fluid/operators/lod_reset_op.h index 642c8bcd9ae..f6f7155f37c 100644 --- a/paddle/fluid/operators/lod_reset_op.h +++ b/paddle/fluid/operators/lod_reset_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/log_loss_op.cc b/paddle/fluid/operators/log_loss_op.cc index 883e3597d8a..11edbc84a19 100644 --- a/paddle/fluid/operators/log_loss_op.cc +++ b/paddle/fluid/operators/log_loss_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/log_loss_op_npu.cc b/paddle/fluid/operators/log_loss_op_npu.cc index f103a69707a..99ccad1ca76 100644 --- a/paddle/fluid/operators/log_loss_op_npu.cc +++ b/paddle/fluid/operators/log_loss_op_npu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/log_loss_op_xpu.cc b/paddle/fluid/operators/log_loss_op_xpu.cc index fee1f56ebdc..1ba0a0f3b3d 100644 --- a/paddle/fluid/operators/log_loss_op_xpu.cc +++ b/paddle/fluid/operators/log_loss_op_xpu.cc @@ -11,6 +11,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { diff --git a/paddle/fluid/operators/log_softmax_op.cc b/paddle/fluid/operators/log_softmax_op.cc index da38f906b9b..95ebeedaf79 100644 --- a/paddle/fluid/operators/log_softmax_op.cc +++ b/paddle/fluid/operators/log_softmax_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/common_infer_shape_functions.h" diff --git a/paddle/fluid/operators/lookup_table_op.cu b/paddle/fluid/operators/lookup_table_op.cu index 29079b8b138..c519e0845f7 100644 --- a/paddle/fluid/operators/lookup_table_op.cu +++ b/paddle/fluid/operators/lookup_table_op.cu @@ -113,26 +113,22 @@ class LookupTableCUDAKernel : public framework::OpKernel { dim3 grids(8, 1); #ifdef PADDLE_WITH_HIP if (padding_idx == -1) - LookupTable< - T, 64, 4, 8, - false><<>>( - output, table, ids, N, K, D, padding_idx); + LookupTable + <<>>( + output, table, ids, N, K, D, padding_idx); else - LookupTable< - T, 64, 4, 8, - true><<>>( - output, table, ids, N, K, D, padding_idx); + LookupTable + <<>>( + output, table, ids, N, K, D, padding_idx); #else if (padding_idx == -1) - LookupTable< - T, 128, 8, 8, - false><<>>( - output, table, ids, N, K, D, padding_idx); + LookupTable + <<>>( + output, table, ids, N, K, D, padding_idx); else - LookupTable< - T, 128, 8, 8, - true><<>>( - output, table, ids, N, K, D, padding_idx); + LookupTable + <<>>( + output, table, ids, N, K, D, padding_idx); #endif // PADDLE_WITH_HIP } }; diff --git a/paddle/fluid/operators/lookup_table_v2_op.cc b/paddle/fluid/operators/lookup_table_v2_op.cc index 48ae080783d..65aeca1e499 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cc +++ b/paddle/fluid/operators/lookup_table_v2_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/lookup_table_v2_op.h" #include + #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/var_type_inference.h" diff --git a/paddle/fluid/operators/lookup_table_v2_op_npu.cc b/paddle/fluid/operators/lookup_table_v2_op_npu.cc index c2df6dff5b5..c47ea64e24c 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_npu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_npu.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/lookup_table_v2_op_xpu.cc b/paddle/fluid/operators/lookup_table_v2_op_xpu.cc index 521d3ab571e..223bf2cc867 100644 --- a/paddle/fluid/operators/lookup_table_v2_op_xpu.cc +++ b/paddle/fluid/operators/lookup_table_v2_op_xpu.cc @@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/lookup_table_v2_op.h" #include + #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/var_type_inference.h" +#include "paddle/fluid/operators/lookup_table_v2_op.h" #include "paddle/fluid/platform/device/device_wrapper.h" #ifdef PADDLE_WITH_XPU namespace paddle { diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc index 88d70d9bb7d..17c5f08c66c 100644 --- a/paddle/fluid/operators/lrn_op.cc +++ b/paddle/fluid/operators/lrn_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lrn_op.h" + #include #include #include + #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #ifdef PADDLE_WITH_MKLDNN @@ -174,20 +176,23 @@ class LRNOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasOutput("MidOut"), "Output", "MidOut", "LRN"); auto x_dim = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dim.size(), 4, platform::errors::InvalidArgument( - "Input(input) rank should be 4, " - "but received input rank (%d) != 4", - x_dim.size())); + PADDLE_ENFORCE_EQ( + x_dim.size(), 4, + platform::errors::InvalidArgument("Input(input) rank should be 4, " + "but received input rank (%d) != 4", + x_dim.size())); int n = ctx->Attrs().Get("n"); - PADDLE_ENFORCE_GT(n, 0UL, platform::errors::InvalidArgument( - "Argument(n) should be positive, " - "but received n(%d) not greater than 0", - n)); - PADDLE_ENFORCE_EQ(n % 2, 1UL, platform::errors::InvalidArgument( - "Argument(n) should be odd value, " - "but received n(%d) is not an odd value", - n)); + PADDLE_ENFORCE_GT(n, 0UL, + platform::errors::InvalidArgument( + "Argument(n) should be positive, " + "but received n(%d) not greater than 0", + n)); + PADDLE_ENFORCE_EQ(n % 2, 1UL, + platform::errors::InvalidArgument( + "Argument(n) should be odd value, " + "but received n(%d) is not an odd value", + n)); ctx->SetOutputDim("Out", x_dim); ctx->ShareLoD("X", /*->*/ "Out"); diff --git a/paddle/fluid/operators/lrn_op.h b/paddle/fluid/operators/lrn_op.h index f2d72d07405..671055caa16 100644 --- a/paddle/fluid/operators/lrn_op.h +++ b/paddle/fluid/operators/lrn_op.h @@ -15,6 +15,7 @@ limitations under the License. 
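// ---------------------------------------------------------------------------
// Illustrative sketch of the namespace-comment rule from the
// kernel_primitives.h hunk further up: a bare closing brace of a namespace
// gains a `// namespace <name>` trailer. The body here is a placeholder.
namespace paddle {
namespace operators {
inline int KernelPrimitivesTag() { return 0; }  // stand-in content
}  // namespace operators
}  // namespace paddle
// ---------------------------------------------------------------------------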
*/ #pragma once #include + #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" @@ -68,18 +69,21 @@ class LRNKernel : public framework::OpKernel { T beta = ctx.Attr("beta"); T k = ctx.Attr("k"); - PADDLE_ENFORCE_GE(alpha, 0UL, platform::errors::InvalidArgument( - "Argument(alpha) should >= 0.0, " - "but received alpha(%d) less than 0", - alpha)); - PADDLE_ENFORCE_GE(beta, 0UL, platform::errors::InvalidArgument( - "Argument(beta) should >= 0.0, " - "but received beta(%d) less than 0", - beta)); - PADDLE_ENFORCE_GE(k, 0UL, platform::errors::InvalidArgument( - "Argument(k) should >= 0.0, " - "but received k(%d) less than 0", - k)); + PADDLE_ENFORCE_GE( + alpha, 0UL, + platform::errors::InvalidArgument("Argument(alpha) should >= 0.0, " + "but received alpha(%d) less than 0", + alpha)); + PADDLE_ENFORCE_GE( + beta, 0UL, + platform::errors::InvalidArgument("Argument(beta) should >= 0.0, " + "but received beta(%d) less than 0", + beta)); + PADDLE_ENFORCE_GE( + k, 0UL, + platform::errors::InvalidArgument("Argument(k) should >= 0.0, " + "but received k(%d) less than 0", + k)); LRNFunctor f; f(ctx, x, out, mid, N, C, H, W, n, k, alpha, beta, data_layout); diff --git a/paddle/fluid/operators/lstm_op.cc b/paddle/fluid/operators/lstm_op.cc index 2ec9de3e3bb..21a0fce2893 100644 --- a/paddle/fluid/operators/lstm_op.cc +++ b/paddle/fluid/operators/lstm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lstm_op.h" + #include #include diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index 4ec3072a96d..1e1aaf3ea53 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/detail/activation_functions.h" @@ -272,9 +273,10 @@ class LSTMGradKernel : public framework::OpKernel { phi::funcs::LoDTensor2BatchFunctor to_batch; - auto ToBatch = [&batch_gate, &to_batch]( - const DeviceContext& ctx, const framework::LoDTensor& src, - const framework::DDim& dims, framework::LoDTensor& dst) { + auto ToBatch = [&batch_gate, &to_batch](const DeviceContext& ctx, + const framework::LoDTensor& src, + const framework::DDim& dims, + framework::LoDTensor& dst) { dst.mutable_data(dims, ctx.GetPlace()); dst.set_lod(batch_gate->lod()); to_batch(ctx, src, &dst, false); diff --git a/paddle/fluid/operators/lstm_unit_op.cc b/paddle/fluid/operators/lstm_unit_op.cc index 917482589fc..235a4bd689b 100644 --- a/paddle/fluid/operators/lstm_unit_op.cc +++ b/paddle/fluid/operators/lstm_unit_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/lstm_unit_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/lstmp_op.cc b/paddle/fluid/operators/lstmp_op.cc index 562f7755591..7ecf294433e 100644 --- a/paddle/fluid/operators/lstmp_op.cc +++ b/paddle/fluid/operators/lstmp_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
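// Sketch of the lambda reflow in the lstm_op.h ToBatch hunk above: rather
// than packing parameters after the capture list, the formatter now places
// one parameter per line, aligned to the opening parenthesis (the template
// argument on mutable_data is restored schematically):
//
//   auto ToBatch = [&batch_gate, &to_batch](const DeviceContext& ctx,
//                                           const framework::LoDTensor& src,
//                                           const framework::DDim& dims,
//                                           framework::LoDTensor& dst) {
//     dst.mutable_data<T>(dims, ctx.GetPlace());
//     dst.set_lod(batch_gate->lod());
//     to_batch(ctx, src, &dst, false);
//   };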
*/ #include "paddle/fluid/operators/lstmp_op.h" + #include #include diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 5d24c0b70d3..5e68259852c 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/activation_op.h" @@ -371,9 +372,10 @@ class LSTMPGradKernel : public framework::OpKernel { phi::funcs::LoDTensor2BatchFunctor to_batch; - auto ToBatch = [&batch_gate, &to_batch]( - const DeviceContext& ctx, const framework::LoDTensor& src, - const framework::DDim& dims, framework::LoDTensor& dst) { + auto ToBatch = [&batch_gate, &to_batch](const DeviceContext& ctx, + const framework::LoDTensor& src, + const framework::DDim& dims, + framework::LoDTensor& dst) { dst.mutable_data(dims, ctx.GetPlace()); dst.set_lod(batch_gate->lod()); to_batch(ctx, src, &dst, false); diff --git a/paddle/fluid/operators/lstsq_op.cc b/paddle/fluid/operators/lstsq_op.cc index f060125620f..e093e4d8c01 100644 --- a/paddle/fluid/operators/lstsq_op.cc +++ b/paddle/fluid/operators/lstsq_op.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/operators/lstsq_op.h" + #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -139,4 +141,4 @@ REGISTER_OPERATOR(lstsq, ops::LstsqOp, ops::LstsqOpMaker) REGISTER_OP_CPU_KERNEL( lstsq, ops::LstsqCPUKernel, - ops::LstsqCPUKernel); \ No newline at end of file + ops::LstsqCPUKernel); diff --git a/paddle/fluid/operators/lstsq_op.cu b/paddle/fluid/operators/lstsq_op.cu index 10e2867bf29..53c78fef7b5 100644 --- a/paddle/fluid/operators/lstsq_op.cu +++ b/paddle/fluid/operators/lstsq_op.cu @@ -17,6 +17,7 @@ #include #include + #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/operators/lstsq_op.h" #include "paddle/fluid/operators/qr_op.h" diff --git a/paddle/fluid/operators/lstsq_op.h b/paddle/fluid/operators/lstsq_op.h index 520722dafcb..7955b3b7df9 100644 --- a/paddle/fluid/operators/lstsq_op.h +++ b/paddle/fluid/operators/lstsq_op.h @@ -15,8 +15,10 @@ #pragma once #include + #include #include + #include "paddle/fluid/operators/eig_op.h" #include "paddle/fluid/operators/math/eigen_values_vectors.h" #include "paddle/fluid/operators/math/matrix_solve.h" diff --git a/paddle/fluid/operators/lu_op.cc b/paddle/fluid/operators/lu_op.cc index fc8673181c4..0894323015e 100644 --- a/paddle/fluid/operators/lu_op.cc +++ b/paddle/fluid/operators/lu_op.cc @@ -45,8 +45,9 @@ class LUOp : public framework::OperatorWithKernel { bool pivots = context->Attrs().Get("pivots"); auto x_dims = context->GetInputDim("X"); int x_rank = x_dims.size(); - PADDLE_ENFORCE_GE(x_rank, 2, platform::errors::InvalidArgument( - "the rank of input must greater than 2")); + PADDLE_ENFORCE_GE(x_rank, 2, + platform::errors::InvalidArgument( + "the rank of input must greater than 2")); context->SetOutputDim("Out", x_dims); int m = x_dims[x_rank - 1]; int n = x_dims[x_rank - 2]; diff --git a/paddle/fluid/operators/lu_unpack_op.cc b/paddle/fluid/operators/lu_unpack_op.cc index e38a4703f64..e3b4263b4ff 100644 --- a/paddle/fluid/operators/lu_unpack_op.cc +++ b/paddle/fluid/operators/lu_unpack_op.cc @@ -53,8 +53,9 @@ class LU_UnpackOp : public framework::OperatorWithKernel { auto x_dims = context->GetInputDim("X"); int x_rank = x_dims.size(); - PADDLE_ENFORCE_GE(x_rank, 2, 
platform::errors::InvalidArgument( - "the rank of input must greater than 2")); + PADDLE_ENFORCE_GE(x_rank, 2, + platform::errors::InvalidArgument( + "the rank of input must greater than 2")); // context->SetOutputDim("Out", x_dims); int m = x_dims[x_rank - 1]; diff --git a/paddle/fluid/operators/margin_cross_entropy_op.cu b/paddle/fluid/operators/margin_cross_entropy_op.cu index fd5ba1952ca..1cef3705973 100644 --- a/paddle/fluid/operators/margin_cross_entropy_op.cu +++ b/paddle/fluid/operators/margin_cross_entropy_op.cu @@ -20,6 +20,7 @@ namespace cub = hipcub; #endif #include + #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/margin_cross_entropy_op.h" #include "paddle/fluid/operators/math/softmax_impl.h" @@ -298,16 +299,16 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { // save match_logits, used for gradient computation. if (label_type == framework::proto::VarType::INT32) { typedef int32_t LabelT; - AddMarginToPositiveLogitsKernel< - T><<>>( - logits_ptr, labels->data(), margin1, margin2, margin3, rank, - nranks, N, D, class_interval.data()); + AddMarginToPositiveLogitsKernel + <<>>( + logits_ptr, labels->data(), margin1, margin2, margin3, + rank, nranks, N, D, class_interval.data()); } else if (label_type == framework::proto::VarType::INT64) { typedef int64_t LabelT; - AddMarginToPositiveLogitsKernel< - T><<>>( - logits_ptr, labels->data(), margin1, margin2, margin3, rank, - nranks, N, D, class_interval.data()); + AddMarginToPositiveLogitsKernel + <<>>( + logits_ptr, labels->data(), margin1, margin2, margin3, + rank, nranks, N, D, class_interval.data()); } else { PADDLE_THROW(platform::errors::Unimplemented( "margin_cross_entropy label type noly support int32 and int64, " @@ -386,9 +387,9 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { #endif // step 5, (logit - logit_max) - log(sum(exp(logit - logit_max))) - LogitsMinusLogSumKernel< - T><<>>( - logits_ptr, sum_exp_logits_buff, N, D); + LogitsMinusLogSumKernel + <<>>( + logits_ptr, sum_exp_logits_buff, N, D); // step 6, prob = exp((logit - logit_max) - log(sum(exp(logit - // logit_max)))) @@ -397,16 +398,16 @@ class MarginCrossEntropyOpCUDAKernel : public framework::OpKernel { dev_ctx, loss, static_cast(0.0)); if (label_type == framework::proto::VarType::INT32) { typedef int32_t LabelT; - HardLabelSoftmaxWithCrossEntropyKernel< - T, LabelT><<>>( - loss_ptr, logits_ptr, labels->data(), rank, N, D, - class_interval.data()); + HardLabelSoftmaxWithCrossEntropyKernel + <<>>( + loss_ptr, logits_ptr, labels->data(), rank, N, D, + class_interval.data()); } else if (label_type == framework::proto::VarType::INT64) { typedef int64_t LabelT; - HardLabelSoftmaxWithCrossEntropyKernel< - T, LabelT><<>>( - loss_ptr, logits_ptr, labels->data(), rank, N, D, - class_interval.data()); + HardLabelSoftmaxWithCrossEntropyKernel + <<>>( + loss_ptr, logits_ptr, labels->data(), rank, N, D, + class_interval.data()); } #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) diff --git a/paddle/fluid/operators/margin_rank_loss_op.cc b/paddle/fluid/operators/margin_rank_loss_op.cc index b4ff8b6d8dc..31055002993 100644 --- a/paddle/fluid/operators/margin_rank_loss_op.cc +++ b/paddle/fluid/operators/margin_rank_loss_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
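// The margin_cross_entropy_op.cu hunks above reflow a label-type dispatch;
// the shape of the pattern, with a schematic launch configuration and the
// trailing arguments elided:
//
//   if (label_type == framework::proto::VarType::INT32) {
//     typedef int32_t LabelT;
//     AddMarginToPositiveLogitsKernel<T>
//         <<<NumBlocks(N), threads, 0, stream>>>(
//             logits_ptr, labels->data<LabelT>(), margin1, margin2, margin3,
//             rank, nranks, N, D, class_interval.data<int>());
//   } else if (label_type == framework::proto::VarType::INT64) {
//     typedef int64_t LabelT;
//     // same launch with the 64-bit label type
//   }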
*/ #include "paddle/fluid/operators/margin_rank_loss_op.h" + #include + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/operators/marker_op.cu b/paddle/fluid/operators/marker_op.cu index cfa5c6dc7a9..fe61aefe0bb 100644 --- a/paddle/fluid/operators/marker_op.cu +++ b/paddle/fluid/operators/marker_op.cu @@ -48,8 +48,8 @@ class MarkerOpCUDAKernel : public framework::OpKernel { "MarkerCUDA", "marker_" + marker_role + "_" + marker_pos, platform::TracerEventType::OperatorInner, 1, platform::EventRole::kInnerOp); - SimpleMarkerKernel<<<1, 32, 0, dev_ctx.stream()>>>(in_temp, out_temp, - 32); + SimpleMarkerKernel + <<<1, 32, 0, dev_ctx.stream()>>>(in_temp, out_temp, 32); } }; diff --git a/paddle/fluid/operators/match_matrix_tensor_op.cc b/paddle/fluid/operators/match_matrix_tensor_op.cc index d32ab65509e..2ae4fbdbe10 100644 --- a/paddle/fluid/operators/match_matrix_tensor_op.cc +++ b/paddle/fluid/operators/match_matrix_tensor_op.cc @@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/match_matrix_tensor_op.h" + #include #include #include #include #include -#include "paddle/fluid/operators/match_matrix_tensor_op.h" #include "paddle/fluid/operators/search_compute.h" namespace paddle { diff --git a/paddle/fluid/operators/math.h b/paddle/fluid/operators/math.h index d4b9e35bcce..47281fb0280 100644 --- a/paddle/fluid/operators/math.h +++ b/paddle/fluid/operators/math.h @@ -14,11 +14,10 @@ #pragma once +#include "math.h" // NOLINT #include "paddle/fluid/platform/float16.h" #include "paddle/phi/core/hostdevice.h" -#include "math.h" // NOLINT - namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/beam_search.cu b/paddle/fluid/operators/math/beam_search.cu index 486979aa0a8..7a21f2f6497 100644 --- a/paddle/fluid/operators/math/beam_search.cu +++ b/paddle/fluid/operators/math/beam_search.cu @@ -348,11 +348,10 @@ class BeamSearchFunctor { float* selected_scores_data = selected_scores->mutable_data(selected_dims, context.GetPlace()); int* parent_idx_data = - parent_idx - ? parent_idx->mutable_data( - {static_cast(num_seqs * beam_size)}, - context.GetPlace()) - : nullptr; + parent_idx ? 
parent_idx->mutable_data( + {static_cast(num_seqs * beam_size)}, + context.GetPlace()) + : nullptr; framework::LoD selected_lod(2); selected_lod[0].assign(abs_lod[level].begin(), abs_lod[level].end()); @@ -369,8 +368,8 @@ class BeamSearchFunctor { static_cast(beam_size)); switch (platform::RoundToPowerOfTwo(beam_size * seq_width)) { CUDA_LAUNCH_KERNEL_HELPER( - BeamSearchKernelSingle<<< - 1, kMaxThreadsPerSeq, 0, context.stream()>>>( + BeamSearchKernelSingle + <<<1, kMaxThreadsPerSeq, 0, context.stream()>>>( selected_ids_data, selected_scores_data, parent_idx_data, selected_offsets, pre_ids_data, pre_scores_data, ids_data, scores_data, seq_length, static_cast(seq_width), @@ -387,8 +386,8 @@ class BeamSearchFunctor { static_cast(beam_size)); switch (platform::RoundToPowerOfTwo(beam_size * num_seqs * 32)) { CUDA_LAUNCH_KERNEL_HELPER( - BeamSearchKernel<<< - 1, num_seqs * kMaxThreadsPerSeq, 0, context.stream()>>>( + BeamSearchKernel + <<<1, num_seqs * kMaxThreadsPerSeq, 0, context.stream()>>>( selected_ids_data, selected_scores_data, parent_idx_data, selected_offsets, pre_ids_data, pre_scores_data, ids_data, scores_data, seq_offsets, static_cast(num_seqs), diff --git a/paddle/fluid/operators/math/beam_search.h b/paddle/fluid/operators/math/beam_search.h index 4474e7ea52a..c0d39aa2d8f 100644 --- a/paddle/fluid/operators/math/beam_search.h +++ b/paddle/fluid/operators/math/beam_search.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/math/beam_search_test.cc b/paddle/fluid/operators/math/beam_search_test.cc index b0547ef9d95..7cf4c867db7 100644 --- a/paddle/fluid/operators/math/beam_search_test.cc +++ b/paddle/fluid/operators/math/beam_search_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
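// The single most frequent change in this patch is include regrouping, as in
// the beam_search.h hunk above: a blank line now separates angle-bracket
// (system/STL) includes from the first quoted project include. Schematic,
// with the angle-bracket header names chosen for illustration:
//
//   #include <string>
//   #include <vector>
//
//   #include "paddle/fluid/framework/lod_tensor.h"
//   #include "paddle/fluid/platform/device_context.h"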
*/ #include "paddle/fluid/operators/math/beam_search.h" #include + #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/math/bert_encoder_functor.cu b/paddle/fluid/operators/math/bert_encoder_functor.cu index 0cdad6beeb9..4aba6f3c0b9 100644 --- a/paddle/fluid/operators/math/bert_encoder_functor.cu +++ b/paddle/fluid/operators/math/bert_encoder_functor.cu @@ -815,23 +815,23 @@ void SkipLayerNormFunctor::operator()(const int num, const int hidden, const int threads = 256; if (hidden % 2 == 0) { if (std::is_same::value) { - SkipLayerNormKernel2<<>>( - num, hidden / 2, reinterpret_cast(input1), - reinterpret_cast(input2), - reinterpret_cast(output), - reinterpret_cast(scale), - reinterpret_cast(bias), eps); + SkipLayerNormKernel2 + <<>>( + num, hidden / 2, reinterpret_cast(input1), + reinterpret_cast(input2), + reinterpret_cast(output), + reinterpret_cast(scale), + reinterpret_cast(bias), eps); // HIP defined __HIP_NO_HALF_CONVERSIONS__ in hip.cmake #ifndef __HIPCC__ } else if (std::is_same::value) { - SkipLayerNormKernel2<__half, __half2, - threads><<>>( - num, hidden / 2, reinterpret_cast(input1), - reinterpret_cast(input2), - reinterpret_cast<__half2 *>(output), - reinterpret_cast(scale), - reinterpret_cast(bias), eps); + SkipLayerNormKernel2<__half, __half2, threads> + <<>>( + num, hidden / 2, reinterpret_cast(input1), + reinterpret_cast(input2), + reinterpret_cast<__half2 *>(output), + reinterpret_cast(scale), + reinterpret_cast(bias), eps); #endif } else { assert(false); diff --git a/paddle/fluid/operators/math/bert_encoder_functor.h b/paddle/fluid/operators/math/bert_encoder_functor.h index 683606ec733..fd40ac540bf 100644 --- a/paddle/fluid/operators/math/bert_encoder_functor.h +++ b/paddle/fluid/operators/math/bert_encoder_functor.h @@ -17,10 +17,12 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include #include + #include // NOLINT #endif #ifdef PADDLE_WITH_HIP #include + #include namespace cub = hipcub; #endif diff --git a/paddle/fluid/operators/math/bloomfilter.h b/paddle/fluid/operators/math/bloomfilter.h index fa3d37ed5f4..f16fdd135b5 100644 --- a/paddle/fluid/operators/math/bloomfilter.h +++ b/paddle/fluid/operators/math/bloomfilter.h @@ -16,11 +16,9 @@ limitations under the License. */ #define BLOOMFILTER_MAGIC_NUM_NEW 17070416 #include -#include - #include +#include #include - #include namespace paddle { diff --git a/paddle/fluid/operators/math/concat_and_split.cu b/paddle/fluid/operators/math/concat_and_split.cu index e51631385eb..1ea8cafd25e 100644 --- a/paddle/fluid/operators/math/concat_and_split.cu +++ b/paddle/fluid/operators/math/concat_and_split.cu @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/concat_and_split.h" - #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/math/concat_and_split.h b/paddle/fluid/operators/math/concat_and_split.h index b5b0aae23ac..3b6a12e2402 100644 --- a/paddle/fluid/operators/math/concat_and_split.h +++ b/paddle/fluid/operators/math/concat_and_split.h @@ -14,6 +14,7 @@ limitations under the License. 
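// Context for the bert_encoder_functor.cu hunk above: the reformatted branch
// selects a vectorized kernel when the hidden size is even, reinterpreting
// the buffers as two-element types (float2, __half2) so each thread handles
// a pair of values. Schematic of the dispatch being reflowed:
//
//   if (hidden % 2 == 0) {
//     if (std::is_same<T, float>::value) {
//       SkipLayerNormKernel2<float, float2, threads>
//           <<<block, threads, 0, stream>>>(
//               num, hidden / 2, reinterpret_cast<const float2 *>(input1),
//               reinterpret_cast<const float2 *>(input2), /* ... */ eps);
//     }
//   }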
*/ #pragma once #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/operators/math/concat_test.cc b/paddle/fluid/operators/math/concat_test.cc index de358bf623e..542dcda963a 100644 --- a/paddle/fluid/operators/math/concat_test.cc +++ b/paddle/fluid/operators/math/concat_test.cc @@ -119,13 +119,13 @@ void ConcatCase1(DeviceContext* context) { } /** - * case 2: - * inputs: - * t_a.shape: [2, 3, 4] - * t_b.shape: [2, 4, 4] - * output: - * out.shape: [2, 7, 4] - */ + * case 2: + * inputs: + * t_a.shape: [2, 3, 4] + * t_b.shape: [2, 4, 4] + * output: + * out.shape: [2, 7, 4] + */ template void ConcatCase2(DeviceContext* context) { paddle::framework::Tensor input_a_cpu; @@ -222,13 +222,13 @@ void ConcatCase2(DeviceContext* context) { } /** - * case 3: - * inputs: - * t_a.shape: [2, 3, 5] - * t_b.shape: [2, 3, 4] - * output: - * out.shape: [2, 3, 9] - */ + * case 3: + * inputs: + * t_a.shape: [2, 3, 5] + * t_b.shape: [2, 3, 4] + * output: + * out.shape: [2, 3, 9] + */ template void ConcatCase3(DeviceContext* context) { paddle::framework::Tensor input_a_cpu; @@ -326,14 +326,14 @@ void ConcatCase3(DeviceContext* context) { } /** - * case 4: - * inputs: - * axis = 1 - * t_a.shape: [2, 3, 4] - * t_b.shape: [2, 3, 4] - * output: - * out.shape: [2, 6, 4] - */ + * case 4: + * inputs: + * axis = 1 + * t_a.shape: [2, 3, 4] + * t_b.shape: [2, 3, 4] + * output: + * out.shape: [2, 6, 4] + */ template void ConcatCase4(DeviceContext* context) { paddle::framework::Tensor input_a_cpu; diff --git a/paddle/fluid/operators/math/cross_entropy.cc b/paddle/fluid/operators/math/cross_entropy.cc index cb2f59182c1..a2b83f99856 100644 --- a/paddle/fluid/operators/math/cross_entropy.cc +++ b/paddle/fluid/operators/math/cross_entropy.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/cross_entropy.h" + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/phi/backends/cpu/cpu_context.h" diff --git a/paddle/fluid/operators/math/cross_entropy.h b/paddle/fluid/operators/math/cross_entropy.h index da7340e4eb0..e562816d6da 100644 --- a/paddle/fluid/operators/math/cross_entropy.h +++ b/paddle/fluid/operators/math/cross_entropy.h @@ -14,6 +14,7 @@ limitations under the License. 
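// The concat_test.cc hunks above change only documentation-comment
// indentation: every continuation asterisk of a block comment is aligned
// under the first, so
//
//   /**
//     * case 2:
//     */
//
// becomes
//
//   /**
//    * case 2:
//    */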
*/ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/phi/core/hostdevice.h" diff --git a/paddle/fluid/operators/math/eigen_values_vectors.h b/paddle/fluid/operators/math/eigen_values_vectors.h index 1ade2190bb9..22ce162a44c 100644 --- a/paddle/fluid/operators/math/eigen_values_vectors.h +++ b/paddle/fluid/operators/math/eigen_values_vectors.h @@ -42,9 +42,10 @@ static void CheckEighResult(const int batch, const int info) { "tridiagonal form did not converge to zero", batch, info)); PADDLE_ENFORCE_GE( - info, 0, platform::errors::PreconditionNotMet( - "For batch [%d]: the [%d] argument had an illegal value", - batch, info)); + info, 0, + platform::errors::PreconditionNotMet( + "For batch [%d]: the [%d] argument had an illegal value", batch, + info)); } template diff --git a/paddle/fluid/operators/math/gru_compute.cu b/paddle/fluid/operators/math/gru_compute.cu index 9b03895cdef..946a1477c3b 100644 --- a/paddle/fluid/operators/math/gru_compute.cu +++ b/paddle/fluid/operators/math/gru_compute.cu @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/operators/math/detail/gru_gpu_kernel.h" #include "paddle/fluid/operators/math/detail/gru_kernel.h" #include "paddle/fluid/operators/math/gru_compute.h" @@ -36,35 +37,35 @@ struct GRUUnitFunctor { int frame_blocks = (frame_size * 2 + tiled_size - 1) / tiled_size; threads = dim3(tiled_size, 1); grid = dim3(frame_blocks, 1); - detail::KeFastCollectiveGruGate< - T, tiled_size><<>>( - value.gate_value, value.prev_out_value, value.gate_weight, - value.reset_output_value, frame_size, active_gate); + detail::KeFastCollectiveGruGate + <<>>( + value.gate_value, value.prev_out_value, value.gate_weight, + value.reset_output_value, frame_size, active_gate); frame_blocks = (frame_size + tiled_size - 1) / tiled_size; grid = dim3(frame_blocks, 1); - detail::KeFastCollectiveGruOut< - T, tiled_size><<>>( - value.state_weight, value.prev_out_value, value.output_value, - value.gate_value, value.reset_output_value, frame_size, - active_node, origin_mode); + detail::KeFastCollectiveGruOut + <<>>( + value.state_weight, value.prev_out_value, value.output_value, + value.gate_value, value.reset_output_value, frame_size, + active_node, origin_mode); } else { constexpr int tiled_size = 16; int frame_blocks = (frame_size * 2 + tiled_size - 1) / tiled_size; threads = dim3(tiled_size, 1); grid = dim3(frame_blocks, 1); - detail::KeFastCollectiveGruGate< - T, tiled_size><<>>( - value.gate_value, value.prev_out_value, value.gate_weight, - value.reset_output_value, frame_size, active_gate); + detail::KeFastCollectiveGruGate + <<>>( + value.gate_value, value.prev_out_value, value.gate_weight, + value.reset_output_value, frame_size, active_gate); frame_blocks = (frame_size + tiled_size - 1) / tiled_size; grid = dim3(frame_blocks, 1); - detail::KeFastCollectiveGruOut< - T, tiled_size><<>>( - value.state_weight, value.prev_out_value, value.output_value, - value.gate_value, value.reset_output_value, frame_size, - active_node, origin_mode); + detail::KeFastCollectiveGruOut + <<>>( + value.state_weight, value.prev_out_value, value.output_value, + value.gate_value, value.reset_output_value, frame_size, + active_node, origin_mode); } return; } else { @@ -86,18 +87,18 @@ struct GRUUnitFunctor { if (batch_size == 1) { detail::KeGruForwardResetOutput, - /* is_batch= */ false, - T><<>>( - 
detail::forward::gru_resetOutput(), value.gate_value, - value.reset_output_value, value.prev_out_value, frame_size, - batch_size, active_gate); + /* is_batch= */ false, T> + <<>>( + detail::forward::gru_resetOutput(), value.gate_value, + value.reset_output_value, value.prev_out_value, frame_size, + batch_size, active_gate); } else { detail::KeGruForwardResetOutput, - /* is_batch= */ true, - T><<>>( - detail::forward::gru_resetOutput(), value.gate_value, - value.reset_output_value, value.prev_out_value, frame_size, - batch_size, active_gate); + /* is_batch= */ true, T> + <<>>( + detail::forward::gru_resetOutput(), value.gate_value, + value.reset_output_value, value.prev_out_value, frame_size, + batch_size, active_gate); } if (value.prev_out_value) { @@ -109,18 +110,18 @@ struct GRUUnitFunctor { if (batch_size == 1) { detail::KeGruForwardFinalOutput, - /* is_batch= */ false, - T><<>>( - detail::forward::gru_finalOutput(), value.gate_value, - value.prev_out_value, value.output_value, frame_size, batch_size, - active_node, origin_mode); + /* is_batch= */ false, T> + <<>>(detail::forward::gru_finalOutput(), + value.gate_value, value.prev_out_value, + value.output_value, frame_size, + batch_size, active_node, origin_mode); } else { detail::KeGruForwardFinalOutput, - /* is_batch= */ true, - T><<>>( - detail::forward::gru_finalOutput(), value.gate_value, - value.prev_out_value, value.output_value, frame_size, batch_size, - active_node, origin_mode); + /* is_batch= */ true, T> + <<>>(detail::forward::gru_finalOutput(), + value.gate_value, value.prev_out_value, + value.output_value, frame_size, + batch_size, active_node, origin_mode); } } }; @@ -147,19 +148,21 @@ struct GRUUnitGradFunctor { } if (batch_size == 1) { - detail::KeGruBackwardStateGrad< - detail::backward::gru_stateGrad, - /* is_batch= */ false><<>>( - detail::backward::gru_stateGrad(), value.gate_value, - grad.gate_grad, value.prev_out_value, grad.prev_out_grad, - grad.output_grad, frame_size, batch_size, active_node, origin_mode); + detail::KeGruBackwardStateGrad, + /* is_batch= */ false> + <<>>( + detail::backward::gru_stateGrad(), value.gate_value, + grad.gate_grad, value.prev_out_value, grad.prev_out_grad, + grad.output_grad, frame_size, batch_size, active_node, + origin_mode); } else { - detail::KeGruBackwardStateGrad< - detail::backward::gru_stateGrad, - /* is_batch= */ true><<>>( - detail::backward::gru_stateGrad(), value.gate_value, - grad.gate_grad, value.prev_out_value, grad.prev_out_grad, - grad.output_grad, frame_size, batch_size, active_node, origin_mode); + detail::KeGruBackwardStateGrad, + /* is_batch= */ true> + <<>>( + detail::backward::gru_stateGrad(), value.gate_value, + grad.gate_grad, value.prev_out_value, grad.prev_out_grad, + grad.output_grad, frame_size, batch_size, active_node, + origin_mode); } auto blas = phi::funcs::GetBlas(context); @@ -179,19 +182,19 @@ struct GRUUnitGradFunctor { } if (batch_size == 1) { - detail::KeGruBackwardResetGrad< - detail::backward::gru_resetGrad, - /* is_batch= */ false><<>>( - detail::backward::gru_resetGrad(), value.gate_value, - grad.gate_grad, value.prev_out_value, grad.prev_out_grad, - grad.reset_output_grad, frame_size, batch_size, active_gate); + detail::KeGruBackwardResetGrad, + /* is_batch= */ false> + <<>>( + detail::backward::gru_resetGrad(), value.gate_value, + grad.gate_grad, value.prev_out_value, grad.prev_out_grad, + grad.reset_output_grad, frame_size, batch_size, active_gate); } else { - detail::KeGruBackwardResetGrad< - detail::backward::gru_resetGrad, - /* 
is_batch= */ true><<>>( - detail::backward::gru_resetGrad(), value.gate_value, - grad.gate_grad, value.prev_out_value, grad.prev_out_grad, - grad.reset_output_grad, frame_size, batch_size, active_gate); + detail::KeGruBackwardResetGrad, + /* is_batch= */ true> + <<>>( + detail::backward::gru_resetGrad(), value.gate_value, + grad.gate_grad, value.prev_out_value, grad.prev_out_grad, + grad.reset_output_grad, frame_size, batch_size, active_gate); } if (grad.prev_out_grad && value.prev_out_value) { diff --git a/paddle/fluid/operators/math/im2col.cc b/paddle/fluid/operators/math/im2col.cc index 8fc6c52122a..1f5f575c7c3 100644 --- a/paddle/fluid/operators/math/im2col.cc +++ b/paddle/fluid/operators/math/im2col.cc @@ -111,16 +111,18 @@ class Col2ImFunctor #include + #include "paddle/fluid/operators/math/im2col.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" @@ -220,16 +221,18 @@ class Col2ImFunctor + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/math/im2col_cfo_cpu.h b/paddle/fluid/operators/math/im2col_cfo_cpu.h index 01f1e220e65..f3755653f28 100644 --- a/paddle/fluid/operators/math/im2col_cfo_cpu.h +++ b/paddle/fluid/operators/math/im2col_cfo_cpu.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/tensor.h" namespace paddle { diff --git a/paddle/fluid/operators/math/im2col_test.cc b/paddle/fluid/operators/math/im2col_test.cc index 0e4032986cf..ff766cfad2c 100644 --- a/paddle/fluid/operators/math/im2col_test.cc +++ b/paddle/fluid/operators/math/im2col_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/im2col.h" + #include + #include "paddle/fluid/operators/math/im2col_cfo_cpu.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/math/inclusive_scan.h b/paddle/fluid/operators/math/inclusive_scan.h index b77e2345036..bd170b67404 100644 --- a/paddle/fluid/operators/math/inclusive_scan.h +++ b/paddle/fluid/operators/math/inclusive_scan.h @@ -24,6 +24,7 @@ namespace cub = hipcub; #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/enforce.h" @@ -196,15 +197,15 @@ static void InclusiveScanInnerDim(const T *x, T *y, size_t outer_dim, grid_dim = std::min(grid_dim, dev_ctx.GetCUDAMaxGridDimSize()[0]); dim3 thread_dims(kThreadNumX, kThreadNumY); if (reverse) { - InclusiveScanInnerDimCUDAKernel< - T, BinaryOp, kThreadNumX, kThreadNumY, - /*kReverse=*/true><<>>( - x, y, outer_dim, inner_dim, init, op); + InclusiveScanInnerDimCUDAKernel + <<>>(x, y, outer_dim, + inner_dim, init, op); } else { - InclusiveScanInnerDimCUDAKernel< - T, BinaryOp, kThreadNumX, kThreadNumY, - /*kReverse=*/false><<>>( - x, y, outer_dim, inner_dim, init, op); + InclusiveScanInnerDimCUDAKernel + <<>>(x, y, outer_dim, + inner_dim, init, op); } } diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc index 083d6967ff0..a3c1d23e89b 100644 --- a/paddle/fluid/operators/math/math_function.cc +++ b/paddle/fluid/operators/math/math_function.cc @@ -25,6 +25,7 @@ limitations under the License. 
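// The gru_compute.cu hunks above carry parameter-name comments on bare
// boolean template arguments, e.g. /* is_batch= */ true; the reflow keeps
// each comment glued to its argument while moving the launch onto its own
// line. Schematic, with stripped template arguments restored as <T>:
//
//   detail::KeGruBackwardStateGrad<detail::backward::gru_stateGrad<T>,
//                                  /* is_batch= */ true>
//       <<<grid, threads, 0, stream>>>(
//           detail::backward::gru_stateGrad<T>(), value.gate_value,
//           grad.gate_grad, value.prev_out_value, grad.prev_out_grad,
//           grad.output_grad, frame_size, batch_size, active_node,
//           origin_mode);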
*/ #include #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/operators/math/math_function_impl.h" #include "paddle/fluid/platform/bfloat16.h" diff --git a/paddle/fluid/operators/math/matrix_bit_code.h b/paddle/fluid/operators/math/matrix_bit_code.h index 359552a0717..1d6afa50cc9 100644 --- a/paddle/fluid/operators/math/matrix_bit_code.h +++ b/paddle/fluid/operators/math/matrix_bit_code.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows_utils.h" diff --git a/paddle/fluid/operators/math/matrix_solve.cc b/paddle/fluid/operators/math/matrix_solve.cc index 7b239b81666..f2b083b8337 100644 --- a/paddle/fluid/operators/math/matrix_solve.cc +++ b/paddle/fluid/operators/math/matrix_solve.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/matrix_solve.h" + #include "Eigen/Core" #include "Eigen/LU" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/fluid/operators/math/matrix_solve.cu.cc b/paddle/fluid/operators/math/matrix_solve.cu.cc index 737196dde1d..59c8c07e6e1 100644 --- a/paddle/fluid/operators/math/matrix_solve.cu.cc +++ b/paddle/fluid/operators/math/matrix_solve.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/matrix_solve.h" + #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/solve_op.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/math/matrix_solve.h b/paddle/fluid/operators/math/matrix_solve.h index 415d0c6dd8e..cecc3517934 100644 --- a/paddle/fluid/operators/math/matrix_solve.h +++ b/paddle/fluid/operators/math/matrix_solve.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "Eigen/Core" #include "Eigen/LU" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/math/sample_prob.cu b/paddle/fluid/operators/math/sample_prob.cu index f86eb103449..1ae0c709e4d 100644 --- a/paddle/fluid/operators/math/sample_prob.cu +++ b/paddle/fluid/operators/math/sample_prob.cu @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include #include diff --git a/paddle/fluid/operators/math/sampler.cc b/paddle/fluid/operators/math/sampler.cc index 5f1cd259416..d645e1994f1 100644 --- a/paddle/fluid/operators/math/sampler.cc +++ b/paddle/fluid/operators/math/sampler.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/fluid/operators/math/sampler.h" #include + #include "paddle/fluid/framework/generator.h" namespace paddle { diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index e4b033b6c58..7689c31838d 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/math/selected_rows_functor.h" + #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/platform/device/device_wrapper.h" diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu index db5c66d3197..edcb21cb56a 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cu +++ b/paddle/fluid/operators/math/selected_rows_functor.cu @@ -163,10 +163,10 @@ struct SelectedRowsAddTensor { dim3 threads(block_size, 1); dim3 grid(in1_rows.size(), 1); paddle::framework::MixVector mixv_in1_rows(&in1_rows); - SelectedRowsAddTensorKernel< - T, block_size><<>>( - in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), out_data, - in1_row_numel); + SelectedRowsAddTensorKernel + <<>>( + in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), out_data, + in1_row_numel); auto out_eigen = framework::EigenVector::Flatten(*output); auto in2_eigen = framework::EigenVector::Flatten(input2); @@ -223,10 +223,10 @@ struct SelectedRowsAddTensor { dim3 threads(block_size, 1); dim3 grid(in1_rows.size(), 1); paddle::framework::MixVector mixv_in1_rows(&in1_rows); - SelectedRowsAddTensorKernel< - T, block_size><<>>( - in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), out_data, - in1_row_numel); + SelectedRowsAddTensorKernel + <<>>( + in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), out_data, + in1_row_numel); auto out_eigen = framework::EigenVector::Flatten(*output); auto in2_eigen = framework::EigenVector::Flatten(input2); @@ -343,10 +343,10 @@ struct SelectedRowsAddToTensor { dim3 threads(block_size, 1); dim3 grid(in1_rows.size(), 1); paddle::framework::MixVector mixv_in1_rows(&in1_rows); - SelectedRowsAddToTensorKernel< - T, block_size><<>>( - in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), in2_data, - in1_row_numel); + SelectedRowsAddToTensorKernel + <<>>( + in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), in2_data, + in1_row_numel); } }; @@ -380,10 +380,10 @@ struct SelectedRowsAddToTensor { dim3 threads(block_size, 1); dim3 grid(in1_rows.size(), 1); paddle::framework::MixVector mixv_in1_rows(&in1_rows); - SelectedRowsAddToTensorKernel< - T, block_size><<>>( - in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), in2_data, - in1_row_numel); + SelectedRowsAddToTensorKernel + <<>>( + in1_data, mixv_in1_rows.CUDAData(context.GetPlace()), in2_data, + in1_row_numel); } }; @@ -695,9 +695,9 @@ struct UpdateToTensor { dim3 threads(platform::PADDLE_CUDA_NUM_THREADS, 1); dim3 grid(in1_rows.size(), 1); - UpdateToTensorKernel<<< - grid, threads, 0, context.stream()>>>(in1_data, in1_rows.cuda_data(), - op, in2_data, in1_row_numel); + UpdateToTensorKernel + <<>>(in1_data, in1_rows.cuda_data(), + op, in2_data, in1_row_numel); } }; } // namespace scatter diff --git a/paddle/fluid/operators/math/selected_rows_functor_test.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cc index e0e28f93f36..e6358cda274 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cc @@ -457,8 +457,9 @@ TEST(selected_rows_functor, cpu_sum_to) { paddle::operators::math::SelectedRowsSumTo sum_to_functor; - sum_to_functor(ctx, std::vector( - {selected_rows1.get(), selected_rows2.get()}), + sum_to_functor(ctx, + std::vector( + {selected_rows1.get(), selected_rows2.get()}), std::vector({0, in1_value->numel()}), output.get()); auto out_height = output->height(); EXPECT_EQ(out_height, height); diff --git 
a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc index 0912a964792..6e1d0bb3670 100644 --- a/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc +++ b/paddle/fluid/operators/math/selected_rows_functor_test.cu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/selected_rows_functor.h" + #include "gtest/gtest.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/math/sequence_padding.cc b/paddle/fluid/operators/math/sequence_padding.cc index 35ba8c1d118..97e276fff02 100644 --- a/paddle/fluid/operators/math/sequence_padding.cc +++ b/paddle/fluid/operators/math/sequence_padding.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/sequence_padding.h" + #include "paddle/phi/backends/cpu/cpu_context.h" namespace phi { diff --git a/paddle/fluid/operators/math/sequence_padding.cu b/paddle/fluid/operators/math/sequence_padding.cu index 9aca6ad0f5a..ef7981858a9 100644 --- a/paddle/fluid/operators/math/sequence_padding.cu +++ b/paddle/fluid/operators/math/sequence_padding.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/operators/math/sequence_padding.h" #include "paddle/phi/backends/gpu/gpu_context.h" diff --git a/paddle/fluid/operators/math/sequence_padding.h b/paddle/fluid/operators/math/sequence_padding.h index 956a4ff6a2d..687c64fc23e 100644 --- a/paddle/fluid/operators/math/sequence_padding.h +++ b/paddle/fluid/operators/math/sequence_padding.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/device_context.h" @@ -64,13 +65,14 @@ inline static void CheckDims(const framework::DDim& seq_tensor_dims, PADDLE_ENFORCE_EQ( seq_tensor_dims.size() + 1 == pad_tensor_dims.size() || seq_tensor_dims.size() == pad_tensor_dims.size(), - true, platform::errors::InvalidArgument( - "pad_tensor's rank should be 1 greater than seq_tensor's " - "rank, or be equal with it. The pad_tensor's rank is %ld, " - "expected the seq_tensor's rank is %ld or %ld, but got %ld. " - "Please check the input value.", - pad_tensor_dims.size(), pad_tensor_dims.size(), - pad_tensor_dims.size() - 1, seq_tensor_dims.size())); + true, + platform::errors::InvalidArgument( + "pad_tensor's rank should be 1 greater than seq_tensor's " + "rank, or be equal with it. The pad_tensor's rank is %ld, " + "expected the seq_tensor's rank is %ld or %ld, but got %ld. " + "Please check the input value.", + pad_tensor_dims.size(), pad_tensor_dims.size(), + pad_tensor_dims.size() - 1, seq_tensor_dims.size())); } /* diff --git a/paddle/fluid/operators/math/sequence_pooling.cc b/paddle/fluid/operators/math/sequence_pooling.cc index 8312d7cd9b7..9abe9e59888 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cc +++ b/paddle/fluid/operators/math/sequence_pooling.cc @@ -12,10 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
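// Variant of the enforce reflow for compound predicates, as in the
// sequence_padding.h CheckDims hunk above: the whole boolean expression is
// the first macro argument and the literal true the second, so the break now
// falls after "true," instead of inside the error constructor (condition and
// message abbreviated here):
//
//   PADDLE_ENFORCE_EQ(
//       seq_rank + 1 == pad_rank || seq_rank == pad_rank, true,
//       platform::errors::InvalidArgument(
//           "pad_tensor's rank (%ld) should equal seq_tensor's rank (%ld) "
//           "or exceed it by 1.",
//           pad_rank, seq_rank));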
*/ +#include "paddle/fluid/operators/math/sequence_pooling.h" + #include #include "paddle/fluid/operators/jit/kernels.h" -#include "paddle/fluid/operators/math/sequence_pooling.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/math/sequence_pooling.cu b/paddle/fluid/operators/math/sequence_pooling.cu index fa7b0431538..217b29e1b6b 100644 --- a/paddle/fluid/operators/math/sequence_pooling.cu +++ b/paddle/fluid/operators/math/sequence_pooling.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/operators/math/sequence_pooling.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/macros.h" @@ -170,41 +171,41 @@ class SequencePoolFunctor { dim3 grid(std::max(static_cast(lod.size()) - 1, 1), 1); paddle::framework::MixVector mix_vector(&lod); if (pooltype == "MAX") { - sequence_pool_kernel< - T, MaxPoolFunctor><<>>( - MaxPoolFunctor(), input.data(), pad_value, - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - output->mutable_data(context.GetPlace()), index->data()); + sequence_pool_kernel> + <<>>( + MaxPoolFunctor(), input.data(), pad_value, + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), index->data()); } else if (pooltype == "AVERAGE") { - sequence_pool_kernel< - T, AvgPoolFunctor><<>>( - AvgPoolFunctor(), input.data(), pad_value, - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - output->mutable_data(context.GetPlace()), nullptr); + sequence_pool_kernel> + <<>>( + AvgPoolFunctor(), input.data(), pad_value, + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "SUM") { - sequence_pool_kernel< - T, SumPoolFunctor><<>>( - SumPoolFunctor(), input.data(), pad_value, - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - output->mutable_data(context.GetPlace()), nullptr); + sequence_pool_kernel> + <<>>( + SumPoolFunctor(), input.data(), pad_value, + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "SQRT") { - sequence_pool_kernel< - T, SqrtPoolFunctor><<>>( - SqrtPoolFunctor(), input.data(), pad_value, - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - output->mutable_data(context.GetPlace()), nullptr); + sequence_pool_kernel> + <<>>( + SqrtPoolFunctor(), input.data(), pad_value, + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "LAST") { - sequence_pool_kernel< - T, LastPoolFunctor><<>>( - LastPoolFunctor(), input.data(), pad_value, - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - output->mutable_data(context.GetPlace()), nullptr); + sequence_pool_kernel> + <<>>( + LastPoolFunctor(), input.data(), pad_value, + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + output->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "FIRST") { - sequence_pool_kernel< - T, FirstPoolFunctor><<>>( - FirstPoolFunctor(), input.data(), pad_value, - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - output->mutable_data(context.GetPlace()), nullptr); + sequence_pool_kernel> + <<>>( + FirstPoolFunctor(), input.data(), pad_value, + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + 
output->mutable_data(context.GetPlace()), nullptr); } else { PADDLE_THROW(platform::errors::InvalidArgument( "unsupported pooling pooltype: %s. Only support \"MAX\", " @@ -338,41 +339,41 @@ class SequencePoolGradFunctor { dim3 grid(std::max(static_cast(lod.size()) - 1, 1), 1); paddle::framework::MixVector mix_vector(&lod); if (pooltype == "MAX") { - sequence_pool_grad_kernel< - T, MaxPoolGradFunctor><<>>( - MaxPoolGradFunctor(), out_grad.data(), - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - in_grad->mutable_data(context.GetPlace()), index->data()); + sequence_pool_grad_kernel> + <<>>( + MaxPoolGradFunctor(), out_grad.data(), + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), index->data()); } else if (pooltype == "AVERAGE") { - sequence_pool_grad_kernel< - T, AvgPoolGradFunctor><<>>( - AvgPoolGradFunctor(), out_grad.data(), - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - in_grad->mutable_data(context.GetPlace()), nullptr); + sequence_pool_grad_kernel> + <<>>( + AvgPoolGradFunctor(), out_grad.data(), + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "SUM") { - sequence_pool_grad_kernel< - T, SumPoolGradFunctor><<>>( - SumPoolGradFunctor(), out_grad.data(), - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - in_grad->mutable_data(context.GetPlace()), nullptr); + sequence_pool_grad_kernel> + <<>>( + SumPoolGradFunctor(), out_grad.data(), + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "SQRT") { - sequence_pool_grad_kernel< - T, SqrtPoolGradFunctor><<>>( - SqrtPoolGradFunctor(), out_grad.data(), - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - in_grad->mutable_data(context.GetPlace()), nullptr); + sequence_pool_grad_kernel> + <<>>( + SqrtPoolGradFunctor(), out_grad.data(), + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "LAST") { - sequence_pool_grad_kernel< - T, LastPoolGradFunctor><<>>( - LastPoolGradFunctor(), out_grad.data(), - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - in_grad->mutable_data(context.GetPlace()), nullptr); + sequence_pool_grad_kernel> + <<>>( + LastPoolGradFunctor(), out_grad.data(), + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); } else if (pooltype == "FIRST") { - sequence_pool_grad_kernel< - T, FirstPoolGradFunctor><<>>( - FirstPoolGradFunctor(), out_grad.data(), - mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, - in_grad->mutable_data(context.GetPlace()), nullptr); + sequence_pool_grad_kernel> + <<>>( + FirstPoolGradFunctor(), out_grad.data(), + mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim, + in_grad->mutable_data(context.GetPlace()), nullptr); } else { PADDLE_THROW(platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/math/sequence_pooling.h b/paddle/fluid/operators/math/sequence_pooling.h index 847d0bca951..f5b6701b46e 100644 --- a/paddle/fluid/operators/math/sequence_pooling.h +++ b/paddle/fluid/operators/math/sequence_pooling.h @@ -14,6 +14,7 @@ limitations under the License. 
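// Shape of the pooltype dispatch reformatted in the two large
// sequence_pooling.cu hunks above: one templated kernel instantiation per
// string-matched branch, each now breaking before its launch configuration
// (stripped template arguments restored schematically, arguments elided):
//
//   if (pooltype == "MAX") {
//     sequence_pool_kernel<T, MaxPoolFunctor<T>>
//         <<<grid, threads, 0, context.stream()>>>(
//             MaxPoolFunctor<T>(), input.data<T>(), pad_value,
//             mix_vector.CUDAData(context.GetPlace()), lod.size(), item_dim,
//             output->mutable_data<T>(context.GetPlace()), index->data<int>());
//   } else if (pooltype == "AVERAGE") {
//     sequence_pool_kernel<T, AvgPoolFunctor<T>>
//         <<<grid, threads, 0, context.stream()>>>(/* ... */);
//   }  // SUM, SQRT, LAST, FIRST follow the same shape.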
*/ #pragma once #include + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/math/sequence_pooling_test.cc b/paddle/fluid/operators/math/sequence_pooling_test.cc index 815d221e255..6d9c75f9550 100644 --- a/paddle/fluid/operators/math/sequence_pooling_test.cc +++ b/paddle/fluid/operators/math/sequence_pooling_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/sequence_pooling.h" + #include template diff --git a/paddle/fluid/operators/math/sequence_scale.cc b/paddle/fluid/operators/math/sequence_scale.cc index bc8832a1bbc..8f954e068c0 100644 --- a/paddle/fluid/operators/math/sequence_scale.cc +++ b/paddle/fluid/operators/math/sequence_scale.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/sequence_scale.h" + #include "paddle/phi/backends/cpu/cpu_context.h" namespace phi { diff --git a/paddle/fluid/operators/math/sequence_scale.cu b/paddle/fluid/operators/math/sequence_scale.cu index 253a67c2c8c..c0b97497cc7 100644 --- a/paddle/fluid/operators/math/sequence_scale.cu +++ b/paddle/fluid/operators/math/sequence_scale.cu @@ -53,10 +53,10 @@ class ScaleLoDTensorFunctor { seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales, seq_width); #else - SequenceScaleKernel<<< - num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>( - seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales, - seq_width); + SequenceScaleKernel + <<>>( + seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales, + seq_width); #endif mix_vector.CopyToCPU(); } @@ -82,10 +82,10 @@ class ScaleLoDTensorFunctor { seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales, seq_width); #else - SequenceScaleKernel<<< - num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>( - seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales, - seq_width); + SequenceScaleKernel + <<>>( + seq_data, mix_vector.CUDAMutableData(context.GetPlace()), scales, + seq_width); #endif mix_vector.CopyToCPU(); } diff --git a/paddle/fluid/operators/math/softmax.cc b/paddle/fluid/operators/math/softmax.cc index c855cb763a9..adea86a6c5a 100644 --- a/paddle/fluid/operators/math/softmax.cc +++ b/paddle/fluid/operators/math/softmax.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/math/softmax.h" + #include "paddle/fluid/operators/math/softmax_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" diff --git a/paddle/fluid/operators/math/softmax_impl.h b/paddle/fluid/operators/math/softmax_impl.h index 69642c81942..33da631d27b 100644 --- a/paddle/fluid/operators/math/softmax_impl.h +++ b/paddle/fluid/operators/math/softmax_impl.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/jit/kernels.h" @@ -66,34 +67,32 @@ class SoftmaxEigen { if (num_remain == 1) { // axis == -1, axis and class in same dimension, calculate along // class dimension directly for higher performance - softmax.device(*context.eigen_device()) = (logits - - logits.maximum(along_axis) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)) - .unaryExpr(ValueClip()); + softmax.device(*context.eigen_device()) = + (logits - logits.maximum(along_axis) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)) + .unaryExpr(ValueClip()); } else { // axis != -1, class dimension split into (axis, remain), max and sum // should be calculated along axis dimension softmax.device(*context.eigen_device()) = - (logits.reshape(batch_axis_remain) - - logits.reshape(batch_axis_remain) - .maximum(along_axis) - .eval() - .reshape(batch_one_remain) - .broadcast(one_axis_one) - .reshape(batch_classes)) + (logits.reshape(batch_axis_remain) - logits.reshape(batch_axis_remain) + .maximum(along_axis) + .eval() + .reshape(batch_one_remain) + .broadcast(one_axis_one) + .reshape(batch_classes)) .unaryExpr(ValueClip()); } softmax.device(*context.eigen_device()) = softmax.exp(); softmax.device(*context.eigen_device()) = - (softmax * - softmax.reshape(batch_axis_remain) - .sum(along_axis) - .inverse() - .eval() - .broadcast(one_axis)); + (softmax * softmax.reshape(batch_axis_remain) + .sum(along_axis) + .inverse() + .eval() + .broadcast(one_axis)); } }; @@ -128,31 +127,28 @@ class SoftmaxEigen { // axis == -1, axis and class in same dimension, calculate along // class dimension directly for higher performance softmax.device(*context.eigen_device()) = - (logits - - logits.maximum(along_axis) - .reshape(batch_by_one) - .broadcast(one_by_class)) + (logits - logits.maximum(along_axis) + .reshape(batch_by_one) + .broadcast(one_by_class)) .unaryExpr(ValueClip()); } else { // axis != -1, class dimension split into (axis, remain), max and sum // should be calculated along axis dimension softmax.device(*context.eigen_device()) = - (logits.reshape(batch_axis_remain) - - logits.reshape(batch_axis_remain) - .maximum(along_axis) - .reshape(batch_one_remain) - .broadcast(one_axis_one) - .reshape(batch_classes)) + (logits.reshape(batch_axis_remain) - logits.reshape(batch_axis_remain) + .maximum(along_axis) + .reshape(batch_one_remain) + .broadcast(one_axis_one) + .reshape(batch_classes)) .unaryExpr(ValueClip()); } softmax.device(*context.eigen_device()) = softmax.exp(); softmax.device(*context.eigen_device()) = - (softmax * - softmax.reshape(batch_axis_remain) - .sum(along_axis) - .inverse() - .broadcast(one_axis)); + (softmax * softmax.reshape(batch_axis_remain) + .sum(along_axis) + .inverse() + .broadcast(one_axis)); } }; @@ -187,31 +183,28 @@ class SoftmaxEigen { // axis == -1, axis and class in same dimension, calculate along // class dimension directly for higher performance softmax.device(*context.eigen_device()) = - (logits - - logits.maximum(along_axis) - .reshape(batch_by_one) - .broadcast(one_by_class)) + (logits - logits.maximum(along_axis) + .reshape(batch_by_one) + .broadcast(one_by_class)) .unaryExpr(ValueClip()); } else { // axis != -1, class dimension split into (axis, remain), max and sum // should be calculated along axis dimension softmax.device(*context.eigen_device()) = - (logits.reshape(batch_axis_remain) - - logits.reshape(batch_axis_remain) - .maximum(along_axis) - 
.reshape(batch_one_remain) - .broadcast(one_axis_one) - .reshape(batch_classes)) + (logits.reshape(batch_axis_remain) - logits.reshape(batch_axis_remain) + .maximum(along_axis) + .reshape(batch_one_remain) + .broadcast(one_axis_one) + .reshape(batch_classes)) .unaryExpr(ValueClip()); } softmax.device(*context.eigen_device()) = softmax.exp(); softmax.device(*context.eigen_device()) = - (softmax * - softmax.reshape(batch_axis_remain) - .sum(along_axis) - .inverse() - .broadcast(one_axis)); + (softmax * softmax.reshape(batch_axis_remain) + .sum(along_axis) + .inverse() + .broadcast(one_axis)); } }; diff --git a/paddle/fluid/operators/math/sparse_impl.cu.h b/paddle/fluid/operators/math/sparse_impl.cu.h index dd2d256dd73..03f94ed5736 100644 --- a/paddle/fluid/operators/math/sparse_impl.cu.h +++ b/paddle/fluid/operators/math/sparse_impl.cu.h @@ -14,11 +14,10 @@ #pragma once +#include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/dynload/cusparse.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" - namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/tree2col.cc b/paddle/fluid/operators/math/tree2col.cc index cd1fa13001c..8ad0a17c27e 100644 --- a/paddle/fluid/operators/math/tree2col.cc +++ b/paddle/fluid/operators/math/tree2col.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/math/tree2col.h" + #include #include diff --git a/paddle/fluid/operators/math/tree2col.cu b/paddle/fluid/operators/math/tree2col.cu index bdaab212ab1..c8bba20a423 100644 --- a/paddle/fluid/operators/math/tree2col.cu +++ b/paddle/fluid/operators/math/tree2col.cu @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/fluid/operators/math/tree2col.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/math/tree2col.h b/paddle/fluid/operators/math/tree2col.h index 88104b858ba..df4b233a763 100644 --- a/paddle/fluid/operators/math/tree2col.h +++ b/paddle/fluid/operators/math/tree2col.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/math/vol2col.cu b/paddle/fluid/operators/math/vol2col.cu index fb61a36a8e1..d8581d731e8 100644 --- a/paddle/fluid/operators/math/vol2col.cu +++ b/paddle/fluid/operators/math/vol2col.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/operators/math/vol2col.h" #include "paddle/fluid/platform/device/gpu/gpu_launch_config.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/math/vol2col.h b/paddle/fluid/operators/math/vol2col.h index 3122828b2ee..cddcb0af467 100644 --- a/paddle/fluid/operators/math/vol2col.h +++ b/paddle/fluid/operators/math/vol2col.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/operators/math/vol2col_test.cc b/paddle/fluid/operators/math/vol2col_test.cc index 210cf10d887..4889817cd9e 100644 --- a/paddle/fluid/operators/math/vol2col_test.cc +++ b/paddle/fluid/operators/math/vol2col_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/math/vol2col.h" #include + #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/operators/matmul_op.cc b/paddle/fluid/operators/matmul_op.cc index 9d381e1f22b..2c16774e324 100644 --- a/paddle/fluid/operators/matmul_op.cc +++ b/paddle/fluid/operators/matmul_op.cc @@ -12,6 +12,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" @@ -258,13 +259,14 @@ class MatMulGradKernel : public framework::OpKernel { MatMul(context, a, trans_a, b, trans_b, out); } else { auto &ctx = context.template device_context(); - MatMul(context, is_fold_init_dims_a - ? FoldInitDims(a) - : FoldHeadAndLastDims(ctx, a), - trans_a, is_fold_init_dims_b - ? FoldInitDims(b) - : FoldHeadAndLastDims(ctx, b), - trans_b, out); + MatMul( + context, + is_fold_init_dims_a ? FoldInitDims(a) + : FoldHeadAndLastDims(ctx, a), + trans_a, + is_fold_init_dims_b ? FoldInitDims(b) + : FoldHeadAndLastDims(ctx, b), + trans_b, out); } } @@ -425,13 +427,14 @@ class MatMulDoubleGradKernel : public framework::OpKernel { MatMul(context, a, trans_a, b, trans_b, flag, out); } else { auto &ctx = context.template device_context(); - MatMul(context, is_fold_init_dims_a - ? FoldInitDims(a) - : FoldHeadAndLastDims(ctx, a), - trans_a, is_fold_init_dims_b - ? FoldInitDims(b) - : FoldHeadAndLastDims(ctx, b), - trans_b, flag, out); + MatMul( + context, + is_fold_init_dims_a ? FoldInitDims(a) + : FoldHeadAndLastDims(ctx, a), + trans_a, + is_fold_init_dims_b ? FoldInitDims(b) + : FoldHeadAndLastDims(ctx, b), + trans_b, flag, out); } } @@ -602,12 +605,13 @@ class MatMulOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( mat_dim_x.batch_size_ == mat_dim_y.batch_size_ || mat_dim_x.batch_size_ == 0 || mat_dim_y.batch_size_ == 0, - true, platform::errors::InvalidArgument( - "The batch size of the two matrices should be equal, or " - "at least one is zero.\n" - "But received X's shape: %s, Y's shape: %s.", - DumpMatrixShape(mat_dim_x).c_str(), - DumpMatrixShape(mat_dim_y).c_str())); + true, + platform::errors::InvalidArgument( + "The batch size of the two matrices should be equal, or " + "at least one is zero.\n" + "But received X's shape: %s, Y's shape: %s.", + DumpMatrixShape(mat_dim_x).c_str(), + DumpMatrixShape(mat_dim_y).c_str())); } int64_t dim_out_y = mat_dim_y.width_; #if defined(PADDLE_WITH_MKLML) && !defined(PADDLE_WITH_CUDA) && \ @@ -996,13 +1000,12 @@ REGISTER_OP_CUDA_KERNEL( ops::MatMulDoubleGradKernel); #endif -REGISTER_OP_VERSION(matmul) - .AddCheckpoint( - R"ROC(Register matmul for adding the attribute of +REGISTER_OP_VERSION(matmul).AddCheckpoint( + R"ROC(Register matmul for adding the attribute of fused_reshape_Y)ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "fused_reshape_Y", - "In order to support the function of fused the input Y " - " and input X into the input X when " - "using the operator of matmul, and get raw shape of input Y.", - std::vector{})); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "fused_reshape_Y", + "In order to support the function of fused the input Y " + " and input X into the input X when " + "using the operator of matmul, and get raw shape of input Y.", + std::vector{})); diff --git a/paddle/fluid/operators/matmul_op_xpu.cc b/paddle/fluid/operators/matmul_op_xpu.cc index 80d4492e049..3477715d6d3 100644 --- 
a/paddle/fluid/operators/matmul_op_xpu.cc +++ b/paddle/fluid/operators/matmul_op_xpu.cc @@ -315,14 +315,15 @@ class MatMulGradXPUKernel : public framework::OpKernel { MatMul(context, a, trans_a, b, trans_b, out); } else { auto &dev_ctx = context.template device_context(); - MatMul( - context, is_fold_init_dims_a - ? FoldInitDims(a) - : XPUFoldHeadAndLastDims(dev_ctx, a), - trans_a, is_fold_init_dims_b - ? FoldInitDims(b) - : XPUFoldHeadAndLastDims(dev_ctx, b), - trans_b, out); + MatMul(context, + is_fold_init_dims_a + ? FoldInitDims(a) + : XPUFoldHeadAndLastDims(dev_ctx, a), + trans_a, + is_fold_init_dims_b + ? FoldInitDims(b) + : XPUFoldHeadAndLastDims(dev_ctx, b), + trans_b, out); } } diff --git a/paddle/fluid/operators/matmul_v2_op.cc b/paddle/fluid/operators/matmul_v2_op.cc index 162ebdafec1..168a3dbfeaa 100644 --- a/paddle/fluid/operators/matmul_v2_op.cc +++ b/paddle/fluid/operators/matmul_v2_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/matmul_v2_op.h" + #include #include diff --git a/paddle/fluid/operators/matmul_v2_op.h b/paddle/fluid/operators/matmul_v2_op.h index 34a8e97af2e..b47cdf6e8cb 100644 --- a/paddle/fluid/operators/matmul_v2_op.h +++ b/paddle/fluid/operators/matmul_v2_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/dot_op.h" diff --git a/paddle/fluid/operators/matmul_v2_op_xpu.cc b/paddle/fluid/operators/matmul_v2_op_xpu.cc index 87df75ac465..f85e714ce95 100644 --- a/paddle/fluid/operators/matmul_v2_op_xpu.cc +++ b/paddle/fluid/operators/matmul_v2_op_xpu.cc @@ -14,10 +14,10 @@ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/matmul_v2_op.h" #include #include +#include "paddle/fluid/operators/matmul_v2_op.h" #include "paddle/fluid/operators/xpu_api_wrapper.h" namespace paddle { diff --git a/paddle/fluid/operators/matrix_power_op.cc b/paddle/fluid/operators/matrix_power_op.cc index 56f65340ea9..ffbb8538d94 100644 --- a/paddle/fluid/operators/matrix_power_op.cc +++ b/paddle/fluid/operators/matrix_power_op.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" diff --git a/paddle/fluid/operators/matrix_rank_op.cc b/paddle/fluid/operators/matrix_rank_op.cc index e7d08b65973..fddfaa3526a 100644 --- a/paddle/fluid/operators/matrix_rank_op.cc +++ b/paddle/fluid/operators/matrix_rank_op.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/svd_helper.h" #include "paddle/phi/kernels/funcs/compare_functors.h" diff --git a/paddle/fluid/operators/mean_iou_op.h b/paddle/fluid/operators/mean_iou_op.h index 9fa00e60e05..1cf9f4433bc 100644 --- a/paddle/fluid/operators/mean_iou_op.h +++ b/paddle/fluid/operators/mean_iou_op.h @@ -14,6 +14,7 @@ limitations under the License. 
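[Editor's note] Most of the remaining hunks are the same mechanical change: a blank line is inserted after the file's own header, and each include block is kept sorted. A layout-only sketch of the resulting convention (the paths are placeholders, not real files):

    // foo_op.cc: the matching header first, then a blank line, then
    // system headers, then project headers, each block sorted
    // alphabetically.
    #include "paddle/fluid/operators/foo_op.h"

    #include <string>
    #include <vector>

    #include "paddle/fluid/framework/op_registry.h"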
*/ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/mean_op_xpu.cc b/paddle/fluid/operators/mean_op_xpu.cc index ef96fe2f03b..811b138c8d1 100644 --- a/paddle/fluid/operators/mean_op_xpu.cc +++ b/paddle/fluid/operators/mean_op_xpu.cc @@ -56,8 +56,9 @@ class MeanGradXPUKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { auto OG = context.Input(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ(OG->numel(), 1, platform::errors::InvalidArgument( - "Mean Gradient should be scalar")); + PADDLE_ENFORCE_EQ( + OG->numel(), 1, + platform::errors::InvalidArgument("Mean Gradient should be scalar")); auto IG = context.Output(framework::GradVarName("X")); IG->mutable_data(context.GetPlace()); auto& dev_ctx = context.template device_context(); diff --git a/paddle/fluid/operators/merge_lod_tensor_op.cc b/paddle/fluid/operators/merge_lod_tensor_op.cc index e2b86bd0e3b..0d4c2f7b3b4 100644 --- a/paddle/fluid/operators/merge_lod_tensor_op.cc +++ b/paddle/fluid/operators/merge_lod_tensor_op.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" - #include "paddle/phi/core/lod_utils.h" namespace phi { diff --git a/paddle/fluid/operators/merge_selected_rows_op.cc b/paddle/fluid/operators/merge_selected_rows_op.cc index ea223ad1b32..cfb8aa1f8a7 100644 --- a/paddle/fluid/operators/merge_selected_rows_op.cc +++ b/paddle/fluid/operators/merge_selected_rows_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/merge_selected_rows_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/merge_selected_rows_op.h b/paddle/fluid/operators/merge_selected_rows_op.h index 4c87a4a6411..d0f18b22b27 100644 --- a/paddle/fluid/operators/merge_selected_rows_op.h +++ b/paddle/fluid/operators/merge_selected_rows_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" diff --git a/paddle/fluid/operators/meshgrid_op.cc b/paddle/fluid/operators/meshgrid_op.cc index 5a6862f380d..cc57a25a1fb 100644 --- a/paddle/fluid/operators/meshgrid_op.cc +++ b/paddle/fluid/operators/meshgrid_op.cc @@ -16,10 +16,9 @@ #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" - -#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/multiary.h" diff --git a/paddle/fluid/operators/miopen_lstm_cache.h b/paddle/fluid/operators/miopen_lstm_cache.h index c307218baa4..045f917de70 100644 --- a/paddle/fluid/operators/miopen_lstm_cache.h +++ b/paddle/fluid/operators/miopen_lstm_cache.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" diff --git a/paddle/fluid/operators/miopen_rnn_cache.h b/paddle/fluid/operators/miopen_rnn_cache.h index 38cea39abd5..438163cd77e 100644 --- a/paddle/fluid/operators/miopen_rnn_cache.h +++ b/paddle/fluid/operators/miopen_rnn_cache.h @@ -15,6 +15,7 @@ limitations under the License. 
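[Editor's note] The PADDLE_ENFORCE_* rewrites, as in mean_op_xpu.cc above, all follow one rule: break right after the opening parenthesis so the condition arguments and the error factory each get whole lines. The macro below is a simplified local stand-in, not Paddle's, used only to show the shape:

    #include <cstdio>
    #include <stdexcept>

    // Simplified stand-in for PADDLE_ENFORCE_EQ, layout demonstration only.
    #define ENFORCE_EQ(a, b, msg)                         \
      do {                                                \
        if ((a) != (b)) throw std::invalid_argument(msg); \
      } while (0)

    int main() {
      int numel = 1;
      // New layout: break after '(' so the message stays on its own line.
      ENFORCE_EQ(
          numel, 1,
          "Mean Gradient should be scalar");
      std::puts("ok");
    }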
*/ #pragma once #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/device/gpu/gpu_dnn.h" diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 393247644c2..db74b24b405 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -30,11 +30,11 @@ class MKLDNNDeviceContext; namespace paddle { namespace operators { -using framework::DataLayout; -using framework::Tensor; using dnnl::memory; using dnnl::primitive; using dnnl::stream; +using framework::DataLayout; +using framework::Tensor; using platform::GetMKLDNNFormat; using platform::MKLDNNDeviceContext; using platform::to_void_cast; diff --git a/paddle/fluid/operators/mkldnn/axpy_handler.cc b/paddle/fluid/operators/mkldnn/axpy_handler.cc index ee630fe186a..80f74195d8e 100644 --- a/paddle/fluid/operators/mkldnn/axpy_handler.cc +++ b/paddle/fluid/operators/mkldnn/axpy_handler.cc @@ -12,13 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/mkldnn/axpy_handler.h" + #include #include #include #include #include "dnnl.hpp" -#include "paddle/fluid/operators/mkldnn/axpy_handler.h" #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/mkldnn_helper.h" @@ -82,7 +83,7 @@ static void naive_axpy(int n, T alpha, const T *x, T *y) { } } -} // anonnymouse namespace +} // namespace template class OneDNNAXPYHandler::Impl { diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 5095fa06719..0881baa6f8e 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/operators/concat_op.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/mkldnn_helper.h" @@ -21,13 +22,13 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::DataLayout; -using framework::Tensor; -using framework::LoDTensor; +using dnnl::concat; using dnnl::memory; using dnnl::primitive; -using dnnl::concat; using dnnl::stream; +using framework::DataLayout; +using framework::LoDTensor; +using framework::Tensor; using platform::to_void_cast; template diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index fba17d303f2..65092e059f4 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -203,8 +203,9 @@ class ConvMKLDNNHandlerT dnnl::memory::desc src_md, weights_md; if (platform::is_int8()) { src_md = platform::MKLDNNMemDesc( - src_tz, framework::ToMKLDNNDataType( - framework::TransToProtoVarType(input->dtype())), + src_tz, + framework::ToMKLDNNDataType( + framework::TransToProtoVarType(input->dtype())), chosen_memory_format); weights_md = platform::MKLDNNMemDesc( weights_tz, dnnl::memory::data_type::s8, chosen_memory_format); @@ -459,13 +460,12 @@ class ConvMKLDNNHandlerT auto scale_weights_data = ctx.Attr>("Scale_weights"); bool is_multi_channel = scale_weights_data.size() > 1; bool has_activation = !ctx.Attr("fuse_activation").empty(); - float activation_scale = - force_fp32_output ? 1.0f : has_activation ? ctx.Attr("Scale_out") - : 1.0f; - auto scale_out_data = - force_fp32_output ? 1.0f : has_activation - ? 1.0f - : ctx.Attr("Scale_out"); + float activation_scale = force_fp32_output ? 1.0f + : has_activation ? ctx.Attr("Scale_out") + : 1.0f; + auto scale_out_data = force_fp32_output ? 1.0f + : has_activation ? 1.0f + : ctx.Attr("Scale_out"); float sum_scale = fuse_residual_conn ? scale_out_data / scale_in_eltwise_data : 1.0f; int count = diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc index 747e4603d7f..e507b2429b7 100644 --- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc @@ -28,8 +28,8 @@ using dnnl::primitive; using dnnl::reorder; using platform::to_void_cast; using Tensor = framework::Tensor; -using framework::DataLayout; using dnnl::stream; +using framework::DataLayout; using platform::GetMKLDNNFormat; template diff --git a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc index 91dccbee0ae..035add5fd83 100644 --- a/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/expand_v2_mkldnn_op.cc @@ -18,11 +18,11 @@ limitations under the License. 
*/ namespace { -using paddle::framework::Tensor; -using phi::vectorize; -using paddle::framework::GradVarName; using paddle::framework::ExecutionContext; +using paddle::framework::GradVarName; +using paddle::framework::Tensor; using paddle::platform::MKLDNNDeviceContext; +using phi::vectorize; template class ExpandMKLDNNKernel : public paddle::framework::OpKernel { diff --git a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc index 4078d012fce..5cbcad5d965 100644 --- a/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/fc_mkldnn_op.cc @@ -31,19 +31,19 @@ class MKLDNNDeviceContext; namespace paddle { namespace operators { +using dnnl::inner_product_forward; +using dnnl::memory; +using dnnl::primitive; +using dnnl::prop_kind; +using dnnl::stream; using framework::DataLayout; -using framework::Tensor; -using framework::LoDTensor; using framework::DDim; using framework::ExecutionContext; +using framework::LoDTensor; +using framework::Tensor; +using platform::GetMKLDNNFormat; using platform::MKLDNNDeviceContext; using platform::to_void_cast; -using platform::GetMKLDNNFormat; -using dnnl::memory; -using dnnl::inner_product_forward; -using dnnl::primitive; -using dnnl::stream; -using dnnl::prop_kind; template class FCPrimitiveFactory { diff --git a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc index 37d6c072903..a53a30b737d 100644 --- a/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/interpolate_mkldnn_op.cc @@ -19,12 +19,12 @@ namespace paddle { namespace operators { -using framework::DataLayout; using dnnl::memory; using dnnl::primitive; using dnnl::reorder; -using dnnl::stream; using dnnl::resampling_forward; +using dnnl::stream; +using framework::DataLayout; using platform::GetMKLDNNFormat; using platform::to_void_cast; @@ -114,9 +114,10 @@ class InterpolateMKLDNNKernel : public framework::OpKernel { PADDLE_ENFORCE_GT(std::all_of(out_dims.begin(), out_dims.end(), [](int i) { return i > 0; }), - 0, platform::errors::InvalidArgument( - "out_d, out_h, out_w of Op(interpolate) " - "should be greater than 0.")); + 0, + platform::errors::InvalidArgument( + "out_d, out_h, out_w of Op(interpolate) " + "should be greater than 0.")); const std::vector nc_dims = {in_dims[0], in_dims[1]}; out_dims.insert(out_dims.begin(), nc_dims.begin(), nc_dims.end()); diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc index e9abe84e679..8921db6cbce 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.cc @@ -13,19 +13,21 @@ See the License for the specific language governing permissions and limitations under the License. 
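[Editor's note] The reordered using-declarations in these oneDNN kernels come from clang-format's SortUsingDeclarations option: aliases are sorted by fully qualified name, which is why the dnnl:: entries now precede the framework:: ones. A compilable miniature (the namespaces here are mocks, not the real libraries):

    namespace dnnl {
    struct memory {};
    struct stream {};
    }  // namespace dnnl
    namespace framework {
    struct DataLayout {};
    struct Tensor {};
    }  // namespace framework

    // Sorted by qualified name, matching the hunks above.
    using dnnl::memory;
    using dnnl::stream;
    using framework::DataLayout;
    using framework::Tensor;

    int main() {
      memory m;
      Tensor t;
      (void)m;
      (void)t;
    }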
*/ #include "paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h" + #include + #include "paddle/fluid/framework/convert_utils.h" using dnnl::memory; using dnnl::primitive; using paddle::framework::DataLayout; using paddle::framework::ExecutionContext; -using phi::vectorize; using paddle::platform::GetMKLDNNFormat; -using paddle::platform::MKLDNNFormatForSize; using paddle::platform::MKLDNNDeviceContext; +using paddle::platform::MKLDNNFormatForSize; using paddle::platform::MKLDNNGetDataType; using paddle::platform::to_void_cast; +using phi::vectorize; using Tensor = paddle::framework::Tensor; namespace { diff --git a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h index 583dcd04018..07cb2173a7e 100644 --- a/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h +++ b/paddle/fluid/operators/mkldnn/matmul_mkldnn_op.h @@ -22,8 +22,8 @@ limitations under the License. */ namespace paddle { namespace operators { -using platform::MKLDNNDeviceContext; using framework::ExecutionContext; +using platform::MKLDNNDeviceContext; using Tensor = framework::Tensor; template diff --git a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc index 6e7ba59cf1a..424faf30d3a 100644 --- a/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/matmul_v2_mkldnn_op.cc @@ -20,8 +20,8 @@ using dnnl::memory; using dnnl::primitive; using paddle::framework::DataLayout; using paddle::framework::ExecutionContext; -using paddle::platform::MatMulV2MKLDNNHandler; using paddle::platform::GetMKLDNNFormat; +using paddle::platform::MatMulV2MKLDNNHandler; using paddle::platform::MKLDNNDeviceContext; using paddle::platform::MKLDNNGetDataType; using paddle::platform::to_void_cast; @@ -206,11 +206,12 @@ class MatMulV2MKLDNNKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE_EQ( x_bd_dims[i] == y_bd_dims[i] || x_bd_dims[i] == 1 || y_bd_dims[i] == 1, - true, paddle::platform::errors::InvalidArgument( - "Tensor dimensions are incorrect for broadcasting." - "Dimensions in X and Y must be same or equal to 1, but " - "received x_dim[%d]=%d and y_dims[%d]= %d", - i, x_bd_dims[i], i, y_bd_dims[i])); + true, + paddle::platform::errors::InvalidArgument( + "Tensor dimensions are incorrect for broadcasting." + "Dimensions in X and Y must be same or equal to 1, but " + "received x_dim[%d]=%d and y_dims[%d]= %d", + i, x_bd_dims[i], i, y_bd_dims[i])); out_dims[i] = std::max(x_bd_dims[i], y_bd_dims[i]); } out->Resize(phi::make_ddim(out_dims)); diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 77763531c82..dbf3adcdad0 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -20,14 +20,14 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::DataLayout; -using framework::Tensor; using dnnl::memory; using dnnl::pooling_backward; using dnnl::pooling_forward; using dnnl::primitive; using dnnl::reorder; using dnnl::stream; +using framework::DataLayout; +using framework::Tensor; using platform::to_void_cast; template diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc index 8cbe46bee48..8f3a3e8ba65 100644 --- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc @@ -27,8 +27,8 @@ using dnnl::primitive; using dnnl::reorder; using platform::to_void_cast; using Tensor = framework::Tensor; -using framework::DataLayout; using dnnl::stream; +using framework::DataLayout; using platform::GetMKLDNNFormat; template diff --git a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc index 9a7ac6d5055..778a33f27af 100644 --- a/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/requantize_mkldnn_op.cc @@ -46,10 +46,12 @@ class ReQuantOpKernel : public framework::OpKernel { bool with_shift = shift_in != 0.0f || shift_out != 0.0f; auto* output = ctx.Output("Output"); - PADDLE_ENFORCE_NE(scale_in, 0.0f, platform::errors::InvalidArgument( - "Scale of input cannot be 0.0")); - PADDLE_ENFORCE_NE(scale_out, 0.0f, platform::errors::InvalidArgument( - "Scale of output cannot be 0.0")); + PADDLE_ENFORCE_NE( + scale_in, 0.0f, + platform::errors::InvalidArgument("Scale of input cannot be 0.0")); + PADDLE_ENFORCE_NE( + scale_out, 0.0f, + platform::errors::InvalidArgument("Scale of output cannot be 0.0")); if (shift_in != 0.0f) { PADDLE_ENFORCE_EQ( framework::TransToProtoVarType(input->dtype()), diff --git a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc index a21034d48ba..f1c5153240e 100644 --- a/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/reshape_mkldnn_op.cc @@ -31,8 +31,8 @@ namespace paddle { namespace operators { using paddle::framework::LoDTensor; -using platform::to_void_cast; using platform::GetMKLDNNFormat; +using platform::to_void_cast; static std::vector extract_shape( const std::vector& list_new_shape_tensor) { diff --git a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc index 28a00be5fa4..798fe51901d 100644 --- a/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/stack_mkldnn_op.cc @@ -17,13 +17,13 @@ limitations under the License. 
*/ namespace paddle { namespace operators { -using framework::DataLayout; -using framework::Tensor; -using framework::LoDTensor; +using dnnl::concat; using dnnl::memory; using dnnl::primitive; -using dnnl::concat; using dnnl::stream; +using framework::DataLayout; +using framework::LoDTensor; +using framework::Tensor; using platform::to_void_cast; template diff --git a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc index de21c2687bd..b564602fdaa 100644 --- a/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/sum_mkldnn_op.cc @@ -116,8 +116,9 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel { const auto& mkldnn_engine = dev_ctx.GetEngine(); auto in_vars = ctx.MultiInputVar("X"); - PADDLE_ENFORCE_NE(in_vars.empty(), true, platform::errors::InvalidArgument( - "Input variable is empty.")); + PADDLE_ENFORCE_NE( + in_vars.empty(), true, + platform::errors::InvalidArgument("Input variable is empty.")); auto& input0 = in_vars[0]->Get(); LoDTensor* output = ctx.Output("Out"); diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc index b5fb0c54c78..1e04cc8a8a5 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_caching.cc @@ -16,6 +16,7 @@ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" @@ -121,8 +122,9 @@ void RunOperator(const platform::Place &place, const std::string &op_type, auto op = num_inputs[op_type] > 1 ? framework::OpRegistry::CreateOp( - op_type, {{first_input_var_name, {first_input}}, - {second_input_var_name, {"x1"}}}, + op_type, + {{first_input_var_name, {first_input}}, + {second_input_var_name, {"x1"}}}, {{output_var_name, {output_name}}}, {{"use_mkldnn", {true}}}) : framework::OpRegistry::CreateOp( op_type, {{first_input_var_name, {first_input}}}, diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc index 4090d5ffca8..a1acf3706c5 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_inplace.cc @@ -16,6 +16,7 @@ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc index b9866ba8c36..f4b79a02163 100644 --- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc +++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc @@ -16,6 +16,7 @@ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc index ee992277314..13f9dba9eeb 100644 --- a/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/transpose_mkldnn_op.cc @@ -66,7 +66,7 @@ class TransposeMKLDNNHandler { protected: dnnl::memory::desc Axis2MemoryDesc(std::vector& nchw_tz, // NOLINT std::vector& axis // NOLINT - ) { + ) { size_t ndims = axis.size(); std::vector strides(ndims); diff --git a/paddle/fluid/operators/mlu/mlu_baseop.cc b/paddle/fluid/operators/mlu/mlu_baseop.cc index 9d3b8e2407f..1ff27454013 100644 --- 
a/paddle/fluid/operators/mlu/mlu_baseop.cc +++ b/paddle/fluid/operators/mlu/mlu_baseop.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/mlu/mlu_baseop.h" + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/framework.pb.h" diff --git a/paddle/fluid/operators/mode_op.cc b/paddle/fluid/operators/mode_op.cc index 9c16ccb138f..d946f177545 100644 --- a/paddle/fluid/operators/mode_op.cc +++ b/paddle/fluid/operators/mode_op.cc @@ -13,10 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" - -#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/modified_huber_loss_op.cc b/paddle/fluid/operators/modified_huber_loss_op.cc index 9a53c7162ff..4216ee097be 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cc +++ b/paddle/fluid/operators/modified_huber_loss_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/modified_huber_loss_op.h" + #include namespace paddle { @@ -29,10 +30,11 @@ class ModifiedHuberLossOp : public framework::OperatorWithKernel { auto x_dims = ctx->GetInputDim("X"); auto y_dims = ctx->GetInputDim("Y"); - PADDLE_ENFORCE_EQ(x_dims.size(), 2, platform::errors::InvalidArgument( - "Input(input) rank should be 2, " - "but received input rank(%d) != 2", - x_dims.size())); + PADDLE_ENFORCE_EQ( + x_dims.size(), 2, + platform::errors::InvalidArgument("Input(input) rank should be 2, " + "but received input rank(%d) != 2", + x_dims.size())); if (ctx->IsRuntime() || (phi::product(x_dims) > 0 && phi::product(y_dims) > 0)) { diff --git a/paddle/fluid/operators/modified_huber_loss_op.cu b/paddle/fluid/operators/modified_huber_loss_op.cu index 8f1894b5af0..ad34a54a9bf 100644 --- a/paddle/fluid/operators/modified_huber_loss_op.cu +++ b/paddle/fluid/operators/modified_huber_loss_op.cu @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/modified_huber_loss_op.h" #include "paddle/phi/core/hostdevice.h" diff --git a/paddle/fluid/operators/mul_op.cc b/paddle/fluid/operators/mul_op.cc index ef04d5582d3..b31935cefc2 100644 --- a/paddle/fluid/operators/mul_op.cc +++ b/paddle/fluid/operators/mul_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" diff --git a/paddle/fluid/operators/mul_op_xpu.cc b/paddle/fluid/operators/mul_op_xpu.cc index 7410b3b607c..9f52dc8559d 100644 --- a/paddle/fluid/operators/mul_op_xpu.cc +++ b/paddle/fluid/operators/mul_op_xpu.cc @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/xpu_api_wrapper.h" #include "paddle/fluid/platform/device/device_wrapper.h" diff --git a/paddle/fluid/operators/multiplex_op.cc b/paddle/fluid/operators/multiplex_op.cc index 4e6ad35e612..72243b408f4 100644 --- a/paddle/fluid/operators/multiplex_op.cc +++ b/paddle/fluid/operators/multiplex_op.cc @@ -17,7 +17,6 @@ limitations under the License. */ #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" - #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/multiary.h" diff --git a/paddle/fluid/operators/nanmedian_op.cc b/paddle/fluid/operators/nanmedian_op.cc index 23a497bdb1d..63bfea650ac 100644 --- a/paddle/fluid/operators/nanmedian_op.cc +++ b/paddle/fluid/operators/nanmedian_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/operators/nccl/nccl_gpu_common.cc b/paddle/fluid/operators/nccl/nccl_gpu_common.cc index bcbc96ea1b6..8a0112fa11d 100644 --- a/paddle/fluid/operators/nccl/nccl_gpu_common.cc +++ b/paddle/fluid/operators/nccl/nccl_gpu_common.cc @@ -25,7 +25,7 @@ size_t last_num_gpus = -1; // TODO(panyx0718): Need to decide whether Paddle supports parallel // runs with different number GPUs. If true, current solution is not enough. std::mutex comm_mu; -} +} // namespace int Communicator::GetCommId(int device_id) const { std::lock_guard guard(comm_mu); diff --git a/paddle/fluid/operators/nccl/nccl_op.cu.cc b/paddle/fluid/operators/nccl/nccl_op.cu.cc index 65c3447ff23..b99800ecd64 100644 --- a/paddle/fluid/operators/nccl/nccl_op.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op.cu.cc @@ -19,9 +19,9 @@ limitations under the License. */ namespace paddle { namespace operators { +using framework::LoDTensor; using framework::Tensor; using platform::Communicator; -using framework::LoDTensor; template class NCCLTypeWrapper; diff --git a/paddle/fluid/operators/nccl/nccl_op_test.cu.cc b/paddle/fluid/operators/nccl/nccl_op_test.cu.cc index 80144c6f258..21649bfcd37 100644 --- a/paddle/fluid/operators/nccl/nccl_op_test.cu.cc +++ b/paddle/fluid/operators/nccl/nccl_op_test.cu.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include #include // NOLINT #include // NOLINT diff --git a/paddle/fluid/operators/nce_op.h b/paddle/fluid/operators/nce_op.h index c8af2415594..38c9b809eb6 100644 --- a/paddle/fluid/operators/nce_op.h +++ b/paddle/fluid/operators/nce_op.h @@ -15,11 +15,13 @@ limitations under the License. */ #pragma once #include + #include #include #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/selected_rows_utils.h" diff --git a/paddle/fluid/operators/nll_loss_op.cc b/paddle/fluid/operators/nll_loss_op.cc index 8f14bc10d50..d3cbec495fd 100644 --- a/paddle/fluid/operators/nll_loss_op.cc +++ b/paddle/fluid/operators/nll_loss_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
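[Editor's note] Two small fixes above (the misspelled "anonnymouse namespace" comment in axpy_handler.cc and the bare closing brace in nccl_gpu_common.cc) converge on the same convention: every namespace, named or anonymous, closes with a "// namespace" comment. Minimal form:

    namespace {
    constexpr int kAnswer = 42;
    }  // namespace

    int main() { return kAnswer == 42 ? 0 : 1; }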
*/ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/infermeta/backward.h" diff --git a/paddle/fluid/operators/norm_op.cc b/paddle/fluid/operators/norm_op.cc index 51daccce0e8..0a1f647627a 100644 --- a/paddle/fluid/operators/norm_op.cc +++ b/paddle/fluid/operators/norm_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/norm_utils.cu.h b/paddle/fluid/operators/norm_utils.cu.h index 0ed1f2719de..18ae152a689 100644 --- a/paddle/fluid/operators/norm_utils.cu.h +++ b/paddle/fluid/operators/norm_utils.cu.h @@ -450,27 +450,27 @@ void NormDoubleGradFunctor(const DeviceContext &ctx, set_constant(ctx, dX, static_cast(0)); if (use_global_stats) { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDXWithGlobal< - T, DataLayout::kNHWC><<>>( - dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, - dx_data); + DoubleGradComputeDXWithGlobal + <<>>(dy_data, ddscale_data, + variance_data, epsilon, C, + sample_size, num, dx_data); } else { - DoubleGradComputeDXWithGlobal< - T, DataLayout::kNCHW><<>>( - dy_data, ddscale_data, variance_data, epsilon, C, sample_size, num, - dx_data); + DoubleGradComputeDXWithGlobal + <<>>(dy_data, ddscale_data, + variance_data, epsilon, C, + sample_size, num, dx_data); } } else { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDX< - T, block, DataLayout::kNHWC><<>>( - x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, - ddscale_data, N, C, sample_size, epsilon, dx_data); + DoubleGradComputeDX + <<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, + ddscale_data, N, C, sample_size, epsilon, dx_data); } else { - DoubleGradComputeDX< - T, block, DataLayout::kNCHW><<>>( - x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, - ddscale_data, N, C, sample_size, epsilon, dx_data); + DoubleGradComputeDX + <<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, scale_data, + ddscale_data, N, C, sample_size, epsilon, dx_data); } } } @@ -479,27 +479,27 @@ void NormDoubleGradFunctor(const DeviceContext &ctx, set_constant(ctx, dScale, static_cast(0)); if (use_global_stats) { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDScaleWithGlobal< - T, block, DataLayout::kNHWC><<>>( - ddx_data, variance_data, dy_data, epsilon, N, C, sample_size, - dscale_data); + DoubleGradComputeDScaleWithGlobal + <<>>(ddx_data, variance_data, dy_data, + epsilon, N, C, sample_size, + dscale_data); } else { - DoubleGradComputeDScaleWithGlobal< - T, block, DataLayout::kNCHW><<>>( - ddx_data, variance_data, dy_data, epsilon, N, C, sample_size, - dscale_data); + DoubleGradComputeDScaleWithGlobal + <<>>(ddx_data, variance_data, dy_data, + epsilon, N, C, sample_size, + dscale_data); } } else { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDScale< - T, block, DataLayout::kNHWC><<>>( - x_data, mean_data, variance_data, ddx_data, dy_data, N, C, - sample_size, epsilon, dscale_data); + DoubleGradComputeDScale + <<>>( + x_data, mean_data, variance_data, ddx_data, dy_data, N, C, + sample_size, epsilon, dscale_data); } else { - DoubleGradComputeDScale< - T, block, DataLayout::kNCHW><<>>( - x_data, mean_data, variance_data, ddx_data, dy_data, N, C, - sample_size, epsilon, dscale_data); + DoubleGradComputeDScale + <<>>( + 
x_data, mean_data, variance_data, ddx_data, dy_data, N, C, + sample_size, epsilon, dscale_data); } } } @@ -508,27 +508,29 @@ void NormDoubleGradFunctor(const DeviceContext &ctx, set_constant(ctx, ddY, static_cast(0)); if (use_global_stats) { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDDYWithGlobal< - T, DataLayout::kNHWC><<>>( - ddx_data, scale_data, mean_data, variance_data, x_data, ddbias_data, - ddscale_data, epsilon, C, sample_size, num, ddy_data); + DoubleGradComputeDDYWithGlobal + <<>>( + ddx_data, scale_data, mean_data, variance_data, x_data, + ddbias_data, ddscale_data, epsilon, C, sample_size, num, + ddy_data); } else { - DoubleGradComputeDDYWithGlobal< - T, DataLayout::kNCHW><<>>( - ddx_data, scale_data, mean_data, variance_data, x_data, ddbias_data, - ddscale_data, epsilon, C, sample_size, num, ddy_data); + DoubleGradComputeDDYWithGlobal + <<>>( + ddx_data, scale_data, mean_data, variance_data, x_data, + ddbias_data, ddscale_data, epsilon, C, sample_size, num, + ddy_data); } } else { if (data_layout == DataLayout::kNHWC) { - DoubleGradComputeDDY< - T, block, DataLayout::kNHWC><<>>( - x_data, mean_data, variance_data, ddscale_data, ddbias_data, - ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); + DoubleGradComputeDDY + <<>>( + x_data, mean_data, variance_data, ddscale_data, ddbias_data, + ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); } else { - DoubleGradComputeDDY< - T, block, DataLayout::kNCHW><<>>( - x_data, mean_data, variance_data, ddscale_data, ddbias_data, - ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); + DoubleGradComputeDDY + <<>>( + x_data, mean_data, variance_data, ddscale_data, ddbias_data, + ddx_data, scale_data, N, C, sample_size, epsilon, ddy_data); } } } diff --git a/paddle/fluid/operators/norm_utils.h b/paddle/fluid/operators/norm_utils.h index fee06fe5dd4..36370245922 100644 --- a/paddle/fluid/operators/norm_utils.h +++ b/paddle/fluid/operators/norm_utils.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/number_count_op.cu b/paddle/fluid/operators/number_count_op.cu index 923d89c2485..2fc180fe678 100644 --- a/paddle/fluid/operators/number_count_op.cu +++ b/paddle/fluid/operators/number_count_op.cu @@ -97,13 +97,13 @@ class NumberCountOpCUDAKernel : public framework::OpKernel { auto out_data = number_count->mutable_data(out_dims, place); const T* gate_data = numbers->data(); - initialize_zero_kernel< - T><<>>( - out_data, upper_range); + initialize_zero_kernel + <<>>( + out_data, upper_range); - NumberCount< - T><<>>( - gate_data, out_data, batch_size, upper_range); + NumberCount + <<>>( + gate_data, out_data, batch_size, upper_range); } }; diff --git a/paddle/fluid/operators/one_hot_op.cc b/paddle/fluid/operators/one_hot_op.cc index 64323e588c6..e6b6320898f 100644 --- a/paddle/fluid/operators/one_hot_op.cc +++ b/paddle/fluid/operators/one_hot_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/one_hot_op.h" + #include #include diff --git a/paddle/fluid/operators/one_hot_op_npu.cc b/paddle/fluid/operators/one_hot_op_npu.cc index 24b506ebf8a..4e11cbb3888 100644 --- a/paddle/fluid/operators/one_hot_op_npu.cc +++ b/paddle/fluid/operators/one_hot_op_npu.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
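[Editor's note] The largest visual change in the .cu files (norm_utils.cu.h and number_count_op.cu above) is the updated kernel-launch rule: the <<<...>>> configuration moves to its own line after the template-id instead of being wedged inside it. A minimal CUDA sketch of the new shape (kernel and sizes are made up):

    #include <cstdio>
    #include <cuda_runtime.h>

    template <typename T>
    __global__ void FillKernel(T* out, T value, int n) {
      int i = blockIdx.x * blockDim.x + threadIdx.x;
      if (i < n) out[i] = value;
    }

    int main() {
      const int n = 256;
      float* d = nullptr;
      cudaMalloc(&d, n * sizeof(float));
      // New style: template arguments stay with the kernel name, the
      // launch configuration starts the next line.
      FillKernel<float>
          <<<1, n, 0, nullptr>>>(d, 1.0f, n);
      cudaDeviceSynchronize();
      cudaFree(d);
      std::puts("done");
      return 0;
    }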
*/ #include "paddle/fluid/operators/one_hot_op.h" - #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/one_hot_v2_op.cc b/paddle/fluid/operators/one_hot_v2_op.cc index 122b6a8a80a..cb7b9963bbd 100644 --- a/paddle/fluid/operators/one_hot_v2_op.cc +++ b/paddle/fluid/operators/one_hot_v2_op.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/one_hot_v2_op_npu.cc b/paddle/fluid/operators/one_hot_v2_op_npu.cc index e5702a37bb2..dcf098f105c 100644 --- a/paddle/fluid/operators/one_hot_v2_op_npu.cc +++ b/paddle/fluid/operators/one_hot_v2_op_npu.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" - #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/optimizers/adagrad_op.cc b/paddle/fluid/operators/optimizers/adagrad_op.cc index 91bad143061..64f22cced3b 100644 --- a/paddle/fluid/operators/optimizers/adagrad_op.cc +++ b/paddle/fluid/operators/optimizers/adagrad_op.cc @@ -15,13 +15,12 @@ limitations under the License. */ #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/multiary.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/optimizers/adam_op_npu.cc b/paddle/fluid/operators/optimizers/adam_op_npu.cc index 1ea91f6ebfa..e13805f694b 100644 --- a/paddle/fluid/operators/optimizers/adam_op_npu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_npu.cc @@ -183,16 +183,25 @@ class AdamNPUKernel : public framework::OpKernel { auto stream = ctx.template device_context() .stream(); - const auto& runner = - NpuOpRunner("ApplyAdamD", - { - *param, *mom1, *mom2, *beta1_pow, *beta2_pow, *lr, - *beta1_tensor, *beta2_tensor, *epsilon_tensor, *grad, - }, - { - *param_out, *mom1_out, *mom2_out, - }, - {}); + const auto& runner = NpuOpRunner("ApplyAdamD", + { + *param, + *mom1, + *mom2, + *beta1_pow, + *beta2_pow, + *lr, + *beta1_tensor, + *beta2_tensor, + *epsilon_tensor, + *grad, + }, + { + *param_out, + *mom1_out, + *mom2_out, + }, + {}); runner.Run(stream); // NOTE(zhiqiu): ApplyAdamD updates params inplace, so diff --git a/paddle/fluid/operators/optimizers/adam_op_xpu.cc b/paddle/fluid/operators/optimizers/adam_op_xpu.cc index 6ea0b2054cd..37467c7ba96 100644 --- a/paddle/fluid/operators/optimizers/adam_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/adam_op_xpu.cc @@ -306,8 +306,9 @@ class AdamOpXPUKernel : public framework::OpKernel { } xpu_wait(dev_ctx.x_context()->xpu_stream); } else { - PADDLE_ENFORCE_EQ(1, 2, platform::errors::InvalidArgument( - "Variable type not supported by adam_op")); + PADDLE_ENFORCE_EQ(1, 2, + platform::errors::InvalidArgument( + "Variable type not supported by adam_op")); } } }; diff --git a/paddle/fluid/operators/optimizers/adamw_op.cc b/paddle/fluid/operators/optimizers/adamw_op.cc index e2670625d4e..43e9dc0cae8 100644 --- a/paddle/fluid/operators/optimizers/adamw_op.cc +++ 
b/paddle/fluid/operators/optimizers/adamw_op.cc @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/optimizers/adam_op.h" - #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/operators/optimizers/adam_op.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/multiary.h" diff --git a/paddle/fluid/operators/optimizers/adamw_op_xpu.cc b/paddle/fluid/operators/optimizers/adamw_op_xpu.cc index d86d2bd2ffb..57a6b744fd6 100644 --- a/paddle/fluid/operators/optimizers/adamw_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/adamw_op_xpu.cc @@ -205,8 +205,9 @@ class AdamwOpXPUKernel : public framework::OpKernel { } } } else { - PADDLE_ENFORCE_EQ(1, 2, platform::errors::InvalidArgument( - "Variable type not supported by adamw_op")); + PADDLE_ENFORCE_EQ(1, 2, + platform::errors::InvalidArgument( + "Variable type not supported by adamw_op")); } } }; diff --git a/paddle/fluid/operators/optimizers/cast_with_ptr.h b/paddle/fluid/operators/optimizers/cast_with_ptr.h index a3fbb0e59e2..eb031ae0c93 100644 --- a/paddle/fluid/operators/optimizers/cast_with_ptr.h +++ b/paddle/fluid/operators/optimizers/cast_with_ptr.h @@ -43,9 +43,9 @@ static void VecCastKernel(const platform::CUDADeviceContext &ctx, const InT *x, in_arr[0] = reinterpret_cast(x); phi::Array<_ptr_ OutT *, 1> out_arr; out_arr[0] = y; - phi::funcs::VectorizedElementwiseKernel< - OutT, FunctorT, 1, 1, VecSize><<>>( - in_arr, out_arr, n, main_offset, FunctorT()); + phi::funcs::VectorizedElementwiseKernel + <<>>(in_arr, out_arr, n, main_offset, + FunctorT()); } } // namespace details diff --git a/paddle/fluid/operators/optimizers/dgc_momentum_op.cc b/paddle/fluid/operators/optimizers/dgc_momentum_op.cc index 7f0b2b7d064..40ac044e647 100644 --- a/paddle/fluid/operators/optimizers/dgc_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/dgc_momentum_op.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include - #include "paddle/fluid/operators/optimizers/dgc_momentum_op.h" +#include + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu b/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu index 3688b8067c2..7cbc52f4235 100644 --- a/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu +++ b/paddle/fluid/operators/optimizers/distributed_fused_lamb_init_op.cu @@ -83,10 +83,12 @@ static void GetParamGradShardInfo(const std::vector &infos, VLOG(10) << "start_size = " << start_size << " , end_size = " << end_size; if (infos.empty()) { - PADDLE_ENFORCE_EQ(start_size, 0, platform::errors::InvalidArgument( - "start_size should be 0.")); - PADDLE_ENFORCE_EQ(end_size, 0, platform::errors::InvalidArgument( - "end_size should be 0.")); + PADDLE_ENFORCE_EQ( + start_size, 0, + platform::errors::InvalidArgument("start_size should be 0.")); + PADDLE_ENFORCE_EQ( + end_size, 0, + platform::errors::InvalidArgument("end_size should be 0.")); *start_idx = 0; *end_idx = 0; *start_numel_offset = 0; @@ -104,15 +106,17 @@ static void GetParamGradShardInfo(const std::vector &infos, infos.begin()); if (i == n || infos[i].numel_offset != start_size) { PADDLE_ENFORCE_GT( - i, 0, platform::errors::InvalidArgument( - "Cannot find suitable sharding which is between [%d, %d)", - start_size, end_size)); + i, 0, + platform::errors::InvalidArgument( + "Cannot find suitable sharding which is between [%d, %d)", + start_size, end_size)); --i; } PADDLE_ENFORCE_LT( - i, n, platform::errors::InvalidArgument( - "Cannot find suitable sharding which is between [%d, %d)", - start_size, end_size)); + i, n, + platform::errors::InvalidArgument( + "Cannot find suitable sharding which is between [%d, %d)", start_size, + end_size)); *start_idx = i; *start_numel_offset = start_size - infos[i].numel_offset; auto j = static_cast( @@ -450,8 +454,9 @@ class DistributedFusedLambInitOpKernel platform::errors::InvalidArgument( "The attr(alignment) should be the power of 2.")); PADDLE_ENFORCE_GE( - rank, 0, platform::errors::InvalidArgument( - "The attr(rank) should be equal to or larger than 0.")); + rank, 0, + platform::errors::InvalidArgument( + "The attr(rank) should be equal to or larger than 0.")); PADDLE_ENFORCE_LT( rank, nranks, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu index c857c6de4d0..eb354ef6d75 100644 --- a/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu +++ b/paddle/fluid/operators/optimizers/distributed_fused_lamb_op.cu @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include "paddle/fluid/memory/buffer.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/optimizers/cast_with_ptr.h" @@ -32,6 +33,7 @@ #ifdef __HIPCC__ #include + #include "math.h" // NOLINT namespace cub = hipcub; #endif @@ -190,9 +192,8 @@ static void MultiTensorL2Norm(const platform::CUDAPlace &place, PD_VEC_LAUNCH_KERNEL(vec_size, PD_LAUNCH_MULTI_TENSOR_APPLY_L2_NORM_KERNEL); #undef PD_LAUNCH_MULTI_TENSOR_APPLY_L2_NORM_KERNEL - MultiTensorL2NormReduceAgainCUDAKernel< - MT, OutT, kBlockDim><<>>(tmp_out_ptr, y, - max_chunk_num); + MultiTensorL2NormReduceAgainCUDAKernel + <<>>(tmp_out_ptr, y, max_chunk_num); } template @@ -508,14 +509,14 @@ static void MultiTensorUpdateLambMomentAndTrustRatioDiv( "Output(Step) cannot be nullptr.")); } -#define PD_LAUNCH_LAMB_MOM_TRUST_RATIO_DIV_KERNEL \ - do { \ - UpdateLambMomentAndTrustRatioDivCUDAKernel<<< \ - config.block_per_grid, config.thread_per_block, 0, stream>>>( \ - param_p, grad_p, square_grad_norm_p, global_scale, beta1pow_p, \ - beta2pow_p, mom1_p, mom2_p, trust_ratio_div_p, found_inf_p, step, \ - weight_decay, weight_decay_end_numel, beta1, beta2, epsilon, \ - max_global_grad_norm, numel, rescale_grad); \ +#define PD_LAUNCH_LAMB_MOM_TRUST_RATIO_DIV_KERNEL \ + do { \ + UpdateLambMomentAndTrustRatioDivCUDAKernel \ + <<>>( \ + param_p, grad_p, square_grad_norm_p, global_scale, beta1pow_p, \ + beta2pow_p, mom1_p, mom2_p, trust_ratio_div_p, found_inf_p, step, \ + weight_decay, weight_decay_end_numel, beta1, beta2, epsilon, \ + max_global_grad_norm, numel, rescale_grad); \ } while (0) PD_VEC_LAUNCH_KERNEL(vec_size, PD_LAUNCH_LAMB_MOM_TRUST_RATIO_DIV_KERNEL); @@ -705,8 +706,9 @@ static void MultiTensorUpdateLambParamAndBetaPows( PADDLE_ENFORCE_NOT_NULL(beta2pow, platform::errors::InvalidArgument( "Beta2Pow should not be nullptr.")); } else { - PADDLE_ENFORCE_EQ(beta2pow, nullptr, platform::errors::InvalidArgument( - "Beta2Pow should be nullptr.")); + PADDLE_ENFORCE_EQ( + beta2pow, nullptr, + platform::errors::InvalidArgument("Beta2Pow should be nullptr.")); } const int block_dim = 512; @@ -744,21 +746,21 @@ static void MultiTensorUpdateLambParamAndBetaPows( betapow_helper); \ } while (0) -#define PD_LAUNCH_VEC_MULTI_TENSOR_UPDATE_PARAM_BETAPOW_CASE \ - do { \ - auto callback = [&]( \ - const MultiTensorLauncher &launcher, \ - int launch_n) { \ - if (has_beta_pow && launch_n == 0) { \ - PD_LAUNCH_MULTI_TENSOR_UPDATE_PARAM_BETAPOW(true); \ - beta1pow = nullptr; \ - beta2pow = nullptr; \ - } else { \ - PD_LAUNCH_MULTI_TENSOR_UPDATE_PARAM_BETAPOW(false); \ - } \ - }; \ - MultiTensorApplyWithCallback( \ - stream, offsets, n, chunk_size, block_dim, callback); \ +#define PD_LAUNCH_VEC_MULTI_TENSOR_UPDATE_PARAM_BETAPOW_CASE \ + do { \ + auto callback = \ + [&](const MultiTensorLauncher &launcher, \ + int launch_n) { \ + if (has_beta_pow && launch_n == 0) { \ + PD_LAUNCH_MULTI_TENSOR_UPDATE_PARAM_BETAPOW(true); \ + beta1pow = nullptr; \ + beta2pow = nullptr; \ + } else { \ + PD_LAUNCH_MULTI_TENSOR_UPDATE_PARAM_BETAPOW(false); \ + } \ + }; \ + MultiTensorApplyWithCallback( \ + stream, offsets, n, chunk_size, block_dim, callback); \ } while (0) PD_VEC_LAUNCH_KERNEL(vec_size, @@ -793,11 +795,11 @@ static void LaunchScaleKernel(const platform::CUDADeviceContext &dev_ctx, int vec_size = std::min(GetChunkedVecSize(x, 0), GetChunkedVecSize(y, 0)); auto config = platform::GetGpuLaunchConfig1D(dev_ctx, n, vec_size); -#define PD_LAMB_VEC_SCALE_KERNEL_CASE \ - do { \ - ScaleCUDAKernel<<>>( \ - x, scale, y, n); \ 
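[Editor's note] The macro-heavy hunks in distributed_fused_lamb_op.cu, including the one continuing just below, change no tokens: clang-format re-wraps each macro body and pins every continuation backslash to a single column. Reduced example:

    #include <cstdio>

    // Only layout differs from the pre-format version; the trailing
    // backslashes are aligned to one column.
    #define SQUARE_AND_PRINT(x)     \
      do {                          \
        const int sq = (x) * (x);   \
        std::printf("%d\n", sq);    \
      } while (0)

    int main() { SQUARE_AND_PRINT(7); }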
+#define PD_LAMB_VEC_SCALE_KERNEL_CASE \ + do { \ + ScaleCUDAKernel \ + <<>>( \ + x, scale, y, n); \ } while (0) PD_VEC_LAUNCH_KERNEL(vec_size, PD_LAMB_VEC_SCALE_KERNEL_CASE); @@ -1015,7 +1017,7 @@ static void CheckHasNanInfGrad(const float *fp32_grad, int fp32_numel, if (fp32_numel > 0) { fp32_has_nan_inf = reinterpret_cast(nan_inf_flag + 1); cub::TransformInputIterator, const float *> - iter(fp32_grad, IsNanInfFunctor()); + iter(fp32_grad, IsNanInfFunctor()); CubDeviceReduce(iter, fp32_has_nan_inf, fp32_numel, OrFunctor(), false, stream, cub_tmp_buffer); } @@ -1082,11 +1084,11 @@ static void LaunchElementwiseAddWithCastKernel( GetChunkedVecSize(z, 0)); auto config = platform::GetGpuLaunchConfig1D(dev_ctx, n, vec_size); -#define PD_LAUNCH_ELEMENTWISE_ADD_WITH_CAST_KERNEL \ - do { \ - ElementwiseAddWithCastCUDAKernel<<< \ - config.block_per_grid, config.thread_per_block, 0, stream>>>(x, y, z, \ - n); \ +#define PD_LAUNCH_ELEMENTWISE_ADD_WITH_CAST_KERNEL \ + do { \ + ElementwiseAddWithCastCUDAKernel \ + <<>>(x, y, \ + z, n); \ } while (0) PD_VEC_LAUNCH_KERNEL(vec_size, PD_LAUNCH_ELEMENTWISE_ADD_WITH_CAST_KERNEL); @@ -1445,10 +1447,10 @@ class DistributedFusedLambOpKernel if (is_grad_scaled_by_nranks) { clip_scale *= num_devices; } - CalcGradNormClipBeforeAllReduceScale< - float, platform::float16><<<1, 1, 0, stream>>>( - global_scale, max_global_grad_norm, fp32_square_grad_norm, - fp32_scale, fp16_scale, clip_scale); + CalcGradNormClipBeforeAllReduceScale + <<<1, 1, 0, stream>>>(global_scale, max_global_grad_norm, + fp32_square_grad_norm, fp32_scale, fp16_scale, + clip_scale); if (fp32_scale) { VLOG(1) << "Grad scale: " << FlattenToString(fp32_scale, 1, place); } else { @@ -1567,11 +1569,12 @@ class DistributedFusedLambOpKernel fp16_partial_fused_offsets, fp16_local_param_num, param_square_norm + fp16_local_start_idx); } else { - MultiTensorL2Norm( - place, stream, fp16_param + fused_offsets[fp16_local_start_idx] - - fused_offsets[fp32_global_param_num], - fused_offsets + fp16_local_start_idx, fp16_local_param_num, - param_square_norm + fp16_local_start_idx); + MultiTensorL2Norm(place, stream, + fp16_param + fused_offsets[fp16_local_start_idx] - + fused_offsets[fp32_global_param_num], + fused_offsets + fp16_local_start_idx, + fp16_local_param_num, + param_square_norm + fp16_local_start_idx); } MultiTensorL2Norm(place, stream, trust_ratio_div, diff --git a/paddle/fluid/operators/optimizers/dpsgd_op.h b/paddle/fluid/operators/optimizers/dpsgd_op.h index 688a7f1ad84..69a853c5d18 100644 --- a/paddle/fluid/operators/optimizers/dpsgd_op.h +++ b/paddle/fluid/operators/optimizers/dpsgd_op.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include #include + #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" @@ -108,9 +110,8 @@ class DpsgdOpKernel : public framework::OpKernel { // update parameters for (int64_t i = 0; i < grad->numel(); ++i) { - out_data[i] = - param_data[i] - - lr[0] * (grad_data[i] / scale + gaussian_noise / batch_size); + out_data[i] = param_data[i] - lr[0] * (grad_data[i] / scale + + gaussian_noise / batch_size); } // CCS16 - Deep Learning with Differential Privacy. 
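[Editor's note] The reflowed DPSGD update above keeps the same arithmetic, just rebalanced across lines: parameter minus learning rate times (scaled gradient plus batch-averaged Gaussian noise). A plain sketch of that step (names illustrative, noise generation omitted):

    #include <cstddef>
    #include <vector>

    // One DPSGD step per element:
    //   param -= lr * (grad / scale + gaussian_noise / batch_size)
    void DpsgdStep(std::vector<float>* param, const std::vector<float>& grad,
                   float lr, float scale, float gaussian_noise,
                   float batch_size) {
      for (std::size_t i = 0; i < param->size(); ++i) {
        (*param)[i] -= lr * (grad[i] / scale + gaussian_noise / batch_size);
      }
    }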
// [https://arxiv.org/abs/1607.00133] diff --git a/paddle/fluid/operators/optimizers/ftrl_op.h b/paddle/fluid/operators/optimizers/ftrl_op.h index 596ed05df3f..73fd7ceb67b 100644 --- a/paddle/fluid/operators/optimizers/ftrl_op.h +++ b/paddle/fluid/operators/optimizers/ftrl_op.h @@ -74,9 +74,8 @@ class SparseFTRLFunctor { l_acc_out_[j] += g - (std::sqrt(new_acc) - std::sqrt(s_acc)) / lr * p; } else { l_acc_out_[j] += - g - - (std::pow(new_acc, -lr_power_) - std::pow(s_acc, -lr_power_)) / lr * - p; + g - (std::pow(new_acc, -lr_power_) - std::pow(s_acc, -lr_power_)) / + lr * p; } auto l_acc = l_acc_out_[j]; diff --git a/paddle/fluid/operators/optimizers/lamb_op.cc b/paddle/fluid/operators/optimizers/lamb_op.cc index 48ceba3695f..fb2a78d28ed 100644 --- a/paddle/fluid/operators/optimizers/lamb_op.cc +++ b/paddle/fluid/operators/optimizers/lamb_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/optimizers/lamb_op.h" + #include + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -227,13 +229,12 @@ REGISTER_OP_CPU_KERNEL( ops::LambOpKernel); /* ========================== register checkpoint ===========================*/ -REGISTER_OP_VERSION(lamb) - .AddCheckpoint( - R"ROC(Upgrade lamb, add two new outputs [Beta1PowOut] and [Beta2PowOut].)ROC", - paddle::framework::compatible::OpVersionDesc() - .NewInput("Beta1PowOut", - "The Output beta1 power accumulator. 'Beta1PowOut' is " - "dispensable.") - .NewInput("Beta2PowOut", - "The Output beta2 power accumulator. 'Beta2PowOut' is " - "dispensable.")); +REGISTER_OP_VERSION(lamb).AddCheckpoint( + R"ROC(Upgrade lamb, add two new outputs [Beta1PowOut] and [Beta2PowOut].)ROC", + paddle::framework::compatible::OpVersionDesc() + .NewInput("Beta1PowOut", + "The Output beta1 power accumulator. 'Beta1PowOut' is " + "dispensable.") + .NewInput("Beta2PowOut", + "The Output beta2 power accumulator. 'Beta2PowOut' is " + "dispensable.")); diff --git a/paddle/fluid/operators/optimizers/lamb_op.cu b/paddle/fluid/operators/optimizers/lamb_op.cu index b46fa19ea13..a9f880fdbb6 100644 --- a/paddle/fluid/operators/optimizers/lamb_op.cu +++ b/paddle/fluid/operators/optimizers/lamb_op.cu @@ -16,7 +16,8 @@ limitations under the License. */ namespace ops = paddle::operators; REGISTER_OP_CUDA_KERNEL( - lamb, ops::LambOpKernel, + lamb, + ops::LambOpKernel, ops::LambOpKernel, ops::LambOpKernel); diff --git a/paddle/fluid/operators/optimizers/lamb_op.h b/paddle/fluid/operators/optimizers/lamb_op.h index 45acf2b3e48..2956ff20467 100644 --- a/paddle/fluid/operators/optimizers/lamb_op.h +++ b/paddle/fluid/operators/optimizers/lamb_op.h @@ -14,8 +14,10 @@ limitations under the License. */ #pragma once #include // for sqrt in CPU and CUDA + #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/buffer.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" @@ -655,10 +657,10 @@ class LambOpKernel : public framework::OpKernel { // TODO(zengjinle): remove the following Eigen operations when // *skip_update == true. memory::Buffer buffer(dev_ctx.GetPlace()); - math::SquaredL2Norm( - dev_ctx, reinterpret_cast(IsMultiPrecision ? master_param_ptr - : param_ptr), - p_norm_ptr, numel, &buffer); + math::SquaredL2Norm(dev_ctx, + reinterpret_cast( + IsMultiPrecision ? 
master_param_ptr : param_ptr), + p_norm_ptr, numel, &buffer); math::SquaredL2Norm(dev_ctx, trust_ratio_div_ptr, trust_ratio_div_norm_ptr, numel, &buffer); @@ -675,12 +677,12 @@ class LambOpKernel : public framework::OpKernel { #define CALL_PADDLE_UPDATE_LAMB_PARAM_FUNC(__should_update_beta_pow) \ do { \ LambParamUpateFunctor \ - param_update_functor( \ - lr.template data(), static_cast(param_ptr), \ - static_cast(master_param_ptr), p_norm_ptr, \ - trust_ratio_div_ptr, trust_ratio_div_norm_ptr, \ - static_cast(param_out_ptr), \ - static_cast(master_param_out_ptr), skip_update_flag); \ + param_update_functor( \ + lr.template data(), static_cast(param_ptr), \ + static_cast(master_param_ptr), p_norm_ptr, \ + trust_ratio_div_ptr, trust_ratio_div_norm_ptr, \ + static_cast(param_out_ptr), \ + static_cast(master_param_out_ptr), skip_update_flag); \ if (__should_update_beta_pow) { \ param_update_functor.SetBetaPows(beta1_pow_ptr, beta2_pow_ptr, \ beta1_pow_out_ptr, beta2_pow_out_ptr, \ diff --git a/paddle/fluid/operators/optimizers/lamb_op_xpu.cc b/paddle/fluid/operators/optimizers/lamb_op_xpu.cc index 7aa5783a01b..ef224382cd0 100644 --- a/paddle/fluid/operators/optimizers/lamb_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/lamb_op_xpu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/optimizers/lamb_op.h" #include "gflags/gflags.h" +#include "paddle/fluid/operators/optimizers/lamb_op.h" #include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { diff --git a/paddle/fluid/operators/optimizers/lars_momentum_op.cu b/paddle/fluid/operators/optimizers/lars_momentum_op.cu index 5b883a11e57..553ac69edca 100644 --- a/paddle/fluid/operators/optimizers/lars_momentum_op.cu +++ b/paddle/fluid/operators/optimizers/lars_momentum_op.cu @@ -129,8 +129,9 @@ __device__ inline void VectorizeLarsUpdate( for (int i = tid + tail_offset; i < numel; i += grid_stride) { MT grad_val = static_cast(grad[i]) * rescale_grad; MT param_val = param[i]; - MT velocity_tmp = Fma(velocity[i], mu, local_lr * Fma(lars_weight_decay, - param_val, grad_val)); + MT velocity_tmp = + Fma(velocity[i], mu, + local_lr * Fma(lars_weight_decay, param_val, grad_val)); MT param_tmp = param_val - velocity_tmp; param_out[i] = static_cast(param_tmp); velocity_out[i] = velocity_tmp; @@ -314,10 +315,10 @@ inline void SeparatedLarsMomentumOpCUDAKernel( const MT rescale_grad, const int64_t numel, const MT* master_param_data, MT* master_out_data, const bool is_amp) { LarsThreadConfig lars_thread_config(numel); - L2NormKernel<<>>( - param_data, grad_data, p_buffer, g_buffer, numel, - lars_thread_config.repeat_times, rescale_grad); + L2NormKernel + <<>>(param_data, grad_data, p_buffer, g_buffer, numel, + lars_thread_config.repeat_times, rescale_grad); MomentumLarsKernel<<>>( diff --git a/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc b/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc index 54ead6d3df7..280c0930e91 100644 --- a/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/merged_momentum_op_mlu.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
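Note: the distributed_fused_lamb and lars_momentum hunks above all apply the same new clang-format rule: the kernel name (with its template arguments) stays on one line and the <<<grid, block, shared_mem, stream>>> launch configuration breaks onto the next. A minimal compilable sketch of the resulting style; the kernel, its launch math, and every name below are illustrative, not taken from the patch:

__global__ void MyScaleKernel(const float* x, float scale, float* y, int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) y[i] = scale * x[i];  // elementwise scale, one thread per element
}

void LaunchMyScale(const float* x, float scale, float* y, int n,
                   cudaStream_t stream) {
  int threads = 512;
  int blocks = (n + threads - 1) / threads;
  // Post-patch layout: kernel name on one line, launch config on the next.
  MyScaleKernel
      <<<blocks, threads, 0, stream>>>(x, scale, y, n);
}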
-#include "paddle/fluid/operators/optimizers/merged_momentum_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/optimizers/merged_momentum_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc b/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc index 5fad5eca9af..d405500d607 100644 --- a/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc +++ b/paddle/fluid/operators/optimizers/merged_momentum_op_npu.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "paddle/fluid/operators/optimizers/merged_momentum_op.h" - #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/phi/kernels/impl/momentum_kernel_impl.h" @@ -151,10 +150,11 @@ class NPUMergedMomentumOpKernel : public framework::OpKernel { framework::TensorCopy(*param, ctx.GetPlace(), dev_ctx, param_out); framework::TensorCopy(*velocity, ctx.GetPlace(), dev_ctx, velocity_out); // NOTE: ApplyMomentum will change the input - const auto& runner = NpuOpRunner( - "ApplyMomentum", {*param_out, *velocity_out, *learning_rate, - regularized_grad, mu_tensor}, - {*param_out}, {{"use_nesterov", use_nesterov}}); + const auto& runner = + NpuOpRunner("ApplyMomentum", + {*param_out, *velocity_out, *learning_rate, + regularized_grad, mu_tensor}, + {*param_out}, {{"use_nesterov", use_nesterov}}); runner.Run(dev_ctx.stream()); } } diff --git a/paddle/fluid/operators/optimizers/momentum_op.cc b/paddle/fluid/operators/optimizers/momentum_op.cc index 50d2c946f3a..94fb4c156ef 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.cc +++ b/paddle/fluid/operators/optimizers/momentum_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/optimizers/momentum_op.h" + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { @@ -109,28 +110,26 @@ REGISTER_OPERATOR( paddle::framework::EmptyGradOpMaker, ops::MomentumOpInferVarType); -REGISTER_OP_VERSION(momentum) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(momentum).AddCheckpoint( + R"ROC( Upgrade momentum add 4 attributes [regularization_method, regularization_coeff, multi_precision, rescale_grad]. )ROC", - paddle::framework::compatible::OpVersionDesc() - .NewInput("MasterParam", "FP32 master weight for AMP.") - .NewOutput("MasterParamOut", - "The updated FP32 master weight for AMP. " - "It shared memory with Input(MasterParam).") - .NewAttr("regularization_method", - "(string) regularization_method, right now only support " - "l2decay or none", - std::string("")) - .NewAttr("regularization_coeff", "(float) regularization_coeff", - 0.0f) - .NewAttr( - "multi_precision", - "(bool) Whether to use multi-precision during weight updating.", - false) - .NewAttr("rescale_grad", - "(float) Multiply the gradient with `rescale_grad`" - "before updating. Often choose to be `1.0/batch_size`.", - 1.0f)); + paddle::framework::compatible::OpVersionDesc() + .NewInput("MasterParam", "FP32 master weight for AMP.") + .NewOutput("MasterParamOut", + "The updated FP32 master weight for AMP. 
" + "It shared memory with Input(MasterParam).") + .NewAttr("regularization_method", + "(string) regularization_method, right now only support " + "l2decay or none", + std::string("")) + .NewAttr("regularization_coeff", "(float) regularization_coeff", 0.0f) + .NewAttr( + "multi_precision", + "(bool) Whether to use multi-precision during weight updating.", + false) + .NewAttr("rescale_grad", + "(float) Multiply the gradient with `rescale_grad`" + "before updating. Often choose to be `1.0/batch_size`.", + 1.0f)); diff --git a/paddle/fluid/operators/optimizers/momentum_op.h b/paddle/fluid/operators/optimizers/momentum_op.h index 017f33d7458..2f6a9758a2c 100644 --- a/paddle/fluid/operators/optimizers/momentum_op.h +++ b/paddle/fluid/operators/optimizers/momentum_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/amp/fp16_type_traits.h" diff --git a/paddle/fluid/operators/optimizers/momentum_op_mlu.cc b/paddle/fluid/operators/optimizers/momentum_op_mlu.cc index b8fa81b2e71..417f89410cf 100644 --- a/paddle/fluid/operators/optimizers/momentum_op_mlu.cc +++ b/paddle/fluid/operators/optimizers/momentum_op_mlu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/optimizers/momentum_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/optimizers/momentum_op.h" #include "paddle/phi/kernels/impl/momentum_kernel_impl.h" namespace paddle { @@ -77,8 +77,9 @@ class MLUMomentumOpKernel : public framework::OpKernel { GetBasePtr(learning_rate), GetBasePtr(&mu_tensor), GetBasePtr(param_out), GetBasePtr(velocity_out)); } else if (grad_var->IsType()) { - PADDLE_ENFORCE_EQ(false, true, platform::errors::PermissionDenied( - "Unsupport SparseMomentum")); + PADDLE_ENFORCE_EQ( + false, true, + platform::errors::PermissionDenied("Unsupport SparseMomentum")); } else { PADDLE_ENFORCE_EQ(false, true, platform::errors::PermissionDenied( diff --git a/paddle/fluid/operators/optimizers/momentum_op_npu.cc b/paddle/fluid/operators/optimizers/momentum_op_npu.cc index 2d73766b973..d3ffeb18be7 100644 --- a/paddle/fluid/operators/optimizers/momentum_op_npu.cc +++ b/paddle/fluid/operators/optimizers/momentum_op_npu.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/optimizers/momentum_op.h" - #include "paddle/fluid/operators/optimizers/sgd_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/phi/kernels/impl/momentum_kernel_impl.h" @@ -70,14 +69,16 @@ class NPUMomentumOpKernel : public framework::OpKernel { framework::TensorCopy(*param, ctx.GetPlace(), dev_ctx, param_out); framework::TensorCopy(*velocity, ctx.GetPlace(), dev_ctx, velocity_out); // NOTE: ApplyMomentum will change the input - const auto& runner = NpuOpRunner( - "ApplyMomentum", {*param_out, *velocity_out, *learning_rate, - regularized_grad, mu_tensor}, - {*param_out}, {{"use_nesterov", use_nesterov}}); + const auto& runner = + NpuOpRunner("ApplyMomentum", + {*param_out, *velocity_out, *learning_rate, + regularized_grad, mu_tensor}, + {*param_out}, {{"use_nesterov", use_nesterov}}); runner.Run(dev_ctx.stream()); } else if (grad_var->IsType()) { - PADDLE_ENFORCE_EQ(false, true, platform::errors::PermissionDenied( - "Unsupport SparseMomentum")); + PADDLE_ENFORCE_EQ( + false, true, + platform::errors::PermissionDenied("Unsupport SparseMomentum")); } else { PADDLE_ENFORCE_EQ(false, true, platform::errors::PermissionDenied( diff --git a/paddle/fluid/operators/optimizers/momentum_op_xpu.cc b/paddle/fluid/operators/optimizers/momentum_op_xpu.cc index 6897213c91a..749d38f315e 100644 --- a/paddle/fluid/operators/optimizers/momentum_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/momentum_op_xpu.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU #include + #include "paddle/fluid/operators/optimizers/sgd_op.h" #include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { diff --git a/paddle/fluid/operators/optimizers/multi_tensor_apply.h b/paddle/fluid/operators/optimizers/multi_tensor_apply.h index 179e8f45254..98850aa816b 100644 --- a/paddle/fluid/operators/optimizers/multi_tensor_apply.h +++ b/paddle/fluid/operators/optimizers/multi_tensor_apply.h @@ -15,6 +15,7 @@ #pragma once #include + #include "math.h" // NOLINT namespace paddle { @@ -108,11 +109,11 @@ class MultiTensorLauncher { stream_(stream) {} template - void Launch(Functor &&functor, Args &&... args) const { - MultiTensorApplyCUDAKernel< - Functor, MaxTensorNumPerLaunch, - MaxChunkNumPerLaunch><<>>( - functor, meta_, chunk_size_, args...); + void Launch(Functor &&functor, Args &&...args) const { + MultiTensorApplyCUDAKernel + <<>>(functor, meta_, chunk_size_, + args...); } private: @@ -189,7 +190,7 @@ template static void MultiTensorApply(Functor functor, gpuStream_t stream, const int *offsets, int n, int chunk_size, - int block_dim, Args &&... args) { + int block_dim, Args &&...args) { auto callback = [&](const MultiTensorLauncher &launcher, int i) { launcher.Launch(functor, args...); }; diff --git a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc index 6893e5d6b9b..5eeeb735307 100644 --- a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc +++ b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h" + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h index 74cf7627450..353d8777a84 100644 --- a/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h +++ b/paddle/fluid/operators/optimizers/pow2_decay_with_linear_warmup_op.h @@ -47,9 +47,8 @@ struct Pow2DecayWithLinearWarmupFunctor { auto new_lr = static_cast(step) / warmup_steps_ * base_lr_; *lr_ = static_cast(new_lr); } else if (step < total_steps_) { - auto factor = 1 - - static_cast(step - warmup_steps_) / - (total_steps_ - warmup_steps_); + auto factor = 1 - static_cast(step - warmup_steps_) / + (total_steps_ - warmup_steps_); auto new_lr = static_cast(base_lr_ - end_lr_) * (factor * factor) + end_lr_; *lr_ = static_cast(new_lr); @@ -76,9 +75,10 @@ class Pow2DecayWithLinearWarmupOpKernel : public framework::OpKernel { auto *lr_out = ctx.Output("LearningRateOut"); auto *step_out = ctx.Output("StepOut"); PADDLE_ENFORCE_EQ( - lr, lr_out, platform::errors::InvalidArgument("Input(LearningRate) and " - "Output(LearningRateOut) " - "must be the same.")); + lr, lr_out, + platform::errors::InvalidArgument("Input(LearningRate) and " + "Output(LearningRateOut) " + "must be the same.")); PADDLE_ENFORCE_NOT_NULL(lr, platform::errors::InvalidArgument( "Input(LearingRate) should not be nullptr.")); diff --git a/paddle/fluid/operators/optimizers/rmsprop_op.cc b/paddle/fluid/operators/optimizers/rmsprop_op.cc index b3458724482..874e21cc6cc 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op.cc @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/framework/op_registry.h" - #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/multiary.h" diff --git a/paddle/fluid/operators/optimizers/rmsprop_op_xpu.cc b/paddle/fluid/operators/optimizers/rmsprop_op_xpu.cc index b53d51686cf..7f4810ea420 100644 --- a/paddle/fluid/operators/optimizers/rmsprop_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/rmsprop_op_xpu.cc @@ -15,7 +15,9 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/device_wrapper.h" diff --git a/paddle/fluid/operators/optimizers/sgd_op.cc b/paddle/fluid/operators/optimizers/sgd_op.cc index a2af131cb50..b5822fd5c44 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cc +++ b/paddle/fluid/operators/optimizers/sgd_op.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include - #include "paddle/fluid/operators/optimizers/sgd_op.h" + +#include #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif @@ -76,10 +76,11 @@ class SGDOpInferVarType : public framework::VarTypeInference { auto in_var_type = ctx->GetInputType("Param"); PADDLE_ENFORCE_EQ(in_var_type == framework::proto::VarType::SELECTED_ROWS || in_var_type == framework::proto::VarType::LOD_TENSOR, - true, platform::errors::InvalidArgument( - "The input Var's type should be LoDtensor or " - "SelectedRows, but the received type is %s", - in_var_type)); + true, + platform::errors::InvalidArgument( + "The input Var's type should be LoDtensor or " + "SelectedRows, but the received type is %s", + in_var_type)); ctx->SetOutputType("ParamOut", in_var_type, framework::ALL_ELEMENTS); } diff --git a/paddle/fluid/operators/optimizers/sgd_op.cu b/paddle/fluid/operators/optimizers/sgd_op.cu index 222244a2fd1..ba2e84a6a78 100644 --- a/paddle/fluid/operators/optimizers/sgd_op.cu +++ b/paddle/fluid/operators/optimizers/sgd_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/operators/amp/fp16_type_traits.h" #include "paddle/fluid/operators/optimizers/sgd_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" @@ -106,11 +107,11 @@ class SGDOpKernel int block = 512; int grid = (param->numel() + block - 1) / block; - SGDKernelMT< - T, MPDType><<>>( - param->data(), grad->data(), learning_rate->data(), - param->numel(), param_out->mutable_data(ctx.GetPlace()), - master_in_data, master_out_data); + SGDKernelMT + <<>>( + param->data(), grad->data(), learning_rate->data(), + param->numel(), param_out->mutable_data(ctx.GetPlace()), + master_in_data, master_out_data); } else if (grad_var->IsType()) { // TODO(qijun): In Sparse SGD operator, in-place update is enforced. diff --git a/paddle/fluid/operators/optimizers/sgd_op_xpu.cc b/paddle/fluid/operators/optimizers/sgd_op_xpu.cc index e7c03be95ca..7203357db10 100644 --- a/paddle/fluid/operators/optimizers/sgd_op_xpu.cc +++ b/paddle/fluid/operators/optimizers/sgd_op_xpu.cc @@ -12,8 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/optimizers/sgd_op.h" #include + +#include "paddle/fluid/operators/optimizers/sgd_op.h" #include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc index c38545df173..0c4fa916f43 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.cc +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
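Note: the sgd, lamb, momentum, and range hunks keep swapping include order the same way: the file's own header first, then a blank line, then system headers, then the remaining project headers. A sketch of the grouping the patch converges on; the paths are quoted from the hunks, the system headers are illustrative:

#include "paddle/fluid/operators/optimizers/sgd_op.h"  // primary header first

#include <string>  // system headers next, in their own block
#include <vector>

#include "paddle/fluid/framework/op_registry.h"  // then other project headers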
#include "paddle/fluid/operators/optimizers/sparse_momentum_op.h" + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/optimizers/sparse_momentum_op.h b/paddle/fluid/operators/optimizers/sparse_momentum_op.h index 08b2d3764fe..296a3d5b889 100644 --- a/paddle/fluid/operators/optimizers/sparse_momentum_op.h +++ b/paddle/fluid/operators/optimizers/sparse_momentum_op.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/p_norm_op.cc b/paddle/fluid/operators/p_norm_op.cc index c7c8ebf562b..21254521fa9 100644 --- a/paddle/fluid/operators/p_norm_op.cc +++ b/paddle/fluid/operators/p_norm_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" @@ -124,11 +125,10 @@ REGISTER_OPERATOR(p_norm, ops::PnormOp, ops::PnormOpMaker, PNormInferShapeFunctor); REGISTER_OPERATOR(p_norm_grad, ops::PnormOpGrad, PNormGradInferShapeFunctor); -REGISTER_OP_VERSION(p_norm) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(p_norm).AddCheckpoint( + R"ROC( Upgrade p_norm, add 1 attribute [asvector]. )ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "asvector", - "Compute as vector when axis is None and input is matrix", false)); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "asvector", "Compute as vector when axis is None and input is matrix", + false)); diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc index 38fa3316a6e..6d27433512e 100644 --- a/paddle/fluid/operators/pad2d_op.cc +++ b/paddle/fluid/operators/pad2d_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu index 80931fea90f..b7f9977f3ed 100644 --- a/paddle/fluid/operators/pad2d_op.cu +++ b/paddle/fluid/operators/pad2d_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index e4952a24326..b7a638d7ce9 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/pad_constant_like_op.cc b/paddle/fluid/operators/pad_constant_like_op.cc index 087b8ecba6e..61a2120e1e4 100644 --- a/paddle/fluid/operators/pad_constant_like_op.cc +++ b/paddle/fluid/operators/pad_constant_like_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/pad_constant_like_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/pad_constant_like_op.h b/paddle/fluid/operators/pad_constant_like_op.h index 0aedd800e1a..cc7c39d12cd 100644 --- a/paddle/fluid/operators/pad_constant_like_op.h +++ b/paddle/fluid/operators/pad_constant_like_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/pad_op.cc b/paddle/fluid/operators/pad_op.cc index dc162ae5782..eaf343dde0f 100644 --- a/paddle/fluid/operators/pad_op.cc +++ b/paddle/fluid/operators/pad_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/complex.h" diff --git a/paddle/fluid/operators/partial_concat_op.cc b/paddle/fluid/operators/partial_concat_op.cc index fedadc7581e..e0e6ec31e41 100644 --- a/paddle/fluid/operators/partial_concat_op.cc +++ b/paddle/fluid/operators/partial_concat_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/partial_concat_op.h" + #include #include #include @@ -93,8 +94,9 @@ class PartialConcatOp : public framework::OperatorWithKernel { break; } } - PADDLE_ENFORCE_EQ(flag, 1, platform::errors::InvalidArgument( - "All Inputs of PartialSum OP are Empty!")); + PADDLE_ENFORCE_EQ(flag, 1, + platform::errors::InvalidArgument( + "All Inputs of PartialSum OP are Empty!")); return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/partial_concat_op.cu b/paddle/fluid/operators/partial_concat_op.cu index 322e84ae8b9..d36a7303715 100644 --- a/paddle/fluid/operators/partial_concat_op.cu +++ b/paddle/fluid/operators/partial_concat_op.cu @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/partial_concat_op.h" diff --git a/paddle/fluid/operators/partial_concat_op.h b/paddle/fluid/operators/partial_concat_op.h index 20a6639e233..b12cb0a0293 100644 --- a/paddle/fluid/operators/partial_concat_op.h +++ b/paddle/fluid/operators/partial_concat_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/strided_memcpy.h" diff --git a/paddle/fluid/operators/partial_sum_op.cc b/paddle/fluid/operators/partial_sum_op.cc index 72630998d43..a3ce78054ac 100644 --- a/paddle/fluid/operators/partial_sum_op.cc +++ b/paddle/fluid/operators/partial_sum_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/partial_sum_op.h" + #include #include #include @@ -96,8 +97,9 @@ class PartialSumOp : public framework::OperatorWithKernel { } } - PADDLE_ENFORCE_EQ(flag, 1, platform::errors::InvalidArgument( - "All Inputs of PartialSum OP are Empty!")); + PADDLE_ENFORCE_EQ(flag, 1, + platform::errors::InvalidArgument( + "All Inputs of PartialSum OP are Empty!")); return framework::OpKernelType(input_data_type, platform::CPUPlace()); } }; diff --git a/paddle/fluid/operators/partial_sum_op.cu b/paddle/fluid/operators/partial_sum_op.cu index 63d140d6769..b363483fe69 100644 --- a/paddle/fluid/operators/partial_sum_op.cu +++ b/paddle/fluid/operators/partial_sum_op.cu @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/partial_sum_op.h" diff --git a/paddle/fluid/operators/partial_sum_op.h b/paddle/fluid/operators/partial_sum_op.h index d9c6fd758f4..21c16ed2f62 100644 --- a/paddle/fluid/operators/partial_sum_op.h +++ b/paddle/fluid/operators/partial_sum_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/pixel_shuffle_op.cc b/paddle/fluid/operators/pixel_shuffle_op.cc index 1724aedbe9b..026a1749c39 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.cc +++ b/paddle/fluid/operators/pixel_shuffle_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/operators/poisson_op.cc b/paddle/fluid/operators/poisson_op.cc index d5896c41059..b964d8fe116 100644 --- a/paddle/fluid/operators/poisson_op.cc +++ b/paddle/fluid/operators/poisson_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/pool_op.cc b/paddle/fluid/operators/pool_op.cc index 44f3d8090e5..30ead84d1a9 100644 --- a/paddle/fluid/operators/pool_op.cc +++ b/paddle/fluid/operators/pool_op.cc @@ -15,13 +15,13 @@ limitations under the License. */ #include "paddle/fluid/operators/pool_op.h" #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/backward.h" #include "paddle/phi/infermeta/unary.h" - -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" #endif diff --git a/paddle/fluid/operators/pool_op_xpu.cc b/paddle/fluid/operators/pool_op_xpu.cc index f178a966e1e..d2ec4089f9d 100644 --- a/paddle/fluid/operators/pool_op_xpu.cc +++ b/paddle/fluid/operators/pool_op_xpu.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" @@ -112,11 +113,12 @@ class PoolGradXPUKernel : public framework::OpKernel { bool exclusive = context.Attr("exclusive"); bool adaptive = context.Attr("adaptive"); const int* index_data = nullptr; - PADDLE_ENFORCE_EQ(ksize.size(), 2, platform::errors::InvalidArgument( - "The Pool2d XPU OP only support 2 " - "dimension pooling!, but received " - "%d-dimension pool kernel size", - ksize.size())); + PADDLE_ENFORCE_EQ( + ksize.size(), 2, + platform::errors::InvalidArgument("The Pool2d XPU OP only support 2 " + "dimension pooling!, but received " + "%d-dimension pool kernel size", + ksize.size())); PADDLE_ENFORCE_EQ(!adaptive || (ksize[0] * ksize[1] == 1), true, platform::errors::InvalidArgument( "The Pool2d XPU OP does not support (adaptive == " diff --git a/paddle/fluid/operators/pool_with_index_op.cc b/paddle/fluid/operators/pool_with_index_op.cc index e0341f4a4b4..8619cc28d50 100644 --- a/paddle/fluid/operators/pool_with_index_op.cc +++ b/paddle/fluid/operators/pool_with_index_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/positive_negative_pair_op.cc b/paddle/fluid/operators/positive_negative_pair_op.cc index cbe58644f53..02273b7943a 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.cc +++ b/paddle/fluid/operators/positive_negative_pair_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/positive_negative_pair_op.h" + #include "paddle/fluid/platform/enforce.h" namespace paddle { @@ -41,11 +42,12 @@ class PositiveNegativePairOp : public framework::OperatorWithKernel { ctx->HasInput("AccumulatePositivePair") && ctx->HasInput("AccumulateNegativePair") && ctx->HasInput("AccumulateNeutralPair"), - true, platform::errors::InvalidArgument( - "All optional inputs(AccumulatePositivePair, " - "AccumulateNegativePair, AccumulateNeutralPair) of " - "PositiveNegativePairOp are required if one of them " - "is specified.")); + true, + platform::errors::InvalidArgument( + "All optional inputs(AccumulatePositivePair, " + "AccumulateNegativePair, AccumulateNeutralPair) of " + "PositiveNegativePairOp are required if one of them " + "is specified.")); PADDLE_ENFORCE_EQ( ctx->GetInputDim("AccumulatePositivePair"), scalar_dim, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/positive_negative_pair_op.h b/paddle/fluid/operators/positive_negative_pair_op.h index a47deb18b6f..972258350bf 100644 --- a/paddle/fluid/operators/positive_negative_pair_op.h +++ b/paddle/fluid/operators/positive_negative_pair_op.h @@ -12,6 +12,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc index de35f674058..50dc9d6429a 100644 --- a/paddle/fluid/operators/prelu_op.cc +++ b/paddle/fluid/operators/prelu_op.cc @@ -11,6 +11,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/operators/prim_ops/prim_op_test.cc b/paddle/fluid/operators/prim_ops/prim_op_test.cc index 2d65149d130..e5b84d00f1f 100644 --- a/paddle/fluid/operators/prim_ops/prim_op_test.cc +++ b/paddle/fluid/operators/prim_ops/prim_op_test.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "gtest/gtest.h" - #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/operators/print_op.cc b/paddle/fluid/operators/print_op.cc index 4dd4114d378..16d6185e87e 100644 --- a/paddle/fluid/operators/print_op.cc +++ b/paddle/fluid/operators/print_op.cc @@ -178,10 +178,8 @@ REGISTER_OPERATOR(print, ops::PrintOp, ops::PrintOpProtoAndCheckMaker, ops::PrintOpGradientMaker, ops::PrintOpInferShape, ops::PrintOpVarTypeInference); -REGISTER_OP_VERSION(print) - .AddCheckpoint( - R"ROC(Upgrade print add a new attribute [print_tensor_layout] to " +REGISTER_OP_VERSION(print).AddCheckpoint( + R"ROC(Upgrade print add a new attribute [print_tensor_layout] to " "contorl whether to print tensor's layout.)ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "print_tensor_layout", "Whether to print the tensor's layout.", - true)); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "print_tensor_layout", "Whether to print the tensor's layout.", true)); diff --git a/paddle/fluid/operators/prroi_pool_op.cc b/paddle/fluid/operators/prroi_pool_op.cc index f03a392bfc7..51bd079849a 100644 --- a/paddle/fluid/operators/prroi_pool_op.cc +++ b/paddle/fluid/operators/prroi_pool_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/prroi_pool_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/prroi_pool_op.h b/paddle/fluid/operators/prroi_pool_op.h index 0fdccc729ad..8431d945749 100644 --- a/paddle/fluid/operators/prroi_pool_op.h +++ b/paddle/fluid/operators/prroi_pool_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" #if defined(__NVCC__) || defined(__HIPCC__) diff --git a/paddle/fluid/operators/prune_gate_by_capacity_op.cu b/paddle/fluid/operators/prune_gate_by_capacity_op.cu index 6a2ed6592e7..2e729f94dc8 100644 --- a/paddle/fluid/operators/prune_gate_by_capacity_op.cu +++ b/paddle/fluid/operators/prune_gate_by_capacity_op.cu @@ -79,9 +79,10 @@ class PruneGateByCapacityFunctor { int blocks = NumBlocks(batch_size); int threads = kNumCUDAThreads; - prune_gate_by_capacity_kernel<<>>( - gate_idx_data, new_gate_idx_data_, expert_count_out_data, batch_size); + prune_gate_by_capacity_kernel + <<>>( + gate_idx_data, new_gate_idx_data_, expert_count_out_data, + batch_size); } private: diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc index f101e509d93..d09b1c7aa06 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.cc @@ -9,11 +9,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/operators/pscore/distributed_lookup_table_op.h" + #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/pscore/distributed_lookup_table_op.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h index c2717c19b2d..c9390aa42a6 100644 --- a/paddle/fluid/operators/pscore/distributed_lookup_table_op.h +++ b/paddle/fluid/operators/pscore/distributed_lookup_table_op.h @@ -13,6 +13,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps/wrapper/fleet.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc index 9868a625792..701b6250445 100644 --- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc +++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.cc @@ -9,11 +9,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/pscore/distributed_push_sparse_op.h" + #include #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/pscore/distributed_push_sparse_op.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { diff --git a/paddle/fluid/operators/pscore/distributed_push_sparse_op.h b/paddle/fluid/operators/pscore/distributed_push_sparse_op.h index 6d3faae6a2d..7c361dfd1a7 100644 --- a/paddle/fluid/operators/pscore/distributed_push_sparse_op.h +++ b/paddle/fluid/operators/pscore/distributed_push_sparse_op.h @@ -13,6 +13,7 @@ #include #include #include + #include "paddle/fluid/distributed/ps/wrapper/fleet.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc index 457e37744d3..5d77851b72a 100644 --- a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc +++ b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/pscore/heter_listen_and_serv_op.h" + #include "paddle/fluid/framework/op_registry.h" PADDLE_DEFINE_EXPORTED_int32(rpc_send_thread_num, 12, @@ -92,8 +93,9 @@ void HeterListenAndServOp::RunAsyncLoop(framework::ProgramDesc *program) const { auto blkid = block_list[i]; auto it = message_to_block_id.find_value(blkid); heter_server_->RegisterServiceHandler( - it->first, [&](const MultiVarMsg *request, MultiVarMsg *response, - brpc::Controller *cntl) -> int { + it->first, + [&](const MultiVarMsg *request, MultiVarMsg *response, + brpc::Controller *cntl) -> int { return send_and_recv_variable_handler_->Handle(request, response, cntl); }); diff --git a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.h b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.h old mode 100755 new mode 100644 index 3ecff083b00..29cc041d682 --- a/paddle/fluid/operators/pscore/heter_listen_and_serv_op.h +++ b/paddle/fluid/operators/pscore/heter_listen_and_serv_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc b/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc index ab2fcba5106..da57660a74d 100644 --- a/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc +++ b/paddle/fluid/operators/pscore/heter_listen_and_server_test.cc @@ -14,11 +14,11 @@ limitations under the License. */ #include #include -#include -#include // NOLINT #include #include +#include +#include // NOLINT #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps/service/heter_client.h" diff --git a/paddle/fluid/operators/pscore/heter_server_test.cc b/paddle/fluid/operators/pscore/heter_server_test.cc index d4ee00d10a5..db647dfaf23 100644 --- a/paddle/fluid/operators/pscore/heter_server_test.cc +++ b/paddle/fluid/operators/pscore/heter_server_test.cc @@ -12,17 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/distributed/ps/service/heter_server.h" + #include -#include -#include -#include // NOLINT +#include #include #include +#include +#include // NOLINT #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps/service/heter_client.h" -#include "paddle/fluid/distributed/ps/service/heter_server.h" #include "paddle/fluid/framework/op_registry.h" namespace framework = paddle::framework; @@ -181,13 +182,15 @@ void StartSendAndRecvServer(std::string endpoint) { heter_server_ptr_->SetEndPoint(endpoint); LOG(INFO) << "before HeterServer::RegisterServiceHandler"; heter_server_ptr_->RegisterServiceHandler( - in_var_name, [&](const MultiVarMsg* request, MultiVarMsg* response, - brpc::Controller* cntl) -> int { + in_var_name, + [&](const MultiVarMsg* request, MultiVarMsg* response, + brpc::Controller* cntl) -> int { return b_req_handler->Handle(request, response, cntl); }); heter_server_ptr_->RegisterServiceHandler( - in_var_name2, [&](const MultiVarMsg* request, MultiVarMsg* response, - brpc::Controller* cntl) -> int { + in_var_name2, + [&](const MultiVarMsg* request, MultiVarMsg* response, + brpc::Controller* cntl) -> int { return b_req_handler->Handle(request, response, cntl); }); diff --git a/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc b/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc old mode 100755 new mode 100644 index 7c25d38d1eb..a21d11ee1b1 --- a/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc +++ b/paddle/fluid/operators/pscore/send_and_recv_op_cpu_test.cc @@ -14,12 +14,13 @@ limitations under the License. */ #if defined PADDLE_WITH_PSCORE #include + #include +#include +#include #include #include // NOLINT -#include -#include #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps/service/heter_client.h" #include "paddle/fluid/distributed/ps/service/heter_server.h" @@ -158,8 +159,9 @@ void StartSendAndRecvServer(std::string endpoint) { b_rpc_service->SetEndPoint(endpoint); LOG(INFO) << "before HeterServer::RegisterServiceHandler"; b_rpc_service->RegisterServiceHandler( - in_var_name, [&](const MultiVarMsg* request, MultiVarMsg* response, - brpc::Controller* cntl) -> int { + in_var_name, + [&](const MultiVarMsg* request, MultiVarMsg* response, + brpc::Controller* cntl) -> int { return b_req_handler->Handle(request, response, cntl); }); diff --git a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc index 4054846460b..c8e24c77734 100644 --- a/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc +++ b/paddle/fluid/operators/pscore/send_and_recv_op_gpu_test.cc @@ -15,12 +15,12 @@ limitations under the License. 
*/ #if (defined PADDLE_WITH_CUDA) && (defined PADDLE_WITH_PSCORE) #include -#include -#include -#include // NOLINT +#include #include #include +#include +#include // NOLINT #include "gtest/gtest.h" #include "paddle/fluid/distributed/ps/service/heter_client.h" @@ -178,8 +178,9 @@ void StartSendAndRecvServer(std::string endpoint) { b_rpc_service2->SetEndPoint(endpoint); LOG(INFO) << "before HeterServer::RegisterServiceHandler"; b_rpc_service2->RegisterServiceHandler( - in_var_name, [&](const MultiVarMsg* request, MultiVarMsg* response, - brpc::Controller* cntl) -> int { + in_var_name, + [&](const MultiVarMsg* request, MultiVarMsg* response, + brpc::Controller* cntl) -> int { return b_req_handler->Handle(request, response, cntl); }); diff --git a/paddle/fluid/operators/pull_box_extended_sparse_op.h b/paddle/fluid/operators/pull_box_extended_sparse_op.h index 559c7eed84e..f803b57b187 100644 --- a/paddle/fluid/operators/pull_box_extended_sparse_op.h +++ b/paddle/fluid/operators/pull_box_extended_sparse_op.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/framework/fleet/box_wrapper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/pull_gpups_sparse_op.h b/paddle/fluid/operators/pull_gpups_sparse_op.h index abfdb62ec34..58e11725521 100644 --- a/paddle/fluid/operators/pull_gpups_sparse_op.h +++ b/paddle/fluid/operators/pull_gpups_sparse_op.h @@ -15,6 +15,7 @@ #pragma once #include #include + #include "paddle/fluid/framework/fleet/ps_gpu_wrapper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/pull_sparse_op.cc b/paddle/fluid/operators/pull_sparse_op.cc index fb83746de19..57d361b7a77 100644 --- a/paddle/fluid/operators/pull_sparse_op.cc +++ b/paddle/fluid/operators/pull_sparse_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/pull_sparse_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/pull_sparse_op.h b/paddle/fluid/operators/pull_sparse_op.h index 2498adc141c..e3f0f88ce55 100644 --- a/paddle/fluid/operators/pull_sparse_op.h +++ b/paddle/fluid/operators/pull_sparse_op.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/pull_sparse_v2_op.cc b/paddle/fluid/operators/pull_sparse_v2_op.cc index f5f2e728e38..a8fc84b9c2b 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.cc +++ b/paddle/fluid/operators/pull_sparse_v2_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/pull_sparse_v2_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/pull_sparse_v2_op.h b/paddle/fluid/operators/pull_sparse_v2_op.h index 29337cc2d94..c24d0a4f338 100644 --- a/paddle/fluid/operators/pull_sparse_v2_op.h +++ b/paddle/fluid/operators/pull_sparse_v2_op.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/fluid/operators/push_dense_op.cc b/paddle/fluid/operators/push_dense_op.cc index 5b9f05bd126..5284a1a61e5 100644 --- a/paddle/fluid/operators/push_dense_op.cc +++ b/paddle/fluid/operators/push_dense_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/push_dense_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/push_dense_op.h b/paddle/fluid/operators/push_dense_op.h index 592ef5ff72a..c8f98a1ea9e 100644 --- a/paddle/fluid/operators/push_dense_op.h +++ b/paddle/fluid/operators/push_dense_op.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/framework/device_worker.h" #include "paddle/fluid/framework/fleet/fleet_wrapper.h" #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" diff --git a/paddle/fluid/operators/py_func_op.cc b/paddle/fluid/operators/py_func_op.cc index f676348bc0a..de46357e497 100644 --- a/paddle/fluid/operators/py_func_op.cc +++ b/paddle/fluid/operators/py_func_op.cc @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/py_layer_op.cc b/paddle/fluid/operators/py_layer_op.cc index 14c9e8b0c26..db8f315366a 100644 --- a/paddle/fluid/operators/py_layer_op.cc +++ b/paddle/fluid/operators/py_layer_op.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - #include "paddle/fluid/operators/py_layer_op.h" +#include + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/py_layer_op.h b/paddle/fluid/operators/py_layer_op.h index 6625a4a1a75..ea048ee9e59 100644 --- a/paddle/fluid/operators/py_layer_op.h +++ b/paddle/fluid/operators/py_layer_op.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/python_headers.h" diff --git a/paddle/fluid/operators/pyramid_hash_op.cc b/paddle/fluid/operators/pyramid_hash_op.cc index 4b0ade99154..6650037e4d2 100644 --- a/paddle/fluid/operators/pyramid_hash_op.cc +++ b/paddle/fluid/operators/pyramid_hash_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/search_compute.h" @@ -216,9 +218,8 @@ class CPUPyramidHashOPKernel : public framework::OpKernel { bool should_use_term(math::bloomfilter* _filter, math::bloomfilter* _black_filter, const float* word_repr, int len) const { - return (!_filter || - 1 == math::bloomfilter_get(_filter, word_repr, - len * sizeof(float))) && + return (!_filter || 1 == math::bloomfilter_get(_filter, word_repr, + len * sizeof(float))) && (!_black_filter || 0 == math::bloomfilter_get(_black_filter, word_repr, len * sizeof(float))); diff --git a/paddle/fluid/operators/qr_op.cc b/paddle/fluid/operators/qr_op.cc index 02d5e5f03f0..55cab539c4d 100644 --- a/paddle/fluid/operators/qr_op.cc +++ b/paddle/fluid/operators/qr_op.cc @@ -13,10 +13,12 @@ // limitations under the License. #include "paddle/fluid/operators/qr_op.h" + #include #include #include #include + #include "paddle/phi/core/ddim.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" diff --git a/paddle/fluid/operators/qr_op.cu b/paddle/fluid/operators/qr_op.cu index a57a8d5cf8b..695b90e9319 100644 --- a/paddle/fluid/operators/qr_op.cu +++ b/paddle/fluid/operators/qr_op.cu @@ -16,8 +16,10 @@ limitations under the License. 
*/ // HIP not support cusolver #include + #include #include + #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/qr_op.h" #include "paddle/fluid/platform/dynload/cusolver.h" @@ -43,8 +45,9 @@ class QrGPUKernel : public framework::OpKernel { std::tie(compute_q, reduced_mode) = _parse_qr_mode(mode); auto numel = x.numel(); - PADDLE_ENFORCE_GT(numel, 0, platform::errors::PreconditionNotMet( - "The input of QR is empty.")); + PADDLE_ENFORCE_GT( + numel, 0, + platform::errors::PreconditionNotMet("The input of QR is empty.")); auto x_dims = x.dims(); int x_rank = x_dims.size(); int m = x_dims[x_rank - 2]; diff --git a/paddle/fluid/operators/qr_op.h b/paddle/fluid/operators/qr_op.h index 5ef02d89427..760b2efd21f 100644 --- a/paddle/fluid/operators/qr_op.h +++ b/paddle/fluid/operators/qr_op.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/svd_helper.h" @@ -89,11 +90,11 @@ class QrGradKernel : public framework::OpKernel { } // m >= n case - auto m_gt_n_case = []( - const framework::ExecutionContext& ctx, - math::DeviceIndependenceTensorOperations& dito, - const Tensor& dQ, const Tensor& dR, const Tensor& A, const Tensor& Q, - const Tensor& R) -> framework::Tensor { + auto m_gt_n_case = + [](const framework::ExecutionContext& ctx, + math::DeviceIndependenceTensorOperations& dito, + const Tensor& dQ, const Tensor& dR, const Tensor& A, const Tensor& Q, + const Tensor& R) -> framework::Tensor { // Hai-Jun Liao, Jin-Guo Liu, Lei Wang, Tao Xiang (2019). Differentiable // Programming Tensor Networks. // https://arxiv.org/abs/1903.09650 Section 3. QR factorization diff --git a/paddle/fluid/operators/quantize_linear_op.cc b/paddle/fluid/operators/quantize_linear_op.cc index 4039f0e9d07..edd2a06a500 100644 --- a/paddle/fluid/operators/quantize_linear_op.cc +++ b/paddle/fluid/operators/quantize_linear_op.cc @@ -10,9 +10,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/quantize_linear_op.h" + #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/platform/transform.h" diff --git a/paddle/fluid/operators/quantize_linear_op.cu b/paddle/fluid/operators/quantize_linear_op.cu index 6c7e430f511..6e3e39562c7 100644 --- a/paddle/fluid/operators/quantize_linear_op.cu +++ b/paddle/fluid/operators/quantize_linear_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/fake_dequantize_op.cu.h" #include "paddle/fluid/operators/fake_quantize_op.cu.h" @@ -46,10 +47,10 @@ struct ChannelDequantizeFunctorV2 { quant_stride *= in_dims[i]; } - DequantizeOneScaleQuantAxisN< - T><<>>( - in_data, scale_factor, max_range, num, in_dims[quant_axis], - quant_stride, out_data); + DequantizeOneScaleQuantAxisN + <<>>( + in_data, scale_factor, max_range, num, in_dims[quant_axis], + quant_stride, out_data); } }; diff --git a/paddle/fluid/operators/quantize_linear_op.h b/paddle/fluid/operators/quantize_linear_op.h index e20b99e85f0..df1a93ba638 100644 --- a/paddle/fluid/operators/quantize_linear_op.h +++ b/paddle/fluid/operators/quantize_linear_op.h @@ -13,6 +13,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/memory/malloc.h" diff --git a/paddle/fluid/operators/quantize_op.cc b/paddle/fluid/operators/quantize_op.cc index 951951253c4..62ec77bc224 100644 --- a/paddle/fluid/operators/quantize_op.cc +++ b/paddle/fluid/operators/quantize_op.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/quantize_op.h" + #include "paddle/fluid/framework/op_version_registry.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" @@ -57,13 +58,13 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(quantize, ops::QuantOp, ops::QuantOpMaker); REGISTER_OP_VERSION(quantize) - .AddCheckpoint( - R"ROC( Add a new attribute [bfloat16])ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "bfloat16", "If true, float32 input is converted to bfloat16", - false)) - .AddCheckpoint( - R"ROC( Add a new attribute [Shift])ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "Shift", "Quantize data to uint8 if provided non-zero value.", - 0.0f)); + .AddCheckpoint(R"ROC( Add a new attribute [bfloat16])ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "bfloat16", + "If true, float32 input is converted to bfloat16", + false)) + .AddCheckpoint(R"ROC( Add a new attribute [Shift])ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "Shift", + "Quantize data to uint8 if provided non-zero value.", + 0.0f)); diff --git a/paddle/fluid/operators/quantize_op.h b/paddle/fluid/operators/quantize_op.h index 091306e4637..dd1b3c42fb5 100644 --- a/paddle/fluid/operators/quantize_op.h +++ b/paddle/fluid/operators/quantize_op.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/queue_generator_op.cc b/paddle/fluid/operators/queue_generator_op.cc index e2174b9346e..3683fbd075d 100644 --- a/paddle/fluid/operators/queue_generator_op.cc +++ b/paddle/fluid/operators/queue_generator_op.cc @@ -43,9 +43,10 @@ class QueueGeneratorOp : public framework::OperatorBase { void RunImpl(const framework::Scope& scope, const platform::Place& dev_place) const override { std::vector names = Attr>("names"); - PADDLE_ENFORCE_GT(names.size(), 0, platform::errors::InvalidArgument( - "The attribute 'names' for " - "Op(queue_generator) must be set.")); + PADDLE_ENFORCE_GT( + names.size(), 0, + platform::errors::InvalidArgument("The attribute 'names' for " + "Op(queue_generator) must be set.")); int capacity = Attr("capacity"); PADDLE_ENFORCE_GT(capacity, 0, diff --git a/paddle/fluid/operators/random_crop_op.h b/paddle/fluid/operators/random_crop_op.h index 2928c3b5027..cfda710bd77 100644 --- a/paddle/fluid/operators/random_crop_op.h +++ b/paddle/fluid/operators/random_crop_op.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/for_range.h" diff --git a/paddle/fluid/operators/random_routing_op.cu b/paddle/fluid/operators/random_routing_op.cu index fec65518a9d..471cfb40e61 100644 --- a/paddle/fluid/operators/random_routing_op.cu +++ b/paddle/fluid/operators/random_routing_op.cu @@ -71,9 +71,9 @@ class RandomRoutingOpCUDAKernel : public framework::OpKernel { auto topk_idx_data = topk_idx->data(); auto out_data = out->data(); - random_routing_kernel< - T><<>>( - out_data, num_idx, N, D, prob_data, topk_idx_data, topk_value_data); + random_routing_kernel + <<>>( + out_data, num_idx, N, D, prob_data, topk_idx_data, topk_value_data); } }; diff --git a/paddle/fluid/operators/randperm_op.cc b/paddle/fluid/operators/randperm_op.cc index 1b28ab3c133..aed1f2b0ed1 100644 --- a/paddle/fluid/operators/randperm_op.cc +++ b/paddle/fluid/operators/randperm_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -29,10 +30,11 @@ class RandpermOp : public framework::OperatorWithKernel { "The output(Out) of randperm op must not be null.")); int n = ctx->Attrs().Get("n"); PADDLE_ENFORCE_GT( - n, 0, platform::errors::InvalidArgument( - "The input 'n' of randperm op should be greater than 0. " - "But received %d.", - n)); + n, 0, + platform::errors::InvalidArgument( + "The input 'n' of randperm op should be greater than 0. " + "But received %d.", + n)); ctx->SetOutputDim("Out", phi::make_ddim({n})); } diff --git a/paddle/fluid/operators/randperm_op_npu.cc b/paddle/fluid/operators/randperm_op_npu.cc index a16c0d905a5..c9f61211016 100644 --- a/paddle/fluid/operators/randperm_op_npu.cc +++ b/paddle/fluid/operators/randperm_op_npu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/randperm_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/randperm_op.h" template using kernel = diff --git a/paddle/fluid/operators/range_op.cc b/paddle/fluid/operators/range_op.cc index 80fdb2ce6c3..215f8369818 100644 --- a/paddle/fluid/operators/range_op.cc +++ b/paddle/fluid/operators/range_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/range_op.h" + #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/range_op.h b/paddle/fluid/operators/range_op.h index 8924b23ce5c..e2fd16dd629 100644 --- a/paddle/fluid/operators/range_op.h +++ b/paddle/fluid/operators/range_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" @@ -22,13 +23,15 @@ namespace operators { template void GetSize(T start, T end, T step, int64_t* size) { - PADDLE_ENFORCE_NE(step, 0, platform::errors::InvalidArgument( - "The step of range op should not be 0.")); + PADDLE_ENFORCE_NE(step, 0, + platform::errors::InvalidArgument( + "The step of range op should not be 0.")); if (start < end) { PADDLE_ENFORCE_GT( - step, 0, platform::errors::InvalidArgument( - "The step should be greater than 0 while start < end.")); + step, 0, + platform::errors::InvalidArgument( + "The step should be greater than 0 while start < end.")); } if (start > end) { diff --git a/paddle/fluid/operators/range_op_xpu.cc b/paddle/fluid/operators/range_op_xpu.cc index 6672968de3a..bfc0d27f7ca 100644 --- a/paddle/fluid/operators/range_op_xpu.cc +++ b/paddle/fluid/operators/range_op_xpu.cc @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/range_op.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/range_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/rank_attention_op.cc b/paddle/fluid/operators/rank_attention_op.cc index e5332da6475..89bdeb57b5f 100644 --- a/paddle/fluid/operators/rank_attention_op.cc +++ b/paddle/fluid/operators/rank_attention_op.cc @@ -10,9 +10,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/rank_attention_op.h" + #include #include #include + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/rank_attention_op.cu b/paddle/fluid/operators/rank_attention_op.cu index 9b3a1e56371..61d723c27f7 100644 --- a/paddle/fluid/operators/rank_attention_op.cu +++ b/paddle/fluid/operators/rank_attention_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
 */
 #include
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/operators/rank_attention.cu.h"
 #include "paddle/fluid/operators/rank_attention_op.h"
diff --git a/paddle/fluid/operators/reader/blocking_queue.h b/paddle/fluid/operators/reader/blocking_queue.h
index f126070a7eb..38c45ca2803 100644
--- a/paddle/fluid/operators/reader/blocking_queue.h
+++ b/paddle/fluid/operators/reader/blocking_queue.h
@@ -161,9 +161,10 @@ class BlockingQueue {

 private:
  inline void EnforceNotKilled() {
-    PADDLE_ENFORCE_NE(killed_, true, platform::errors::Fatal(
-                                         "Blocking queue is killed because the "
-                                         "data reader raises an exception."));
+    PADDLE_ENFORCE_NE(
+        killed_, true,
+        platform::errors::Fatal("Blocking queue is killed because the "
+                                "data reader raises an exception."));
  }

 private:
diff --git a/paddle/fluid/operators/reader/buffered_reader.cc b/paddle/fluid/operators/reader/buffered_reader.cc
index db0f5758d2f..193f6c29724 100644
--- a/paddle/fluid/operators/reader/buffered_reader.cc
+++ b/paddle/fluid/operators/reader/buffered_reader.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "paddle/fluid/operators/reader/buffered_reader.h"
+
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/platform/device/device_wrapper.h"
 #include "paddle/fluid/platform/profiler.h"
diff --git a/paddle/fluid/operators/reader/create_ctr_reader_op.cc b/paddle/fluid/operators/reader/create_ctr_reader_op.cc
index 86fbddc0ec2..b83d0852841 100644
--- a/paddle/fluid/operators/reader/create_ctr_reader_op.cc
+++ b/paddle/fluid/operators/reader/create_ctr_reader_op.cc
@@ -13,7 +13,6 @@
 // limitations under the License.

 #include "paddle/fluid/operators/reader/ctr_reader.h"
-
 #include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
 #include "paddle/fluid/operators/reader/reader_op_registry.h"
diff --git a/paddle/fluid/operators/recurrent_op.cc b/paddle/fluid/operators/recurrent_op.cc
index 8557ef950b3..8b2809b286c 100644
--- a/paddle/fluid/operators/recurrent_op.cc
+++ b/paddle/fluid/operators/recurrent_op.cc
@@ -322,9 +322,10 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope,
   framework::Executor executor(place);
   auto *block = Attr<framework::BlockDesc *>(kStepBlock);
   auto *program = block->Program();
-  auto ctx = executor.Prepare(
-      *program, block->ID(), Attr<std::vector<std::string>>(
-                                 kSkipEagerDeletionVars) /*skip_ref_cnt_vars*/);
+  auto ctx =
+      executor.Prepare(*program, block->ID(),
+                       Attr<std::vector<std::string>>(
+                           kSkipEagerDeletionVars) /*skip_ref_cnt_vars*/);

   for (size_t step_id = 0; step_id < seq_len; ++step_id) {
     size_t seq_offset = reverse ? step_id : seq_len - step_id - 1;
@@ -387,19 +388,19 @@ void RecurrentGradOp::RunImpl(const framework::Scope &scope,
       // outside::output[seq_offset: seq_offset + 1] = inside::output
       executor.CreateVariables(ctx->prog_, &cur_scope, ctx->block_id_);
       if (step_id > 0) {
-        LinkTensorWithCallback(scope, Outputs(kInputGrads), cur_scope,
-                               GradVarLists(Inputs(kInputs)),
-                               [&](const framework::LoDTensor &src_tensor,
-                                   framework::LoDTensor *dst_tensor) {
-                                 if (src_tensor.memory_size() ==
-                                     0) {  // Inside Gradient is not created.
-                                   return;
-                                 }
-                                 framework::Tensor src_slice =
-                                     src_tensor.Slice(seq_offset, seq_offset + 1);
-                                 dst_tensor->ShareDataWith(src_slice);
-                               },
-                               true /*is_backward*/);
+        LinkTensorWithCallback(
+            scope, Outputs(kInputGrads), cur_scope, GradVarLists(Inputs(kInputs)),
+            [&](const framework::LoDTensor &src_tensor,
+                framework::LoDTensor *dst_tensor) {
+              if (src_tensor.memory_size() ==
+                  0) {  // Inside Gradient is not created.
+ return; + } + framework::Tensor src_slice = + src_tensor.Slice(seq_offset, seq_offset + 1); + dst_tensor->ShareDataWith(src_slice); + }, + true /*is_backward*/); } VLOG(5) << "Recurrent memory linking finished "; @@ -604,7 +605,8 @@ if reverse is True | | | | v v v v o o o o -)DOC").SetDefault(false); +)DOC") + .SetDefault(false); AddAttr(RecurrentBase::kIsTrain, "").SetDefault(true); AddAttr>(RecurrentBase::kSkipEagerDeletionVars, "Vars that would skip eager deletion." @@ -663,14 +665,16 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { ctx->Attrs() .Get>(RecurrentBase::kExStates) .size(), - 0, platform::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kExStates)); + 0, + platform::errors::InvalidArgument("The Attr(%s) should be empty.", + RecurrentBase::kExStates)); PADDLE_ENFORCE_EQ( ctx->Attrs() .Get>(RecurrentBase::kStates) .size(), - 0, platform::errors::InvalidArgument("The Attr(%s) should be empty.", - RecurrentBase::kStates)); + 0, + platform::errors::InvalidArgument("The Attr(%s) should be empty.", + RecurrentBase::kStates)); } PADDLE_ENFORCE_EQ( @@ -702,9 +706,10 @@ class RecurrentGradOpShapeInference : public framework::InferShapeBase { if (ctx->HasInputs(RecurrentBase::kParameters)) { PADDLE_ENFORCE_EQ( ctx->HasOutputs(framework::GradVarName(RecurrentBase::kParameters)), - true, platform::errors::InvalidArgument( - "The output of(%s) should not be empty.", - framework::GradVarName(RecurrentBase::kParameters))); + true, + platform::errors::InvalidArgument( + "The output of(%s) should not be empty.", + framework::GradVarName(RecurrentBase::kParameters))); ctx->SetOutputsDim(framework::GradVarName(RecurrentBase::kParameters), ctx->GetInputsDim(RecurrentBase::kParameters)); } diff --git a/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc b/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc index 83a21a919dc..063f7ca041a 100644 --- a/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc +++ b/paddle/fluid/operators/reduce_ops/frobenius_norm_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
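A large share of this patch is mechanical re-wrapping of PADDLE_ENFORCE_* call sites, as in the BlockingQueue and recurrent-op shape-inference hunks above: the compared operands now stay together on one line and the error factory drops below them. A minimal sketch of the call shape, assuming paddle/fluid/platform/enforce.h; the checked value and message are hypothetical:

    #include "paddle/fluid/platform/enforce.h"

    void CheckBatchSize(int batch_size) {
      // Operands first, error factory second; clang-format now breaks right
      // after the macro's opening parenthesis when the call is too long.
      PADDLE_ENFORCE_GT(
          batch_size, 0,
          paddle::platform::errors::InvalidArgument(
              "Expected batch_size > 0, but received %d.", batch_size));
    }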
#include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.h" diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op.cc index 0602c73db6b..4128d51559c 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op.cc @@ -15,6 +15,7 @@ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/operators/reduce_ops/reduce_op_function.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_amax_op.cc b/paddle/fluid/operators/reduce_ops/reduce_amax_op.cc index c5bc66e23ce..29587faa480 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_amax_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_amax_op.cc @@ -16,16 +16,18 @@ REGISTER_REDUCE_OP(reduce_amax); REGISTER_OP_CPU_KERNEL( - reduce_amax, ops::ReduceKernel, + reduce_amax, + ops::ReduceKernel, ops::ReduceKernel, ops::ReduceKernel, ops::ReduceKernel); REGISTER_OP_CPU_KERNEL( - reduce_amax_grad, ops::ReduceGradKernel, + reduce_amax_grad, + ops::ReduceGradKernel, ops::ReduceGradKernel, ops::ReduceGradKernel, + reduce_amax_grad, + ops::ReduceGradKernel, ops::ReduceGradKernel, ops::ReduceGradKernel, + reduce_amin, + ops::ReduceKernel, ops::ReduceKernel, ops::ReduceKernel, ops::ReduceKernel); REGISTER_OP_CPU_KERNEL( - reduce_amin_grad, ops::ReduceGradKernel, + reduce_amin_grad, + ops::ReduceGradKernel, ops::ReduceGradKernel, ops::ReduceGradKernel, + reduce_amin_grad, + ops::ReduceGradKernel, ops::ReduceGradKernel, ops::ReduceGradKernel #include + #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc index dc41979defb..8ce115ce669 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h" + #include #include #include diff --git a/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc b/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc index 715dcb25c20..111537f6455 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_mean_op_npu.cc @@ -11,8 +11,8 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h" #include "paddle/fluid/operators/elementwise/elementwise_npu.h" +#include "paddle/fluid/operators/reduce_ops/reduce_mean_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/reduce_ops/reduce_min_op.cc b/paddle/fluid/operators/reduce_ops/reduce_min_op.cc index 5e5b04d57b0..f6d8aa13182 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_min_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_min_op.cc @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h" - #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/operators/reduce_ops/reduce_min_max_op.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h index b21e41c5b85..a2048004615 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.cu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.cu.h @@ -21,7 +21,6 @@ #include #include "paddle/fluid/framework/tensor.h" - #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/reduce_function.h" namespace paddle { diff --git a/paddle/fluid/operators/reduce_ops/reduce_op.h b/paddle/fluid/operators/reduce_ops/reduce_op.h index 76641698ead..322ef1fdff6 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/operators/cast_op.h" @@ -484,8 +485,9 @@ class ReduceOp : public framework::OperatorWithKernel { platform::is_gpu_place(ctx.GetPlace()) || platform::is_npu_place(ctx.GetPlace()) || platform::is_mlu_place(ctx.GetPlace()), - true, platform::errors::InvalidArgument( - "float16 can only be used on GPU or NPU or MLU place")); + true, + platform::errors::InvalidArgument( + "float16 can only be used on GPU or NPU or MLU place")); } return framework::OpKernelType(input_data_type, ctx.GetPlace()); } diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_function.h b/paddle/fluid/operators/reduce_ops/reduce_op_function.h index c144e65cbf6..a9d5863558c 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_function.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_function.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h b/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h index 95dda354cae..96e496217d0 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_mlu.h @@ -17,6 +17,7 @@ #ifdef PADDLE_WITH_MLU #include #include + #include "paddle/fluid/operators/mlu/mlu_baseop.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index 324fd369e82..f9ae575e801 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/operators/reduce_ops/reduce_op.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc index 7a5c86c35c6..f50cfd0417a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc @@ -15,6 +15,7 @@ #ifdef PADDLE_WITH_XPU #include #include + #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" diff --git a/paddle/fluid/operators/renorm_op.cu b/paddle/fluid/operators/renorm_op.cu index e40bd147b99..028f5a7f515 100644 --- 
a/paddle/fluid/operators/renorm_op.cu +++ b/paddle/fluid/operators/renorm_op.cu @@ -12,14 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/operators/renorm_op.h" - #include #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h" #include "paddle/fluid/operators/reduce_ops/reduce_op.cu.h" +#include "paddle/fluid/operators/renorm_op.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" @@ -107,10 +106,10 @@ __global__ void RenormGradKernelFunc2(const T* x_data, const T* dout_data, __syncthreads(); if (i < size) { dx_data[i] = dim_value[dim_index] * dout_data[i]; - dx_data[i] = dx_data[i] + - weight_derivative[dim_index] * dim_power_sum[dim_index] * - pow(abs(x_data[i]), T(p - 1.0)) * - (x_data[i] >= 0 ? 1 : -1); + dx_data[i] = dx_data[i] + weight_derivative[dim_index] * + dim_power_sum[dim_index] * + pow(abs(x_data[i]), T(p - 1.0)) * + (x_data[i] >= 0 ? 1 : -1); } } diff --git a/paddle/fluid/operators/repeat_interleave_op.cc b/paddle/fluid/operators/repeat_interleave_op.cc index d6f9df5d79e..daa45bf78f2 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cc +++ b/paddle/fluid/operators/repeat_interleave_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/repeat_interleave_op.h" + #include namespace paddle { @@ -51,11 +52,12 @@ class RepeatInterleaveOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( repeats_dim.size() == 1 || (repeats_dim.size() == 2 && repeats_dim[1] == 1), - true, platform::errors::InvalidArgument( - "The 'shape' of Input(RepeatsTensor) must be 1-D tensor. " - "But received: the 'shape' of Input(Index) is [%s], " - "the dimension of Input(Index) is [%d].", - repeats_dim, repeats_dim.size())); + true, + platform::errors::InvalidArgument( + "The 'shape' of Input(RepeatsTensor) must be 1-D tensor. 
" + "But received: the 'shape' of Input(Index) is [%s], " + "the dimension of Input(Index) is [%d].", + repeats_dim, repeats_dim.size())); PADDLE_ENFORCE_EQ(repeats_dim[0] != 0, true, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/repeat_interleave_op.cu b/paddle/fluid/operators/repeat_interleave_op.cu index 5f48a4a94ac..2b8464d5bf6 100644 --- a/paddle/fluid/operators/repeat_interleave_op.cu +++ b/paddle/fluid/operators/repeat_interleave_op.cu @@ -127,10 +127,10 @@ class RepeatInterleaveCUDAKernel : public framework::OpKernel { int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; - index_select_cuda_kernel<<< - (numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_data, out_data, index_data, - numel, stride, size, delta); + index_select_cuda_kernel + <<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, + PADDLE_CUDA_NUM_THREADS, 0, stream>>>( + in_data, out_data, index_data, numel, stride, size, delta); } else { RepeatsTensor2IndexTensor(*repeats_tensor, &index); @@ -143,10 +143,10 @@ class RepeatInterleaveCUDAKernel : public framework::OpKernel { int64_t size = output_dim[dim]; int64_t delta = input_dim[dim] - size; - index_select_cuda_kernel<<< - (numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_data, out_data, index_data, - numel, stride, size, delta); + index_select_cuda_kernel + <<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, + PADDLE_CUDA_NUM_THREADS, 0, stream>>>( + in_data, out_data, index_data, numel, stride, size, delta); } } else if (repeats > 0) { int64_t index_size = in->dims()[dim] * repeats; @@ -169,10 +169,10 @@ class RepeatInterleaveCUDAKernel : public framework::OpKernel { int64_t delta = input_dim[dim] - size; const int* index_data = index.data(); - index_select_cuda_kernel<<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / - PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, 0, stream>>>( - in_data, out_data, index_data, numel, stride, size, delta); + index_select_cuda_kernel + <<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, + PADDLE_CUDA_NUM_THREADS, 0, stream>>>( + in_data, out_data, index_data, numel, stride, size, delta); platform::GpuStreamSync(stream); } else { PADDLE_THROW(platform::errors::InvalidArgument( @@ -206,9 +206,9 @@ class RepeatInterleaveGradCUDAKernel : public framework::OpKernel { auto stream = context.template device_context().stream(); - index_select_grad_init< - T><<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_grad_data, numel); + index_select_grad_init + <<<(numel + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, + PADDLE_CUDA_NUM_THREADS, 0, stream>>>(in_grad_data, numel); int repeats = context.Attr("Repeats"); framework::LoDTensor index; @@ -237,22 +237,24 @@ class RepeatInterleaveGradCUDAKernel : public framework::OpKernel { int64_t index_nums = index.numel(); const int64_t* index_data = index.data(); - index_select_grad_cuda_kernel<<< - (out_nums + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, 0, stream>>>( - output_grad_data, in_grad_data, index_data, index_nums, out_nums, - stride, size, delta); + index_select_grad_cuda_kernel + <<<(out_nums + PADDLE_CUDA_NUM_THREADS - 1) / + PADDLE_CUDA_NUM_THREADS, + PADDLE_CUDA_NUM_THREADS, 0, stream>>>( + output_grad_data, in_grad_data, index_data, index_nums, + out_nums, stride, size, delta); 
platform::GpuStreamSync(stream); } else { RepeatsTensor2IndexTensor(*repeats_tensor, &index); int64_t index_nums = index.numel(); const int* index_data = index.data(); - index_select_grad_cuda_kernel<<< - (out_nums + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, 0, stream>>>( - output_grad_data, in_grad_data, index_data, index_nums, out_nums, - stride, size, delta); + index_select_grad_cuda_kernel + <<<(out_nums + PADDLE_CUDA_NUM_THREADS - 1) / + PADDLE_CUDA_NUM_THREADS, + PADDLE_CUDA_NUM_THREADS, 0, stream>>>( + output_grad_data, in_grad_data, index_data, index_nums, + out_nums, stride, size, delta); platform::GpuStreamSync(stream); } } else if (repeats > 0) { @@ -268,11 +270,11 @@ class RepeatInterleaveGradCUDAKernel : public framework::OpKernel { const int* index_data = index.data(); int64_t index_nums = index.numel(); - index_select_grad_cuda_kernel<<< - (out_nums + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, - PADDLE_CUDA_NUM_THREADS, 0, stream>>>(output_grad_data, in_grad_data, - index_data, index_nums, - out_nums, stride, size, delta); + index_select_grad_cuda_kernel + <<<(out_nums + PADDLE_CUDA_NUM_THREADS - 1) / PADDLE_CUDA_NUM_THREADS, + PADDLE_CUDA_NUM_THREADS, 0, stream>>>( + output_grad_data, in_grad_data, index_data, index_nums, out_nums, + stride, size, delta); platform::GpuStreamSync(stream); } else { PADDLE_THROW(platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/repeat_interleave_op.h b/paddle/fluid/operators/repeat_interleave_op.h index 68b66bd534c..f8e39fdc907 100644 --- a/paddle/fluid/operators/repeat_interleave_op.h +++ b/paddle/fluid/operators/repeat_interleave_op.h @@ -14,11 +14,11 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/index_select_op.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" - -#include "paddle/fluid/operators/index_select_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/requantize_op.cc b/paddle/fluid/operators/requantize_op.cc index 2d87ae91fbe..d9345c1145b 100644 --- a/paddle/fluid/operators/requantize_op.cc +++ b/paddle/fluid/operators/requantize_op.cc @@ -13,6 +13,7 @@ * limitations under the License. */ #include "paddle/fluid/operators/requantize_op.h" + #include "paddle/fluid/framework/op_version_registry.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" diff --git a/paddle/fluid/operators/requantize_op.h b/paddle/fluid/operators/requantize_op.h index c2b154db11d..8166aa98f07 100644 --- a/paddle/fluid/operators/requantize_op.h +++ b/paddle/fluid/operators/requantize_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/rnn_op.cc b/paddle/fluid/operators/rnn_op.cc index caf90219935..d3c6ee7c1e1 100644 --- a/paddle/fluid/operators/rnn_op.cc +++ b/paddle/fluid/operators/rnn_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
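The reduce_op.h hunk a few files back re-wraps a kernel-dispatch guard worth spelling out: float16 reductions are only available on device places, so GetExpectedKernelType rejects unsupported places up front. A condensed sketch of that idiom, assuming the fluid framework types visible in the hunk; IndicateVarDataType is my assumption for how the input type is read:

    // Sketch only: body of a hypothetical OperatorWithKernel subclass.
    framework::OpKernelType GetExpectedKernelType(
        const framework::ExecutionContext& ctx) const override {
      auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
      if (input_data_type == framework::proto::VarType::FP16) {
        // Mirrors the guard in the reduce_op.h hunk: fail fast on CPU.
        PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
                          platform::errors::InvalidArgument(
                              "float16 can only be used on GPU place"));
      }
      return framework::OpKernelType(input_data_type, ctx.GetPlace());
    }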
*/ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/operators/roi_align_op.cc b/paddle/fluid/operators/roi_align_op.cc index bf78b6a6965..db84387e6cf 100644 --- a/paddle/fluid/operators/roi_align_op.cc +++ b/paddle/fluid/operators/roi_align_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/operators/roi_align_op_xpu.cc b/paddle/fluid/operators/roi_align_op_xpu.cc index 7be1c190120..18938d71832 100644 --- a/paddle/fluid/operators/roi_align_op_xpu.cc +++ b/paddle/fluid/operators/roi_align_op_xpu.cc @@ -15,6 +15,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/roi_pool_op.cc b/paddle/fluid/operators/roi_pool_op.cc index 12e33d56c00..e47145535a3 100644 --- a/paddle/fluid/operators/roi_pool_op.cc +++ b/paddle/fluid/operators/roi_pool_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/op_version_registry.h" diff --git a/paddle/fluid/operators/row_conv_op.cc b/paddle/fluid/operators/row_conv_op.cc index 07a6117d711..9c66566fdfd 100644 --- a/paddle/fluid/operators/row_conv_op.cc +++ b/paddle/fluid/operators/row_conv_op.cc @@ -12,9 +12,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/row_conv_op.h" + #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/row_conv_op.cu b/paddle/fluid/operators/row_conv_op.cu index c5794948aae..b1cabb018b9 100644 --- a/paddle/fluid/operators/row_conv_op.cu +++ b/paddle/fluid/operators/row_conv_op.cu @@ -344,9 +344,9 @@ class RowConvKernel dim3 block_dim = dim3(32, 32); dim3 grid_dim = dim3(DivUp(input_dim, block_dim.x), 1); int mem_per_block = (future_context * block_dim.x) * sizeof(T); - RowConvForwardSharedMemory< - T><<>>( - in, weight, num_sequence, input_dim, future_context, idx, out); + RowConvForwardSharedMemory + <<>>( + in, weight, num_sequence, input_dim, future_context, idx, out); } else { dim3 block_dim = dim3(32, 32); dim3 grid_dim = dim3(DivUp(input_dim, block_dim.x), 1); @@ -413,10 +413,10 @@ class RowConvGradKernel (block_y * block_x + block_y * (block_x + future_context - 1) + future_context * block_y) * sizeof(T); - RowConvGradFilterImproved< - T><<>>( - in, dout, num_sequence, input_dim, future_context, block_x, block_y, - idx, dfilter); + RowConvGradFilterImproved + <<>>( + in, dout, num_sequence, input_dim, future_context, block_x, + block_y, idx, dfilter); } else { dim3 block_dim = dim3(32, 32); dim3 grid_dim = dim3(DivUp(input_dim, block_dim.x), 1); @@ -424,10 +424,10 @@ class RowConvGradKernel int block_y = block_dim.y; int mem_per_block = (block_x * block_y * 2) * sizeof(T); // For 2 arrays of size 32x32 - RowConvGradFilter< - T><<>>( - in, dout, num_sequence, input_dim, future_context, block_x, block_y, - idx, dfilter); + RowConvGradFilter + <<>>( + in, dout, num_sequence, input_dim, future_context, block_x, + block_y, idx, dfilter); } } @@ -437,9 +437,10 @@ class RowConvGradKernel dim3 block_dim = dim3(32, 32); dim3 grid_dim = dim3(DivUp(input_dim, block_dim.x), 1); int mem_per_block = (future_context * block_dim.x) * sizeof(T); - RowConvGradInputSharedMemory< - T><<>>( - dout, weights, num_sequence, input_dim, future_context, idx, din); + RowConvGradInputSharedMemory + <<>>( + dout, weights, num_sequence, input_dim, future_context, idx, + din); } else { dim3 block_dim = dim3(32, 32); dim3 grid_dim = dim3(DivUp(input_dim, block_dim.x), 1); diff --git a/paddle/fluid/operators/rrelu_op.cc b/paddle/fluid/operators/rrelu_op.cc index c543a088e9d..558c77b5b92 100644 --- a/paddle/fluid/operators/rrelu_op.cc +++ b/paddle/fluid/operators/rrelu_op.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/run_program_op.h b/paddle/fluid/operators/run_program_op.h index 8007f0bc37b..bfd33efe833 100644 --- a/paddle/fluid/operators/run_program_op.h +++ b/paddle/fluid/operators/run_program_op.h @@ -99,11 +99,12 @@ static void CheckOutputVarStatus(const Variable &src_var, var_name, platform::demangle(framework::ToTypeName(src_var.Type())))); PADDLE_ENFORCE_EQ(src_var.Get().value().IsInitialized(), - true, platform::errors::InvalidArgument( - "The tensor in output variable %s get from " - "RunProgram(Grad)Op's " - "internal scope is not initialized.", - var_name)); + true, + platform::errors::InvalidArgument( + "The tensor in output variable %s get from " + "RunProgram(Grad)Op's " + "internal scope is not initialized.", + var_name)); } else { PADDLE_THROW(platform::errors::InvalidArgument( @@ -224,7 +225,7 @@ class RunProgramOpKernel : public framework::OpKernel { framework::PEAndGraphPair pe_and_graph; auto callable = [this, is_test, &pe_and_graph]( - const framework::ExecutionContext &exe_ctx) { + const framework::ExecutionContext &exe_ctx) { pe_and_graph = ComputeImpl(exe_ctx, is_test, true); }; inner_graphs[graph_idx] = CaptureCUDAGraph( diff --git a/paddle/fluid/operators/sample_logits_op.cc b/paddle/fluid/operators/sample_logits_op.cc index e02c7ade9a1..a80d527fd5c 100644 --- a/paddle/fluid/operators/sample_logits_op.cc +++ b/paddle/fluid/operators/sample_logits_op.cc @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sample_logits_op.h" + #include + #include "paddle/fluid/operators/math/sample_prob.h" namespace paddle { diff --git a/paddle/fluid/operators/sample_logits_op.cu b/paddle/fluid/operators/sample_logits_op.cu index 273010e5443..7eff9429244 100644 --- a/paddle/fluid/operators/sample_logits_op.cu +++ b/paddle/fluid/operators/sample_logits_op.cu @@ -16,6 +16,7 @@ limitations under the License. 
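The run_program_op.h hunk above only re-indents a capturing lambda that is handed off as a callback, but the shape is easy to misread in diff form. A standalone sketch of that capture-and-invoke pattern; all names here are hypothetical:

    #include <functional>
    #include <iostream>

    // Invokes a caller-supplied callback, like the callable passed to
    // CaptureCUDAGraph in the hunk above.
    void RunWithCallback(const std::function<void(int)>& callable) {
      callable(42);
    }

    int main() {
      int captured_result = 0;
      auto callable = [&captured_result](int value) {
        captured_result = value;  // writes through the by-reference capture
      };
      RunWithCallback(callable);
      std::cout << captured_result << "\n";  // prints 42
      return 0;
    }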
*/ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" @@ -146,9 +147,9 @@ class SampleLogitsCUDAKernel : public framework::OpKernel { int threads = 512; size_t size = batch_size * num_true; int grid = (size + threads - 1) / threads; - GPUSetLabel< - T><<>>( - size, num_true, sampled_labels_data); + GPUSetLabel + <<>>( + size, num_true, sampled_labels_data); if (use_customized_samples) { const Tensor* customized_samples = @@ -190,17 +191,17 @@ class SampleLogitsCUDAKernel : public framework::OpKernel { size = batch_size * num_take; grid = (size + threads - 1) / threads; - GPUTakeAlongD1< - T><<>>( - size, batch_size, array_slice_size, idx_slice_size, p_array, p_index, - p_value); + GPUTakeAlongD1 + <<>>( + size, batch_size, array_slice_size, idx_slice_size, p_array, + p_index, p_value); if (remove_accidental_hits) { const size_t size = batch_size * (num_true + num_samples); int grid = (size + threads - 1) / threads; - gpu_compute_remove_accidental_hits< - T><<>>( - size, num_true, idx_slice_size, p_index, p_value); + gpu_compute_remove_accidental_hits + <<>>( + size, num_true, idx_slice_size, p_index, p_value); } // subtracted sampled logits with logQ(y|x) @@ -246,10 +247,10 @@ class SampleLogitsGradCUDAKernel : public framework::OpKernel { const size_t size = batch_size; int grid = (size + threads - 1) / threads; - GPUPutAlongD1< - T><<>>( - size, batch_size, array_slice_size, idx_slice_size, p_array, p_index, - p_value); + GPUPutAlongD1 + <<>>( + size, batch_size, array_slice_size, idx_slice_size, p_array, + p_index, p_value); } }; diff --git a/paddle/fluid/operators/sample_logits_op.h b/paddle/fluid/operators/sample_logits_op.h index ae741ae3212..815a2897d5d 100644 --- a/paddle/fluid/operators/sample_logits_op.h +++ b/paddle/fluid/operators/sample_logits_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor_util.h" diff --git a/paddle/fluid/operators/save_combine_op.cc b/paddle/fluid/operators/save_combine_op.cc index 7fe6623dcca..23aa88459ce 100644 --- a/paddle/fluid/operators/save_combine_op.cc +++ b/paddle/fluid/operators/save_combine_op.cc @@ -12,10 +12,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include - #include "paddle/fluid/operators/save_combine_op.h" +#include + namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/save_combine_op.h b/paddle/fluid/operators/save_combine_op.h index 8b8e27b79b9..a419e862501 100644 --- a/paddle/fluid/operators/save_combine_op.h +++ b/paddle/fluid/operators/save_combine_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/operators/save_load_combine_op_test.cc b/paddle/fluid/operators/save_load_combine_op_test.cc index 493f5081ee4..797321efd6c 100644 --- a/paddle/fluid/operators/save_load_combine_op_test.cc +++ b/paddle/fluid/operators/save_load_combine_op_test.cc @@ -15,6 +15,7 @@ limitations under the License. 
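The sample_logits and repeat_interleave launches above all size their grids with the same ceiling division, (size + threads - 1) / threads, so the final partial block is launched and masked by an in-kernel bound check. A standalone CUDA sketch with a hypothetical fill kernel:

    #include <cuda_runtime.h>

    constexpr int kNumThreads = 512;  // stand-in for PADDLE_CUDA_NUM_THREADS

    __global__ void fill_kernel(float* out, float value, long long numel) {
      long long i = blockIdx.x * (long long)blockDim.x + threadIdx.x;
      if (i < numel) out[i] = value;  // tail threads past numel do nothing
    }

    int main() {
      const long long numel = 10000;
      float* d_out;
      cudaMalloc(&d_out, numel * sizeof(float));
      // Ceiling division: enough blocks to cover every element even when
      // numel is not a multiple of the block size.
      const int blocks =
          static_cast<int>((numel + kNumThreads - 1) / kNumThreads);
      fill_kernel<<<blocks, kNumThreads>>>(d_out, 1.0f, numel);
      cudaDeviceSynchronize();
      cudaFree(d_out);
      return 0;
    }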
*/ #include #include #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/bfloat16.h" diff --git a/paddle/fluid/operators/save_op.cc b/paddle/fluid/operators/save_op.cc index d819c172e4a..02774c6b72a 100644 --- a/paddle/fluid/operators/save_op.cc +++ b/paddle/fluid/operators/save_op.cc @@ -12,14 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/save_op.h" + #include + #include #include #include #include -#include "paddle/fluid/operators/save_op.h" - namespace paddle { namespace operators { class SaveOp : public framework::OperatorWithKernel { diff --git a/paddle/fluid/operators/save_op.h b/paddle/fluid/operators/save_op.h index e4ca1423afa..64aca1ab6b7 100644 --- a/paddle/fluid/operators/save_op.h +++ b/paddle/fluid/operators/save_op.h @@ -12,6 +12,7 @@ limitations under the License. */ #pragma once #include + #include #include #include diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc index cbf2b915207..ebc4c644148 100644 --- a/paddle/fluid/operators/scale_op.cc +++ b/paddle/fluid/operators/scale_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc index 40f5699a29b..fdc98d084ed 100644 --- a/paddle/fluid/operators/scale_op_xpu.cc +++ b/paddle/fluid/operators/scale_op_xpu.cc @@ -15,6 +15,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/scale_kernel.h" diff --git a/paddle/fluid/operators/scatter_nd_add_op.cc b/paddle/fluid/operators/scatter_nd_add_op.cc index 0ae0e1500c1..0cfc3a77aad 100644 --- a/paddle/fluid/operators/scatter_nd_add_op.cc +++ b/paddle/fluid/operators/scatter_nd_add_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/ddim.h" diff --git a/paddle/fluid/operators/scatter_op.cc b/paddle/fluid/operators/scatter_op.cc index 5f6b04cf59e..a2e8071e013 100644 --- a/paddle/fluid/operators/scatter_op.cc +++ b/paddle/fluid/operators/scatter_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/ddim.h" diff --git a/paddle/fluid/operators/scatter_op_xpu.cc b/paddle/fluid/operators/scatter_op_xpu.cc index 07dd2f2d85f..3ab084b660a 100644 --- a/paddle/fluid/operators/scatter_op_xpu.cc +++ b/paddle/fluid/operators/scatter_op_xpu.cc @@ -56,11 +56,12 @@ class ScatterOpXPUKernel : public framework::OpKernel { PADDLE_ENFORCE_EQ( index->dims().size() == 1 || (index->dims().size() == 2 && index->dims()[1] == 1), - true, platform::errors::InvalidArgument( - "index's shape is error, " - "expect index'dims shape is 1 or 2 and index.dims[1] is 1" - "but got index'dims shape is %d", - index->dims().size())); + true, + platform::errors::InvalidArgument( + "index's shape is error, " + "expect index'dims shape is 1 or 2 and index.dims[1] is 1" + "but got index'dims shape is %d", + index->dims().size())); int index_size = static_cast(index->dims()[0]); auto x_dims = x->dims(); diff --git a/paddle/fluid/operators/seed_op.cc b/paddle/fluid/operators/seed_op.cc index 837ccae0284..7cad6dcab7c 100644 --- a/paddle/fluid/operators/seed_op.cc +++ b/paddle/fluid/operators/seed_op.cc @@ -74,13 +74,12 @@ REGISTER_OP_CPU_KERNEL( seed, ops::CPUSeedKernel); /* ========================== register checkpoint ===========================*/ -REGISTER_OP_VERSION(seed) - .AddCheckpoint( - R"ROC( +REGISTER_OP_VERSION(seed).AddCheckpoint( + R"ROC( Upgrade seed add a new attribute [force_cpu])ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "force_cpu", - "If true, Force fill output variable to cpu." - "memory. Otherwise, fill output variable to the running " - "device", - false)); + paddle::framework::compatible::OpVersionDesc().NewAttr( + "force_cpu", + "If true, Force fill output variable to cpu." + "memory. Otherwise, fill output variable to the running " + "device", + false)); diff --git a/paddle/fluid/operators/segment_pool_op.cc b/paddle/fluid/operators/segment_pool_op.cc index 9d4c8532a82..92010e8afc0 100644 --- a/paddle/fluid/operators/segment_pool_op.cc +++ b/paddle/fluid/operators/segment_pool_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc index f6523255e24..0f17ff1e1b7 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/sequence_ops/sequence_concat_op.h" + #include #include diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc index d58a2da29c9..4856e38011b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.cu.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/sequence_ops/sequence_concat_op.h" + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h b/paddle/fluid/operators/sequence_ops/sequence_concat_op.h index 1b8525febe2..f27e6535d31 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_concat_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_concat_op.h @@ -16,6 +16,7 @@ #include #include + #include "boost/optional.hpp" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" @@ -49,7 +50,7 @@ inline framework::LoD ConcatLoD(const Container &xs, template inline std::vector> GetDataVectorSafely( - const std::vector &vec, ARGS &&... args) { + const std::vector &vec, ARGS &&...args) { std::vector> result; result.reserve(vec.size()); for (auto *ptr : vec) { diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h index 62fa5bc26ac..1935a62621d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/context_project.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc index 23c6a0133e1..ef440a580f9 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_conv_op_xpu.cc @@ -54,10 +54,12 @@ class SequenceConvXPUKernel : public framework::OpKernel { int up_pad = std::max(0, -context_start); int down_pad = std::max(0, context_start + context_length - 1); - PADDLE_ENFORCE_EQ(up_pad, 2, platform::errors::InvalidArgument( - "Only support up_pad equal 2.")); - PADDLE_ENFORCE_EQ(down_pad, 2, platform::errors::InvalidArgument( - "Only support down_pad equal 2.")); + PADDLE_ENFORCE_EQ( + up_pad, 2, + platform::errors::InvalidArgument("Only support up_pad equal 2.")); + PADDLE_ENFORCE_EQ( + down_pad, 2, + platform::errors::InvalidArgument("Only support down_pad equal 2.")); auto xpu_context = context.template device_context().x_context(); @@ -75,8 +77,9 @@ class SequenceConvXPUKernel : public framework::OpKernel { // If batch size set to 256, the lod is {0, batch[0] - 0, // batch[1] - batch [0], ..., batch[255] - batch[254]}, // so the lod_size will be 257. 
- PADDLE_ENFORCE_LE(lod_size, 257, platform::errors::InvalidArgument( - "Only support batch size <= 256.")); + PADDLE_ENFORCE_LE( + lod_size, 257, + platform::errors::InvalidArgument("Only support batch size <= 256.")); std::vector cpu_lodx(lod_size); for (int i = 0; i < lod_size; i++) { @@ -155,15 +158,18 @@ class SequenceConvGradXPUKernel : public framework::OpKernel { int up_pad = std::max(0, -context_start); int down_pad = std::max(0, context_start + context_length - 1); - PADDLE_ENFORCE_EQ(up_pad, 2, platform::errors::InvalidArgument( - "Only support up_pad equal 2.")); - PADDLE_ENFORCE_EQ(down_pad, 2, platform::errors::InvalidArgument( - "Only support down_pad equal 2.")); + PADDLE_ENFORCE_EQ( + up_pad, 2, + platform::errors::InvalidArgument("Only support up_pad equal 2.")); + PADDLE_ENFORCE_EQ( + down_pad, 2, + platform::errors::InvalidArgument("Only support down_pad equal 2.")); auto lod_level_0 = in->lod()[0]; int lod_size = lod_level_0.size(); - PADDLE_ENFORCE_LE(lod_size, 257, platform::errors::InvalidArgument( - "Only support batch size <= 256.")); + PADDLE_ENFORCE_LE( + lod_size, 257, + platform::errors::InvalidArgument("Only support batch size <= 256.")); std::vector cpu_lodx(lod_size); for (int i = 0; i < lod_size; i++) { diff --git a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu index 9591f3e8b5b..0f47e8a9c2a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cu @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/operators/sequence_ops/sequence_enumerate_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc index 79503d9714f..552a8283b36 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_erase_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu index 12d3eee65da..a87c3279224 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.cu @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/operators/sequence_ops/sequence_erase_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h index ed98b694b27..8d10ee508a2 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_erase_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_erase_op.h @@ -15,6 +15,7 @@ limitations under the License. 
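One substantive spelling change in this patch is the variadic pack in sequence_concat_op.h above: ARGS &&... args becomes ARGS &&...args, clang-format's newer canonical form for a forwarding-reference pack. A standalone sketch of what such a pack does; the make_vector helper is hypothetical:

    #include <iostream>
    #include <utility>
    #include <vector>

    // The pack binds to both lvalues and rvalues and is forwarded
    // unchanged to emplace_back, as in GetDataVectorSafely above.
    template <typename T, typename... Args>
    std::vector<T> make_vector(Args&&...args) {
      std::vector<T> v;
      v.reserve(sizeof...(args));
      (v.emplace_back(std::forward<Args>(args)), ...);  // C++17 fold
      return v;
    }

    int main() {
      auto v = make_vector<int>(1, 2, 3);
      std::cout << v.size() << "\n";  // prints 3
      return 0;
    }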
*/ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc index 494c8e3ab74..01e9835270c 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h" + #include #include diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu index 7e1a06b9eca..5cc4ecdd12a 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h index 117fa504ff3..5abe6df09e5 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_as_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include // std::iota #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc index e4f2c1b2b8f..4817b003a28 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_expand_op.h" + #include namespace paddle { @@ -64,10 +65,11 @@ class SequenceExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( ref_level == -1 || (ref_level >= 0 && ref_level < static_cast(y_lod.size())), - true, platform::errors::InvalidArgument( - "Invlid `ref_level`, which should be either equal to -1 " - "or in [0, %d), but received `ref_level` = %u.", - y_lod.size(), ref_level)); + true, + platform::errors::InvalidArgument( + "Invlid `ref_level`, which should be either equal to -1 " + "or in [0, %d), but received `ref_level` = %u.", + y_lod.size(), ref_level)); if (ref_level == -1) ref_level = y_lod.size() - 1; diff --git a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu index 7b7bc5183bf..90f911c438b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_expand_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/sequence_ops/sequence_expand_op.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc index f22b424b307..060a3e7cab3 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_mask_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/sequence_ops/sequence_mask_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc index 5d0e1d0194e..7d018e764bd 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_pad_op.h" + #include #include diff --git a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h index 3aaa2828d5b..d4022e80d80 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pad_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/math/sequence_padding.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc index 01990ebb732..af42285158b 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_pool_op.h" + #include #include @@ -30,11 +31,12 @@ class SequencePoolOp : public framework::OperatorWithKernel { if (!ctx->IsRuntime()) { // Check the lod_level for compile-time. auto in_lod_level = ctx->GetLoDLevel("X"); - PADDLE_ENFORCE_GT(in_lod_level, 0, platform::errors::InvalidArgument( - "The LoD level of Input(X) should " - "be larger than 0, but received: " - "lod level %u.", - in_lod_level)); + PADDLE_ENFORCE_GT( + in_lod_level, 0, + platform::errors::InvalidArgument("The LoD level of Input(X) should " + "be larger than 0, but received: " + "lod level %u.", + in_lod_level)); ctx->SetLoDLevel("Out", in_lod_level - 1); } diff --git a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h index 4d981e0187a..96d02e6d2e5 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_pool_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_pool_op.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/sequence_pooling.h" @@ -38,9 +39,10 @@ class SequencePoolKernel : public framework::OpKernel { auto lod = in->lod(); auto lod_level = lod.size(); // InferShape by lod - PADDLE_ENFORCE_GT(lod_level, 0, platform::errors::InvalidArgument( - "Input(X) Tensor of SequencePoolOp " - "does not contain LoD information.")); + PADDLE_ENFORCE_GT( + lod_level, 0, + platform::errors::InvalidArgument("Input(X) Tensor of SequencePoolOp " + "does not contain LoD information.")); PADDLE_ENFORCE_LE(lod_level, 2UL, platform::errors::InvalidArgument( "The lod level of input shall be no more than 2." diff --git a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc index 980879db4d0..3a62bc554df 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/operators/sequence_ops/sequence_reshape_op.h" + #include + #include "paddle/phi/core/ddim.h" namespace paddle { diff --git a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h index 90a17d713cf..85282bf23b4 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_reverse_op.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/for_range.h" #include "paddle/phi/kernels/funcs/algorithm.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc index 25c12ab565a..6fa151af4e1 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_scatter_op.h" + #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc b/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc index 06fb444740f..fdb24892e09 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_slice_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_slice_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc index e7585f7ab04..e3f8d16a7ad 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/sequence_ops/sequence_softmax_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu index c91c59dbfee..0d91832948d 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu +++ b/paddle/fluid/operators/sequence_ops/sequence_softmax_op.cu @@ -134,10 +134,10 @@ struct SequenceSoftmaxFunctor { dim3 block_size(thread_x); dim3 grid_size(max_blocks); paddle::framework::MixVector mixv_ref_lod(&ref_lod); - sequence_softmax_kernel< - T, kThreadsPerBlock><<>>( - x.data(), mixv_ref_lod.CUDAData(context.GetPlace()), height, - out->mutable_data(context.GetPlace())); + sequence_softmax_kernel + <<>>( + x.data(), mixv_ref_lod.CUDAData(context.GetPlace()), height, + out->mutable_data(context.GetPlace())); } }; @@ -158,11 +158,11 @@ struct SequenceSoftmaxGradFunctor { dim3 grid_size(max_blocks); paddle::framework::MixVector mixv_ref_lod(&ref_lod); - sequence_softmax_grad_kernel< - T, kThreadsPerBlock><<>>( - dout.data(), out.data(), - mixv_ref_lod.CUDAData(context.GetPlace()), height, - dx->mutable_data(context.GetPlace())); + sequence_softmax_grad_kernel + <<>>( + dout.data(), out.data(), + mixv_ref_lod.CUDAData(context.GetPlace()), height, + dx->mutable_data(context.GetPlace())); } }; diff --git a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.cc b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.cc index bacdd7e4ccb..b1d5ec8e9c6 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h" + #include #include @@ -44,8 +45,9 @@ class SequenceTopkAvgPoolingOp : public framework::OperatorWithKernel { auto topks = attr.Get>("topks"); auto num_k = topks.size(); PADDLE_ENFORCE_GT( - num_k, 0, platform::errors::InvalidArgument( - "Expected topks.size() > 0, but received %zu.", num_k)); + num_k, 0, + platform::errors::InvalidArgument( + "Expected topks.size() > 0, but received %zu.", num_k)); auto row_dim = ctx->GetInputDim("ROW"); auto row_shape_0 = row_dim[0]; diff --git a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h index 47180f123fa..b5ee43387b3 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_topk_avg_pooling_op.h @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc index 180d14cfada..636be3b2f6c 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc +++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/sequence_ops/sequence_unpad_op.h" + #include #include diff --git a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h index d96dc91f3bc..d643ef860c3 100644 --- a/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h +++ b/paddle/fluid/operators/sequence_ops/sequence_unpad_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/math/sequence_padding.h" diff --git a/paddle/fluid/operators/set_value_op.cc b/paddle/fluid/operators/set_value_op.cc index 73655bcb185..4adedf09aa3 100644 --- a/paddle/fluid/operators/set_value_op.cc +++ b/paddle/fluid/operators/set_value_op.cc @@ -18,7 +18,6 @@ #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_version_registry.h" - #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/set_value_op_npu.cc b/paddle/fluid/operators/set_value_op_npu.cc index daa033f9dc6..2231eb212a2 100644 --- a/paddle/fluid/operators/set_value_op_npu.cc +++ b/paddle/fluid/operators/set_value_op_npu.cc @@ -14,7 +14,6 @@ limitations under the License. */ #include "paddle/fluid/operators/set_value_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" - #include "paddle/phi/kernels/funcs/slice_utils.h" namespace paddle { diff --git a/paddle/fluid/operators/shape_op.cc b/paddle/fluid/operators/shape_op.cc index 9001ce5d51d..38482f7b55e 100644 --- a/paddle/fluid/operators/shape_op.cc +++ b/paddle/fluid/operators/shape_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/shape_op_xpu.cc b/paddle/fluid/operators/shape_op_xpu.cc index a62d1b434e7..d4c7d937d4b 100644 --- a/paddle/fluid/operators/shape_op_xpu.cc +++ b/paddle/fluid/operators/shape_op_xpu.cc @@ -11,6 +11,7 @@ #ifdef PADDLE_WITH_XPU #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/share_buffer_op.h b/paddle/fluid/operators/share_buffer_op.h index 1d0abf14f57..1b564c3bef0 100644 --- a/paddle/fluid/operators/share_buffer_op.h +++ b/paddle/fluid/operators/share_buffer_op.h @@ -27,8 +27,9 @@ class ShareBufferOpKernel : public framework::OpKernel { const auto inputs = ctx.MultiInput("X"); auto outputs = ctx.MultiOutput("Out"); size_t n = inputs.size(); - PADDLE_ENFORCE_EQ(n, outputs.size(), platform::errors::PermissionDenied( - "Variable number not match.")); + PADDLE_ENFORCE_EQ( + n, outputs.size(), + platform::errors::PermissionDenied("Variable number not match.")); const auto &share_dims_and_dtype = ctx.Attr>("share_dims_and_dtype"); if (!share_dims_and_dtype.empty()) { diff --git a/paddle/fluid/operators/share_data_op.cc b/paddle/fluid/operators/share_data_op.cc index 6fcc29e9002..63e8cb648e8 100644 --- a/paddle/fluid/operators/share_data_op.cc +++ b/paddle/fluid/operators/share_data_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/share_data_op.h" + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -31,8 +32,9 @@ class ShareDataOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ( in_type == framework::proto::VarType::LOD_TENSOR || in_type == framework::proto::VarType::SELECTED_ROWS, - true, platform::errors::InvalidArgument( - "Type of Variable[X] must be LoDTensor or SelectedRows!")); + true, + platform::errors::InvalidArgument( + "Type of Variable[X] must be LoDTensor or SelectedRows!")); PADDLE_ENFORCE_EQ( in_type, out_type, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/shrink_rnn_memory_op.cc b/paddle/fluid/operators/shrink_rnn_memory_op.cc index 1a3666ad823..7388144dda3 100644 --- a/paddle/fluid/operators/shrink_rnn_memory_op.cc +++ b/paddle/fluid/operators/shrink_rnn_memory_op.cc @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/array_operator.h" -#include "paddle/phi/kernels/funcs/math_function.h" - #include "paddle/phi/core/lod_utils.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { namespace framework { diff --git a/paddle/fluid/operators/shuffle_batch_op.cc b/paddle/fluid/operators/shuffle_batch_op.cc index 45f7ab278a3..e338b48a4cc 100644 --- a/paddle/fluid/operators/shuffle_batch_op.cc +++ b/paddle/fluid/operators/shuffle_batch_op.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/operators/shuffle_batch_op.h" + #include + #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" #include "paddle/fluid/framework/var_type_inference.h" diff --git a/paddle/fluid/operators/shuffle_batch_op.h b/paddle/fluid/operators/shuffle_batch_op.h index 2708b4a392d..f56832f9599 100644 --- a/paddle/fluid/operators/shuffle_batch_op.h +++ b/paddle/fluid/operators/shuffle_batch_op.h @@ -21,6 +21,7 @@ #include #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" diff --git a/paddle/fluid/operators/shuffle_channel_op.cc b/paddle/fluid/operators/shuffle_channel_op.cc index 70fddc9b047..c43d456e94e 100644 --- a/paddle/fluid/operators/shuffle_channel_op.cc +++ b/paddle/fluid/operators/shuffle_channel_op.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/shuffle_channel_op.h" + #include #include @@ -61,8 +62,9 @@ class ShuffleChannelOpMaker : public framework::OpProtoAndCheckerMaker { AddAttr("group", "the number of groups.") .SetDefault(1) .AddCustomChecker([](const int& group) { - PADDLE_ENFORCE_GE(group, 1, platform::errors::InvalidArgument( - "group should be larger than 0.")); + PADDLE_ENFORCE_GE(group, 1, + platform::errors::InvalidArgument( + "group should be larger than 0.")); }); AddAttr("use_mkldnn", "(bool, default false) Only used in mkldnn kernel") diff --git a/paddle/fluid/operators/shuffle_channel_op.cu b/paddle/fluid/operators/shuffle_channel_op.cu index 582d1ea0f26..d3f6224594b 100644 --- a/paddle/fluid/operators/shuffle_channel_op.cu +++ b/paddle/fluid/operators/shuffle_channel_op.cu @@ -67,10 +67,10 @@ class ShuffleChannelOpCUDAKernel : public framework::OpKernel { const T* input_data = input->data(); T* output_data = output->mutable_data(ctx.GetPlace()); - ShuffleChannel< - T><<>>( - count, feature_map_size, output_data, input_data, group_row, - group_column, sp_sz); + ShuffleChannel + <<>>( + count, feature_map_size, output_data, input_data, group_row, + group_column, sp_sz); } }; @@ -103,10 +103,10 @@ class ShuffleChannelGradOpCUDAKernel : public framework::OpKernel { int threads = kNumCUDAThreads; int count = num * group_column * group_row * sp_sz; - ShuffleChannel< - T><<>>( - count, feature_map_size, input_grad_data, output_grad_data, group_row, - group_column, sp_sz); + ShuffleChannel + <<>>( + count, feature_map_size, input_grad_data, output_grad_data, + group_row, group_column, sp_sz); } }; } // namespace operators diff --git a/paddle/fluid/operators/shuffle_channel_op.h b/paddle/fluid/operators/shuffle_channel_op.h index aeaac486f3f..409acdfdff7 100644 --- a/paddle/fluid/operators/shuffle_channel_op.h +++ b/paddle/fluid/operators/shuffle_channel_op.h @@ -12,6 +12,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc index 016ff54645b..0cf1296fce6 100644 --- a/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/similarity_focus_op.h b/paddle/fluid/operators/similarity_focus_op.h index 4fa4d772aa3..17ea30277b8 100644 --- a/paddle/fluid/operators/similarity_focus_op.h +++ b/paddle/fluid/operators/similarity_focus_op.h @@ -18,6 +18,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" @@ -67,16 +68,16 @@ class SimilarityFocusKernel : public framework::OpKernel { std::vector> array(array_size); - bool (*cmp)(std::pair, std::pair) = []( - std::pair x, std::pair y) { - return x.first > y.first; - }; + bool (*cmp)(std::pair, std::pair) = + [](std::pair x, std::pair y) { + return x.first > y.first; + }; - int64_t (*compute_index)(int64_t*, int, int, int, int) = []( - int64_t* dim, int d1, int d2, int d3, int d4) { - return d1 * dim[1] * dim[2] * dim[3] + d2 * dim[2] * dim[3] + - d3 * dim[3] + d4; - }; + int64_t (*compute_index)(int64_t*, int, int, int, int) = + [](int64_t* dim, int d1, int d2, int d3, int d4) { + return d1 * dim[1] * dim[2] * dim[3] + d2 * dim[2] * dim[3] + + d3 * dim[3] + d4; + }; PADDLE_ENFORCE_GT( axis, 0, diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc index c6432d00e9d..a815e12d061 100644 --- a/paddle/fluid/operators/slice_op.cc +++ b/paddle/fluid/operators/slice_op.cc @@ -13,10 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/slice_op.h" + #include #include #include #include + #include "paddle/phi/kernels/funcs/slice_utils.h" namespace paddle { @@ -85,8 +87,9 @@ class SliceOp : public framework::OperatorWithKernel { } if (ctx->HasInputs("EndsTensorList")) { ends_size = ctx->Inputs("EndsTensorList").size(); - PADDLE_ENFORCE_GT(ends_size, 0, platform::errors::InvalidArgument( - "EndsTensorList size can't be zero")); + PADDLE_ENFORCE_GT(ends_size, 0, + platform::errors::InvalidArgument( + "EndsTensorList size can't be zero")); } if (!ctx->HasInput("StartsTensor")) { diff --git a/paddle/fluid/operators/slice_op.h b/paddle/fluid/operators/slice_op.h index a9a98b46d5e..f18ffef3f58 100644 --- a/paddle/fluid/operators/slice_op.h +++ b/paddle/fluid/operators/slice_op.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/eigen/eigen_function.h" #include "paddle/fluid/operators/utils.h" diff --git a/paddle/fluid/operators/slice_op_mlu.cc b/paddle/fluid/operators/slice_op_mlu.cc index 43322e4b2e7..7645232ec0c 100644 --- a/paddle/fluid/operators/slice_op_mlu.cc +++ b/paddle/fluid/operators/slice_op_mlu.cc @@ -12,9 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/slice_op.h" - #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/slice_op.h" #include "paddle/phi/kernels/funcs/slice_utils.h" namespace paddle { diff --git a/paddle/fluid/operators/slice_op_npu.cc b/paddle/fluid/operators/slice_op_npu.cc index 0d0d9ab19df..3441453430e 100644 --- a/paddle/fluid/operators/slice_op_npu.cc +++ b/paddle/fluid/operators/slice_op_npu.cc @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. 
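Note: the similarity_focus hunk above shows the new treatment of long lambda initializers. Rather than leaving "= [](" dangling and wrapping inside the parameter list, clang-format breaks after the '=' and indents the whole lambda as one continuation. A compilable reduction of that exact change, assuming <float, int> as the pair's element types for the sketch:

#include <cstdio>
#include <utility>

int main() {
  // Break after '=', keep the lambda introducer and parameter list intact:
  bool (*cmp)(std::pair<float, int>, std::pair<float, int>) =
      [](std::pair<float, int> x, std::pair<float, int> y) {
        return x.first > y.first;
      };
  std::printf("%d\n", cmp({2.0f, 0}, {1.0f, 1}));
  return 0;
}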
*/ #include "paddle/fluid/operators/slice_op.h" - #include "paddle/fluid/platform/device/npu/npu_op_runner.h" #include "paddle/phi/kernels/funcs/slice_utils.h" diff --git a/paddle/fluid/operators/slice_op_xpu.cc b/paddle/fluid/operators/slice_op_xpu.cc index 6ac1027b0ce..8f2dfd38d49 100644 --- a/paddle/fluid/operators/slice_op_xpu.cc +++ b/paddle/fluid/operators/slice_op_xpu.cc @@ -13,11 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/slice_op.h" #include #include #include #include + +#include "paddle/fluid/operators/slice_op.h" #include "xpu/refactor/math.h" namespace paddle { @@ -53,8 +54,9 @@ class SliceXPUKernel : public framework::OpKernel { start = std::max(start, 0); end = std::max(end, 0); end = std::min(end, dim_value); - PADDLE_ENFORCE_GT(end, start, platform::errors::InvalidArgument( - "end should greater than start")); + PADDLE_ENFORCE_GT( + end, start, + platform::errors::InvalidArgument("end should greater than start")); starts[i] = start; ends[i] = end; } diff --git a/paddle/fluid/operators/smooth_l1_loss_op.cc b/paddle/fluid/operators/smooth_l1_loss_op.cc index c0318d344ae..05204354d09 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op.cc +++ b/paddle/fluid/operators/smooth_l1_loss_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/smooth_l1_loss_op.h" + #include namespace paddle { diff --git a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc index 136ea68ac9e..bdc46abff2a 100644 --- a/paddle/fluid/operators/smooth_l1_loss_op_npu.cc +++ b/paddle/fluid/operators/smooth_l1_loss_op_npu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/smooth_l1_loss_op.h" #include "paddle/fluid/framework/tensor_util.h" +#include "paddle/fluid/operators/smooth_l1_loss_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/softmax_op.cc b/paddle/fluid/operators/softmax_op.cc index 3840b99dd17..7304467833a 100644 --- a/paddle/fluid/operators/softmax_op.cc +++ b/paddle/fluid/operators/softmax_op.cc @@ -61,8 +61,9 @@ class SoftmaxOp : public framework::OperatorWithKernel { if (input_data_type == framework::proto::VarType::FP16) { PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()) || platform::is_xpu_place(ctx.GetPlace()), - true, platform::errors::InvalidArgument( - "float16 can only be used on GPU/XPU place")); + true, + platform::errors::InvalidArgument( + "float16 can only be used on GPU/XPU place")); } #endif diff --git a/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc b/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc index c07467a9b0b..4b55f5af09d 100644 --- a/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc +++ b/paddle/fluid/operators/softmax_with_cross_entropy_op_xpu.cc @@ -44,8 +44,9 @@ class SoftmaxWithCrossEntropyXPUKernel : public framework::OpKernel { Tensor* loss = context.Output("Loss"); const int rank = logits->dims().size(); const int axis = phi::funcs::CanonicalAxis(context.Attr("axis"), rank); - PADDLE_ENFORCE_EQ(axis, rank - 1, platform::errors::InvalidArgument( - "axis should == rank - 1")); + PADDLE_ENFORCE_EQ( + axis, rank - 1, + platform::errors::InvalidArgument("axis should == rank - 1")); softmax->mutable_data(context.GetPlace()); loss->mutable_data(context.GetPlace()); const int n = phi::funcs::SizeToAxis(axis, logits->dims()); @@ -140,8 +141,9 @@ class SoftmaxWithCrossEntropyGradXPUKernel : public framework::OpKernel { const int rank = logit_grad->dims().size(); const int axis = phi::funcs::CanonicalAxis(context.Attr("axis"), rank); - PADDLE_ENFORCE_EQ(axis, rank - 1, platform::errors::InvalidArgument( - "axis should == rank - 1")); + PADDLE_ENFORCE_EQ( + axis, rank - 1, + platform::errors::InvalidArgument("axis should == rank - 1")); const int n = phi::funcs::SizeToAxis(axis, logit_grad->dims()); const int d = phi::funcs::SizeFromAxis(axis, logit_grad->dims()); diff --git a/paddle/fluid/operators/solve_op.cc b/paddle/fluid/operators/solve_op.cc index 57302ae0342..4d23f1ce209 100644 --- a/paddle/fluid/operators/solve_op.cc +++ b/paddle/fluid/operators/solve_op.cc @@ -13,10 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/solve_op.h" + #include #include #include #include + #include "paddle/phi/core/ddim.h" namespace paddle { diff --git a/paddle/fluid/operators/solve_op.h b/paddle/fluid/operators/solve_op.h index 7f3a5748666..928fbf755d7 100644 --- a/paddle/fluid/operators/solve_op.h +++ b/paddle/fluid/operators/solve_op.h @@ -92,9 +92,10 @@ static framework::DDim GetOutputShapeUnsqueeze( for (int axis : unsqz_dims) { int cur = axis < 0 ? 
axis + cur_output_size + 1 : axis; // Vaildity Check: the axis bound - PADDLE_ENFORCE_GE(cur, 0, platform::errors::InvalidArgument( - "The insert dimension value should " - "not be less than 0")); + PADDLE_ENFORCE_GE( + cur, 0, + platform::errors::InvalidArgument("The insert dimension value should " + "not be less than 0")); PADDLE_ENFORCE_LE(cur, cur_output_size, platform::errors::InvalidArgument( "The insert dimension value shoule not be larger " diff --git a/paddle/fluid/operators/space_to_depth_op.cc b/paddle/fluid/operators/space_to_depth_op.cc index 013467396b3..6a6972f3293 100644 --- a/paddle/fluid/operators/space_to_depth_op.cc +++ b/paddle/fluid/operators/space_to_depth_op.cc @@ -38,8 +38,9 @@ class SpaceToDepthOp : public framework::OperatorWithKernel { "Output(Out) of SpaceToDepthOp should not be null.")); auto x_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dims.size(), 4, platform::errors::InvalidArgument( - "input should be a 4D tensor")); + PADDLE_ENFORCE_EQ( + x_dims.size(), 4, + platform::errors::InvalidArgument("input should be a 4D tensor")); auto blocksize = ctx->Attrs().Get("blocksize"); PADDLE_ENFORCE_GT(blocksize, 1, diff --git a/paddle/fluid/operators/sparse_attention_op.cc b/paddle/fluid/operators/sparse_attention_op.cc index a6534543a65..14d1ffe3f11 100644 --- a/paddle/fluid/operators/sparse_attention_op.cc +++ b/paddle/fluid/operators/sparse_attention_op.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/sparse_attention_op.cu b/paddle/fluid/operators/sparse_attention_op.cu index 49f8263ab28..2949642d2f3 100644 --- a/paddle/fluid/operators/sparse_attention_op.cu +++ b/paddle/fluid/operators/sparse_attention_op.cu @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #if defined(PADDLE_WITH_CUDA) @@ -90,17 +92,15 @@ __global__ void BlockSparseSoftmaxForward(T* softmax, const T* src, T scale, if (cur_block_col < cur_block_nnz) { // read kp mask T cur_kp_mask; - if ((kp_mask != nullptr) && - std::abs(kp_mask[colindex[cur_block_col]]) < - std::numeric_limits::epsilon()) { + if ((kp_mask != nullptr) && std::abs(kp_mask[colindex[cur_block_col]]) < + std::numeric_limits::epsilon()) { cur_kp_mask = -std::numeric_limits::infinity(); } else { cur_kp_mask = 0; } // do mask operation - if ((attnptr != nullptr) && - std::abs(attnptr[colindex[cur_block_col]]) < - std::numeric_limits::epsilon()) { + if ((attnptr != nullptr) && std::abs(attnptr[colindex[cur_block_col]]) < + std::numeric_limits::epsilon()) { srcdata[cur_reg_index] = -std::numeric_limits::infinity() * scale + cur_kp_mask; } else { @@ -280,37 +280,37 @@ void SparseSoftmaxBackward(const platform::CUDADeviceContext& ctx, T scaling = static_cast(1.0) / sqrt(static_cast(num_cols)); if (num_cols <= 4) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else if (num_cols > 4 && num_cols <= 8) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else if (num_cols > 8 && num_cols <= 16) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else if (num_cols > 16 && num_cols <= 32) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else if (num_cols > 32 && num_cols <= 64) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else if (num_cols > 64 && num_cols <= 128) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else if (num_cols > 128 && num_cols <= 256) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else if (num_cols > 256 && num_cols <= 512) { - BlockSparseSoftmaxBackward<<>>( - dx_data, dout_data, out_data, scaling, offset_data, columns_data, - num_rows); + BlockSparseSoftmaxBackward + <<>>(dx_data, dout_data, out_data, scaling, offset_data, + columns_data, num_rows); } else { PADDLE_THROW(platform::errors::InvalidArgument( "The head_dim of query in sparse_attention op should less or equal " diff --git a/paddle/fluid/operators/spectral_norm_op.h b/paddle/fluid/operators/spectral_norm_op.h index 
ee75c96c23a..765b9a4dbfa 100644 --- a/paddle/fluid/operators/spectral_norm_op.h +++ b/paddle/fluid/operators/spectral_norm_op.h @@ -11,6 +11,7 @@ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/fluid/operators/spectral_op.cc b/paddle/fluid/operators/spectral_op.cc index 0270f7e0576..cd2053b4ef0 100644 --- a/paddle/fluid/operators/spectral_op.cc +++ b/paddle/fluid/operators/spectral_op.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/operators/spectral_op.h" + #include "paddle/fluid/operators/spectral_helper.h" namespace paddle { diff --git a/paddle/fluid/operators/spectral_op.h b/paddle/fluid/operators/spectral_op.h index 71b54caf5ee..4900e88fbe1 100644 --- a/paddle/fluid/operators/spectral_op.h +++ b/paddle/fluid/operators/spectral_op.h @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/data_type_transform.h" diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index dc20952903a..6c60c1a17e0 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/split_op.h" + #include #include "paddle/fluid/framework/infershape_utils.h" diff --git a/paddle/fluid/operators/split_op.h b/paddle/fluid/operators/split_op.h index cf44f341b2b..143e1d72868 100644 --- a/paddle/fluid/operators/split_op.h +++ b/paddle/fluid/operators/split_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" #include "paddle/phi/kernels/split_kernel.h" diff --git a/paddle/fluid/operators/split_op_mlu.cc b/paddle/fluid/operators/split_op_mlu.cc index adc3ea14e32..0d438854673 100644 --- a/paddle/fluid/operators/split_op_mlu.cc +++ b/paddle/fluid/operators/split_op_mlu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/split_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/split_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/split_op_xpu.cc b/paddle/fluid/operators/split_op_xpu.cc index 8f02d8157b2..b24d0a70b05 100644 --- a/paddle/fluid/operators/split_op_xpu.cc +++ b/paddle/fluid/operators/split_op_xpu.cc @@ -12,9 +12,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/split_op.h" #include #include + +#include "paddle/fluid/operators/split_op.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" namespace paddle { diff --git a/paddle/fluid/operators/spp_op.cc b/paddle/fluid/operators/spp_op.cc index b1e0127f4cf..05230399b30 100644 --- a/paddle/fluid/operators/spp_op.cc +++ b/paddle/fluid/operators/spp_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/spp_op.h" + #include #include namespace paddle { diff --git a/paddle/fluid/operators/spp_op.h b/paddle/fluid/operators/spp_op.h index aa944cfcfbb..cd81ade1f9d 100644 --- a/paddle/fluid/operators/spp_op.h +++ b/paddle/fluid/operators/spp_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/operators/strided_memcpy.h" diff --git a/paddle/fluid/operators/stack_op.cc b/paddle/fluid/operators/stack_op.cc index 6fc80ca379f..6b0a0657afb 100644 --- a/paddle/fluid/operators/stack_op.cc +++ b/paddle/fluid/operators/stack_op.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/stack_op_npu.cc b/paddle/fluid/operators/stack_op_npu.cc index 9d4ef0ffa20..c3e6e333e4c 100644 --- a/paddle/fluid/operators/stack_op_npu.cc +++ b/paddle/fluid/operators/stack_op_npu.cc @@ -30,8 +30,9 @@ class StackNPUKernel : public framework::OpKernel { if (axis < 0) axis += (x[0]->dims().size() + 1); int num = static_cast(x.size()); - PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument( - "number of input Tensor <= 0")); + PADDLE_ENFORCE_GT( + num, 0, + platform::errors::InvalidArgument("number of input Tensor <= 0")); auto stream = ctx.template device_context() @@ -59,8 +60,9 @@ class StackGradNPUKernel : public framework::OpKernel { if (axis < 0) axis += dy->dims().size(); int num = dy->dims()[axis]; - PADDLE_ENFORCE_GT(num, 0, platform::errors::InvalidArgument( - "number of input Tensor <= 0")); + PADDLE_ENFORCE_GT( + num, 0, + platform::errors::InvalidArgument("number of input Tensor <= 0")); auto stream = ctx.template device_context() diff --git a/paddle/fluid/operators/stack_op_xpu.cc b/paddle/fluid/operators/stack_op_xpu.cc index baaa2b4884c..925fcc08615 100644 --- a/paddle/fluid/operators/stack_op_xpu.cc +++ b/paddle/fluid/operators/stack_op_xpu.cc @@ -15,6 +15,7 @@ #ifdef PADDLE_WITH_XPU #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/concat_op.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" diff --git a/paddle/fluid/operators/stft_op.cc b/paddle/fluid/operators/stft_op.cc index 7d4103ddf38..36e86741729 100644 --- a/paddle/fluid/operators/stft_op.cc +++ b/paddle/fluid/operators/stft_op.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/operators/stft_op.h" + #include "paddle/fluid/operators/spectral_helper.h" namespace paddle { diff --git a/paddle/fluid/operators/stft_op.h b/paddle/fluid/operators/stft_op.h index e75c59232bc..cc17ed9a43c 100644 --- a/paddle/fluid/operators/stft_op.h +++ b/paddle/fluid/operators/stft_op.h @@ -17,7 +17,6 @@ #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/tensor.h" - #include "paddle/fluid/operators/elementwise/elementwise_op_function.h" #include "paddle/fluid/operators/frame_op.h" #include "paddle/fluid/operators/spectral_op.h" diff --git a/paddle/fluid/operators/strided_slice_op_npu.cc b/paddle/fluid/operators/strided_slice_op_npu.cc index b142b8f099b..80952e9b556 100644 --- a/paddle/fluid/operators/strided_slice_op_npu.cc +++ b/paddle/fluid/operators/strided_slice_op_npu.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/kernels/funcs/strided_slice.h" #include "paddle/fluid/operators/slice_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" +#include "paddle/phi/kernels/funcs/strided_slice.h" namespace paddle { namespace operators { @@ -186,14 +186,16 @@ class StridedSliceNPUKernel : public framework::OpKernel { out->Resize(out_dims); out->mutable_data(place); - const auto& runner = NpuOpRunner( - "StridedSlice", {*in, starts_indices_tensor, ends_indices_tensor, - strides_indices_tensor}, - {*out}, {{"begin_mask", 0}, - {"end_mask", 0}, - {"ellipsis_mask", 0}, - {"new_axis_mask", 0}, - {"shrink_axis_mask", 0}}); + const auto& runner = + NpuOpRunner("StridedSlice", + {*in, starts_indices_tensor, ends_indices_tensor, + strides_indices_tensor}, + {*out}, + {{"begin_mask", 0}, + {"end_mask", 0}, + {"ellipsis_mask", 0}, + {"new_axis_mask", 0}, + {"shrink_axis_mask", 0}}); runner.Run(stream); if (need_reverse) { diff --git a/paddle/fluid/operators/string/faster_tokenizer_op.cc b/paddle/fluid/operators/string/faster_tokenizer_op.cc index 42047021b40..9e4089680f4 100644 --- a/paddle/fluid/operators/string/faster_tokenizer_op.cc +++ b/paddle/fluid/operators/string/faster_tokenizer_op.cc @@ -9,9 +9,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/string/faster_tokenizer_op.h" + #include #include +#include #include #include #include @@ -22,10 +25,7 @@ limitations under the License. 
*/ #include #include -#include - #include "paddle/fluid/framework/string_array.h" -#include "paddle/fluid/operators/string/faster_tokenizer_op.h" namespace paddle { namespace operators { @@ -38,12 +38,11 @@ using std::ifstream; using std::int64_t; using std::min; using std::runtime_error; -using std::unordered_map; -using std::unordered_set; using std::shared_ptr; using std::size_t; -using std::int64_t; using std::string; +using std::unordered_map; +using std::unordered_set; using std::vector; using std::wstring; diff --git a/paddle/fluid/operators/string/faster_tokenizer_op.h b/paddle/fluid/operators/string/faster_tokenizer_op.h index 446be3a1999..a6b8bfea59c 100644 --- a/paddle/fluid/operators/string/faster_tokenizer_op.h +++ b/paddle/fluid/operators/string/faster_tokenizer_op.h @@ -26,15 +26,14 @@ namespace operators { using std::endl; using std::int64_t; +using std::shared_ptr; using std::size_t; using std::string; -using std::shared_ptr; -using std::vector; using std::unordered_map; using std::unordered_set; using std::vector; -using std::wstring; using std::wcout; +using std::wstring; inline bool IsControl(const wchar_t& ch); inline bool IsChineseChar(const wchar_t& ch); diff --git a/paddle/fluid/operators/sum_op.cc b/paddle/fluid/operators/sum_op.cc index 51040544fac..bc6997e36eb 100644 --- a/paddle/fluid/operators/sum_op.cc +++ b/paddle/fluid/operators/sum_op.cc @@ -36,9 +36,8 @@ class SumOp : public framework::OperatorWithKernel { OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X", "sum"); OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "sum"); - if (ctx->IsRuntime() && - ctx->GetOutputsVarType("Out")[0] == - framework::proto::VarType::LOD_TENSOR_ARRAY) { + if (ctx->IsRuntime() && ctx->GetOutputsVarType("Out")[0] == + framework::proto::VarType::LOD_TENSOR_ARRAY) { return; // skip runtime infershape when is tensor array; } @@ -47,11 +46,12 @@ class SumOp : public framework::OperatorWithKernel { auto N = x_dims.size(); PADDLE_ENFORCE_GT( - N, 0, platform::errors::InvalidArgument( - "The input tensor X's dimensions of SumOp " - "should be larger than 0. But received X's dimensions %d, " - "X's shape = [%s].", - N, &x_dims)); + N, 0, + platform::errors::InvalidArgument( + "The input tensor X's dimensions of SumOp " + "should be larger than 0. But received X's dimensions %d, " + "X's shape = [%s].", + N, &x_dims)); if (N == 1) { VLOG(3) << "Warning: SumOp have only one input, may waste memory"; } @@ -115,8 +115,9 @@ class SumOp : public framework::OperatorWithKernel { framework::LibraryType library{framework::LibraryType::kPlain}; framework::DataLayout layout{framework::DataLayout::kAnyLayout}; - PADDLE_ENFORCE_GT(x_vars.size(), 0, platform::errors::InvalidArgument( - "Input[X] should not be empty")); + PADDLE_ENFORCE_GT( + x_vars.size(), 0, + platform::errors::InvalidArgument("Input[X] should not be empty")); PADDLE_ENFORCE_NOT_NULL( x_vars[0], platform::errors::NotFound( diff --git a/paddle/fluid/operators/sum_op.cu b/paddle/fluid/operators/sum_op.cu index 8c6c083cde8..3bf249425c2 100644 --- a/paddle/fluid/operators/sum_op.cu +++ b/paddle/fluid/operators/sum_op.cu @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. 
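Note: the faster_tokenizer hunks above are pure using-declaration cleanup. The block is re-sorted alphabetically (clang-format's SortUsingDeclarations), and the duplicate "using std::int64_t;" disappears in the same hunk. What remains is a single ordered block, as in this compilable sketch:

#include <cstdint>
#include <string>
#include <vector>

using std::int64_t;
using std::string;
using std::vector;

int main() {
  vector<string> toks{"using", "decls", "sorted"};
  int64_t n = static_cast<int64_t>(toks.size());
  return n == 3 ? 0 : 1;
}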
*/ #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/operators/sum_op.h" @@ -205,8 +206,8 @@ void SumToLoDTensor(const framework::ExecutionContext &context) { reinterpret_cast(tmp_sr_in_out_array->ptr()); ComputeKernelParameter(length); - SumSelectedRowsCUDAKernel<<>>( - sr_in_out_array_data, length, rows); + SumSelectedRowsCUDAKernel + <<>>(sr_in_out_array_data, length, rows); dst_write = true; } } diff --git a/paddle/fluid/operators/sum_op.h b/paddle/fluid/operators/sum_op.h index 3c51b3398be..8c1e3a3dbf1 100644 --- a/paddle/fluid/operators/sum_op.h +++ b/paddle/fluid/operators/sum_op.h @@ -11,6 +11,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor_array.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/sum_op_mlu.cc b/paddle/fluid/operators/sum_op_mlu.cc index 179c038e837..68e31c364b6 100644 --- a/paddle/fluid/operators/sum_op_mlu.cc +++ b/paddle/fluid/operators/sum_op_mlu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/sum_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/sum_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc index 5899591549e..a1cdaddd11b 100644 --- a/paddle/fluid/operators/sum_op_xpu.cc +++ b/paddle/fluid/operators/sum_op_xpu.cc @@ -11,8 +11,9 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/sum_op.h" #include + +#include "paddle/fluid/operators/sum_op.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" namespace paddle { diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h index 166f49999d5..468c658e5e6 100644 --- a/paddle/fluid/operators/svd_helper.h +++ b/paddle/fluid/operators/svd_helper.h @@ -15,9 +15,11 @@ #pragma once #include + #include #include #include + #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/operators/diag_op.h" @@ -101,20 +103,22 @@ struct RealMulComplexFunctor { // y: complex number (c+0j) pretend to be a real number // out: complex number (ac+bcj) inline HOSTDEVICE T operator()(T x, T y) { - PADDLE_ENFORCE_LT(y.imag, 1e-6, platform::errors::InvalidArgument( - "The image part of y must to be 0" - "but got [%d]", - y.imag)); + PADDLE_ENFORCE_LT( + y.imag, 1e-6, + platform::errors::InvalidArgument("The image part of y must to be 0" + "but got [%d]", + y.imag)); return platform::complex>(x.real * y.real, x.imag * y.real); } }; static std::vector GetBroadcastShape(InTensors ins) { - PADDLE_ENFORCE_EQ(ins.size(), 2, platform::errors::InvalidArgument( - "GetBroadcastShape Receive 2 tensors" - "but got [%d]", - ins.size())); + PADDLE_ENFORCE_EQ( + ins.size(), 2, + platform::errors::InvalidArgument("GetBroadcastShape Receive 2 tensors" + "but got [%d]", + ins.size())); auto x_dim = ins[0]->dims(); auto y_dim = ins[1]->dims(); std::vector broadcast_shape = @@ -596,8 +600,9 @@ struct DeviceIndependenceTensorOperations { attrs["lower"] = lower; NameInTensorMap inputs({{"X", {&x}}}); int x_rank = x.dims().size(); - PADDLE_ENFORCE_GE(x_rank, 2, platform::errors::InvalidArgument( - "Rank must be at least 
2.")); + PADDLE_ENFORCE_GE( + x_rank, 2, + platform::errors::InvalidArgument("Rank must be at least 2.")); std::vector out_shape = phi::vectorize(x.dims()); return CreateOpRunAndReturnTensor("tril_triu", inputs, attrs, out_shape); } diff --git a/paddle/fluid/operators/svd_op.cc b/paddle/fluid/operators/svd_op.cc index 3ca7320114a..e68b013d2fb 100644 --- a/paddle/fluid/operators/svd_op.cc +++ b/paddle/fluid/operators/svd_op.cc @@ -13,10 +13,12 @@ // limitations under the License. #include "paddle/fluid/operators/svd_op.h" + #include #include #include #include + #include "paddle/phi/core/ddim.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_helper.h" diff --git a/paddle/fluid/operators/svd_op.cu b/paddle/fluid/operators/svd_op.cu index e987589e83c..317ea7c5363 100644 --- a/paddle/fluid/operators/svd_op.cu +++ b/paddle/fluid/operators/svd_op.cu @@ -16,8 +16,10 @@ limitations under the License. */ // HIP not support cusolver #include + #include #include + #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/operators/svd_op.h" #include "paddle/fluid/platform/dynload/cusolver.h" diff --git a/paddle/fluid/operators/svd_op.h b/paddle/fluid/operators/svd_op.h index 42a847206a3..1008a69e6de 100644 --- a/paddle/fluid/operators/svd_op.h +++ b/paddle/fluid/operators/svd_op.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/svd_helper.h" diff --git a/paddle/fluid/operators/sync_batch_norm_op.cu.h b/paddle/fluid/operators/sync_batch_norm_op.cu.h index 17c96544988..9818aa3651b 100644 --- a/paddle/fluid/operators/sync_batch_norm_op.cu.h +++ b/paddle/fluid/operators/sync_batch_norm_op.cu.h @@ -137,7 +137,7 @@ void SyncBatchNormFunctor(const framework::ExecutionContext &ctx, const float momentum, const bool is_test, const bool use_global_stats - ) { +) { const auto &x_dims = x->dims(); PADDLE_ENFORCE_GE(x_dims.size(), 2, platform::errors::InvalidArgument( @@ -178,13 +178,11 @@ void SyncBatchNormFunctor(const framework::ExecutionContext &ctx, const int threads = 256; int grid = std::min(C, (max_threads + threads - 1) / threads); if (layout == framework::DataLayout::kNCHW) { - KeLocalStats<<>>( - x_d, N, H * W * D, C, stats); + KeLocalStats + <<>>(x_d, N, H * W * D, C, stats); } else { - KeLocalStats<<>>( - x_d, N, H * W * D, C, stats); + KeLocalStats + <<>>(x_d, N, H * W * D, C, stats); } #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) @@ -221,13 +219,13 @@ void SyncBatchNormFunctor(const framework::ExecutionContext &ctx, int grid2 = (std::min(x_numel, max_threads) + block - 1) / block; if (layout == framework::DataLayout::kNCHW) { - KeNormAffine<<>>( - x_d, s_d, b_d, mean_data, var_data, epsilon, C, H * W * D, x_numel, - y_d); + KeNormAffine + <<>>(x_d, s_d, b_d, mean_data, var_data, + epsilon, C, H * W * D, x_numel, y_d); } else { - KeNormAffine<<>>( - x_d, s_d, b_d, mean_data, var_data, epsilon, C, H * W * D, x_numel, - y_d); + KeNormAffine + <<>>(x_d, s_d, b_d, mean_data, var_data, + epsilon, C, H * W * D, x_numel, y_d); } } @@ -436,30 +434,30 @@ void SyncBatchNormGradFunctor( if (is_inplace) { if (layout == framework::DataLayout::kNCHW) { - KeBNRestoreData< - T, framework::DataLayout::kNCHW><<>>( - px.mutable_data(ctx.GetPlace()), - scale->data>(), - bias->data>(), saved_mean, saved_inv_var, - epsilon, C, H * W * D, x_numel, x->data()); + KeBNRestoreData + <<>>(px.mutable_data(ctx.GetPlace()), + scale->data>(), + bias->data>(), + saved_mean, 
saved_inv_var, epsilon, C, + H * W * D, x_numel, x->data()); } else { - KeBNRestoreData< - T, framework::DataLayout::kNHWC><<>>( - px.mutable_data(ctx.GetPlace()), - scale->data>(), - bias->data>(), saved_mean, saved_inv_var, - epsilon, C, H * W * D, x_numel, x->data()); + KeBNRestoreData + <<>>(px.mutable_data(ctx.GetPlace()), + scale->data>(), + bias->data>(), + saved_mean, saved_inv_var, epsilon, C, + H * W * D, x_numel, x->data()); } } if (layout == framework::DataLayout::kNCHW) { - KeBackwardLocalStats< - T, threads, framework::DataLayout::kNCHW><<>>( - dy_d, x_d, saved_mean, N, fsize, C, stats); + KeBackwardLocalStats + <<>>(dy_d, x_d, saved_mean, N, fsize, C, + stats); } else { - KeBackwardLocalStats< - T, threads, framework::DataLayout::kNHWC><<>>( - dy_d, x_d, saved_mean, N, fsize, C, stats); + KeBackwardLocalStats + <<>>(dy_d, x_d, saved_mean, N, fsize, C, + stats); } #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) @@ -476,35 +474,33 @@ void SyncBatchNormGradFunctor( if (layout == framework::DataLayout::kNCHW) { if (d_scale && d_bias) { - KeBNBackwardScaleBias< - T, threads, - framework::DataLayout::kNCHW><<>>( - dy_d, x_d, saved_mean, saved_inv_var, epsilon, N, C, fsize, - d_scale->data>(), - d_bias->data>()); + KeBNBackwardScaleBias + <<>>(dy_d, x_d, saved_mean, saved_inv_var, + epsilon, N, C, fsize, + d_scale->data>(), + d_bias->data>()); } if (d_x) { - KeBNBackwardData< - T, framework::DataLayout::kNCHW><<>>( - dy_d, x_d, scale->data>(), saved_mean, - saved_inv_var, stats, stats + C, stats + 2 * C, epsilon, C, fsize, - x->numel(), d_x->data()); + KeBNBackwardData + <<>>( + dy_d, x_d, scale->data>(), saved_mean, + saved_inv_var, stats, stats + C, stats + 2 * C, epsilon, C, fsize, + x->numel(), d_x->data()); } } else { if (d_scale && d_bias) { - KeBNBackwardScaleBias< - T, threads, - framework::DataLayout::kNHWC><<>>( - dy_d, x_d, saved_mean, saved_inv_var, epsilon, N, C, fsize, - d_scale->data>(), - d_bias->data>()); + KeBNBackwardScaleBias + <<>>(dy_d, x_d, saved_mean, saved_inv_var, + epsilon, N, C, fsize, + d_scale->data>(), + d_bias->data>()); } if (d_x) { - KeBNBackwardData< - T, framework::DataLayout::kNHWC><<>>( - dy_d, x_d, scale->data>(), saved_mean, - saved_inv_var, stats, stats + C, stats + 2 * C, epsilon, C, fsize, - x->numel(), d_x->data()); + KeBNBackwardData + <<>>( + dy_d, x_d, scale->data>(), saved_mean, + saved_inv_var, stats, stats + C, stats + 2 * C, epsilon, C, fsize, + x->numel(), d_x->data()); } } } diff --git a/paddle/fluid/operators/sync_batch_norm_op_npu.cc b/paddle/fluid/operators/sync_batch_norm_op_npu.cc index b5632f4fe4a..604f8f97a3f 100644 --- a/paddle/fluid/operators/sync_batch_norm_op_npu.cc +++ b/paddle/fluid/operators/sync_batch_norm_op_npu.cc @@ -566,8 +566,9 @@ class SyncBatchNormNPUGradKernel : public framework::OpKernel { paddle::framework::TensorToVector( device_count_tensor, ctx.device_context(), &device_count_vec); device_counts = device_count_vec[0]; - PADDLE_ENFORCE_GE(device_counts, 2, platform::errors::PreconditionNotMet( - "device_counts should >= 2.")); + PADDLE_ENFORCE_GE( + device_counts, 2, + platform::errors::PreconditionNotMet("device_counts should >= 2.")); } // cacl var_ref diff --git a/paddle/fluid/operators/tdm_child_op.cc b/paddle/fluid/operators/tdm_child_op.cc index a60fc537e32..a7a218972ec 100644 --- a/paddle/fluid/operators/tdm_child_op.cc +++ b/paddle/fluid/operators/tdm_child_op.cc @@ -13,7 +13,9 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/tdm_child_op.h" + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/sampler.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/tdm_child_op.h b/paddle/fluid/operators/tdm_child_op.h index e437975320c..c39d8260a8b 100644 --- a/paddle/fluid/operators/tdm_child_op.h +++ b/paddle/fluid/operators/tdm_child_op.h @@ -20,6 +20,7 @@ #include #include #include + #include "gflags/gflags.h" #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/tdm_sampler_op.cc b/paddle/fluid/operators/tdm_sampler_op.cc index 6aad72a0d9c..68d079e6793 100644 --- a/paddle/fluid/operators/tdm_sampler_op.cc +++ b/paddle/fluid/operators/tdm_sampler_op.cc @@ -13,7 +13,9 @@ limitations under the License. */ #include "paddle/fluid/operators/tdm_sampler_op.h" + #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/sampler.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/operators/tdm_sampler_op.h b/paddle/fluid/operators/tdm_sampler_op.h index bf752a9c8ad..c3ed90ae68e 100644 --- a/paddle/fluid/operators/tdm_sampler_op.h +++ b/paddle/fluid/operators/tdm_sampler_op.h @@ -20,6 +20,7 @@ #include #include #include + #include "gflags/gflags.h" #include "paddle/fluid/framework/mixed_vector.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/temporal_shift_op.cc b/paddle/fluid/operators/temporal_shift_op.cc index 3bdb9cb972f..12d0f288d97 100644 --- a/paddle/fluid/operators/temporal_shift_op.cc +++ b/paddle/fluid/operators/temporal_shift_op.cc @@ -10,12 +10,13 @@ limitations under the License. */ #include "paddle/fluid/operators/temporal_shift_op.h" + #include #include #include -#include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/infershape_utils.h" +#include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/temporal_shift_op.cu b/paddle/fluid/operators/temporal_shift_op.cu index 1d7aeec142f..f8e642cdb89 100644 --- a/paddle/fluid/operators/temporal_shift_op.cu +++ b/paddle/fluid/operators/temporal_shift_op.cu @@ -179,13 +179,13 @@ class TemporalShiftOpCUDAKernel : public framework::OpKernel { grid = std::min(dev_ctx.GetSMCount() * blocks_per_sm, grid); if (data_layout == DataLayout::kNCHW) { - KeTemporalShiftFwNCHW< - T><<>>( - input_data, output_data, ntchw, tchw, chw, hw, t, c1, c2); + KeTemporalShiftFwNCHW + <<>>( + input_data, output_data, ntchw, tchw, chw, hw, t, c1, c2); } else { - KeTemporalShiftFwNHWC< - T><<>>( - input_data, output_data, ntchw, tchw, chw, t, c, c1, c2); + KeTemporalShiftFwNHWC + <<>>( + input_data, output_data, ntchw, tchw, chw, t, c, c1, c2); } } }; @@ -233,13 +233,15 @@ class TemporalShiftGradOpCUDAKernel : public framework::OpKernel { grid = std::min(dev_ctx.GetSMCount() * blocks_per_sm, grid); if (data_layout == DataLayout::kNCHW) { - KeTemporalShiftBwNCHW< - T><<>>( - output_grad_data, input_grad_data, ntchw, tchw, chw, hw, t, c1, c2); + KeTemporalShiftBwNCHW + <<>>( + output_grad_data, input_grad_data, ntchw, tchw, chw, hw, t, c1, + c2); } else { - KeTemporalShiftBwNHWC< - T><<>>( - output_grad_data, input_grad_data, ntchw, tchw, chw, t, c, c1, c2); + KeTemporalShiftBwNHWC + <<>>( + output_grad_data, input_grad_data, ntchw, tchw, chw, t, c, c1, + c2); } } }; diff --git 
a/paddle/fluid/operators/tensor_array_to_tensor_op.cc b/paddle/fluid/operators/tensor_array_to_tensor_op.cc index 95ae32fa2ea..41d1fc2356e 100644 --- a/paddle/fluid/operators/tensor_array_to_tensor_op.cc +++ b/paddle/fluid/operators/tensor_array_to_tensor_op.cc @@ -96,10 +96,11 @@ class LoDTensorArray2TensorOp : public framework::OperatorBase { *scope.FindVar(Output("OutIndex"))->GetMutable(); const size_t n = inx.size(); - PADDLE_ENFORCE_GT(n, 0, platform::errors::InvalidArgument( - "Input tensorarray size should > 0," - "but the received is %d", - n)); + PADDLE_ENFORCE_GT( + n, 0, + platform::errors::InvalidArgument("Input tensorarray size should > 0," + "but the received is %d", + n)); std::string base_name = Inputs("X")[0]; std::vector names; @@ -235,10 +236,11 @@ class LoDTensorArray2TensorGradOp : public framework::OperatorBase { auto &inx = scope.FindVar(Input("X"))->Get(); const size_t n = inx.size(); - PADDLE_ENFORCE_GT(n, 0, platform::errors::InvalidArgument( - "Input tensorarray size should > 0, " - "but the received is: %d. ", - n)); + PADDLE_ENFORCE_GT( + n, 0, + platform::errors::InvalidArgument("Input tensorarray size should > 0, " + "but the received is: %d. ", + n)); std::string base_name = Inputs("X")[0]; std::vector names; diff --git a/paddle/fluid/operators/tensor_formatter.cc b/paddle/fluid/operators/tensor_formatter.cc index ef46ee25156..8f02bc870e2 100644 --- a/paddle/fluid/operators/tensor_formatter.cc +++ b/paddle/fluid/operators/tensor_formatter.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/operators/tensor_formatter.h" #include + #include "paddle/fluid/framework/convert_utils.h" namespace paddle { diff --git a/paddle/fluid/operators/tensor_to_string.h b/paddle/fluid/operators/tensor_to_string.h index bd9e7f6219b..c1ca1dff9ff 100644 --- a/paddle/fluid/operators/tensor_to_string.h +++ b/paddle/fluid/operators/tensor_to_string.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/device_context.h" @@ -56,7 +57,7 @@ static std::vector ToVector(const framework::Tensor &src) { } template -static std::string FlattenToString(Args &&... 
args) { +static std::string FlattenToString(Args &&...args) { const auto &vec = ToVector(std::forward(args)...); return "[" + string::join_strings(vec, ',') + "]"; } diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 0a71875d893..1e5ce6fa3e8 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -52,9 +52,9 @@ namespace operators { using inference::Singleton; using inference::tensorrt::TensorRTEngine; -using inference::tensorrt::TRTInt8Calibrator; using inference::tensorrt::TRTCalibratorEngine; using inference::tensorrt::TRTCalibratorEngineManager; +using inference::tensorrt::TRTInt8Calibrator; static void RuntimeStaticShapeCheck(std::vector runtime_input_shape, std::vector model_input_shape) { @@ -111,10 +111,10 @@ static void RuntimeDynamicShapeCheck( // "TRT engine runtime input %s dims size(%d) inconsistent " // "with the dynamic shape size(%d)", // x, runtime_input_shape.size(), min_input_shape.size())); - auto is_input_shape_valid = [&]( - const std::vector &runtime_input_shape, - const std::vector &min_input_shape, - const std::vector &max_input_shape) -> bool { + auto is_input_shape_valid = + [&](const std::vector &runtime_input_shape, + const std::vector &min_input_shape, + const std::vector &max_input_shape) -> bool { for (size_t i = 0; i < runtime_input_shape.size(); i++) { if (runtime_input_shape[i] <= max_input_shape[i] && runtime_input_shape[i] >= min_input_shape[i]) { diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc index 243ae757277..c4278cfeb58 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
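Note: the tensor_to_string hunk above is a single-token reflow: "Args &&... args" becomes "Args &&...args", the upgraded binary's spelling for a parameter-pack expansion, with the ellipsis attached to the introduced name. A compilable sketch of the same signature; the fold-expression body is invented so the sketch does something observable:

#include <string>
#include <utility>

template <typename... Args>
static std::string FlattenToString(Args &&...args) {
  std::string out;
  // Fold over the pack; the formatting point is the '...args' spelling above.
  ((out += std::forward<Args>(args)), ...);
  return out;
}

int main() {
  return FlattenToString(std::string("a"), std::string("b")) == "ab" ? 0 : 1;
}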
*/ #include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h" + #include + #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/op_desc.h" diff --git a/paddle/fluid/operators/tile_op_npu.cc b/paddle/fluid/operators/tile_op_npu.cc index cea6b458aec..ee2d38fea70 100644 --- a/paddle/fluid/operators/tile_op_npu.cc +++ b/paddle/fluid/operators/tile_op_npu.cc @@ -27,10 +27,11 @@ class TileNPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto rank = context.Input("X")->dims().size(); PADDLE_ENFORCE_GE( - rank, 1, platform::errors::InvalidArgument( - "The rank of the input 'x' for tile op must be a positive " - "integer, but the value received is %d.", - rank)); + rank, 1, + platform::errors::InvalidArgument( + "The rank of the input 'x' for tile op must be a positive " + "integer, but the value received is %d.", + rank)); PADDLE_ENFORCE_LE( rank, MAX_RANK_SUPPORTED, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/tile_op_xpu.cc b/paddle/fluid/operators/tile_op_xpu.cc index 598377587d6..a0ce4a2bebe 100644 --- a/paddle/fluid/operators/tile_op_xpu.cc +++ b/paddle/fluid/operators/tile_op_xpu.cc @@ -25,10 +25,11 @@ class TileXPUKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto rank = context.Input("X")->dims().size(); PADDLE_ENFORCE_GE( - rank, 1, platform::errors::InvalidArgument( - "The rank of the input 'x' for tile op must be a positive " - "integer, but the value received is %d.", - rank)); + rank, 1, + platform::errors::InvalidArgument( + "The rank of the input 'x' for tile op must be a positive " + "integer, but the value received is %d.", + rank)); PADDLE_ENFORCE_LE( rank, MAX_RANK_SUPPORTED, platform::errors::InvalidArgument( diff --git a/paddle/fluid/operators/top_k_function_cuda.h b/paddle/fluid/operators/top_k_function_cuda.h index 848ab1cb556..a7981c86c45 100644 --- a/paddle/fluid/operators/top_k_function_cuda.h +++ b/paddle/fluid/operators/top_k_function_cuda.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include #include #ifdef __NVCC__ diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index cce5ad26317..d8fc129588a 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/top_k_op.h" + #include namespace paddle { @@ -39,8 +40,9 @@ class TopkOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_GE(k, 1, platform::errors::InvalidArgument( "Attribute k must be >= 1, but got k is %d.", k)); - PADDLE_ENFORCE_GE(input_dims.size(), 1, platform::errors::InvalidArgument( - "input must have >= 1d shape")); + PADDLE_ENFORCE_GE( + input_dims.size(), 1, + platform::errors::InvalidArgument("input must have >= 1d shape")); if (ctx->IsRuntime()) { PADDLE_ENFORCE_GE( diff --git a/paddle/fluid/operators/top_k_op.cu b/paddle/fluid/operators/top_k_op.cu index 30a5a802a53..fc8f08ca480 100644 --- a/paddle/fluid/operators/top_k_op.cu +++ b/paddle/fluid/operators/top_k_op.cu @@ -96,8 +96,8 @@ class TopkOpCUDAKernel : public framework::OpKernel { int gridx = input_height < kMaxHeight ? 
input_height : kMaxHeight; switch (GetDesiredBlockDim(input_width)) { FIXED_BLOCK_DIM( - KeMatrixTopK<<>>( + KeMatrixTopK + <<>>( output_data, k, indices_data, input_data, input_width, input_width, static_cast(k), gridx, input_height)); default: @@ -133,8 +133,8 @@ class TopkOpGradCUDAKernel : public framework::OpKernel { int gridx = row < kMaxHeight ? row : kMaxHeight; switch (GetDesiredBlockDim(col)) { FIXED_BLOCK_DIM( - AssignGrad<<>>( + AssignGrad + <<>>( x_grad_data, indices_data, out_grad_data, row, col, k)); default: PADDLE_THROW( diff --git a/paddle/fluid/operators/top_k_op.h b/paddle/fluid/operators/top_k_op.h index aad2f096a53..9d933eb5c47 100644 --- a/paddle/fluid/operators/top_k_op.h +++ b/paddle/fluid/operators/top_k_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/op_registry.h" diff --git a/paddle/fluid/operators/top_k_op_mlu.cc b/paddle/fluid/operators/top_k_op_mlu.cc index 102902bdaaa..16b2ac9807e 100644 --- a/paddle/fluid/operators/top_k_op_mlu.cc +++ b/paddle/fluid/operators/top_k_op_mlu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/top_k_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/top_k_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/top_k_v2_op_npu.cc b/paddle/fluid/operators/top_k_v2_op_npu.cc index 04e4d88b008..051cb9611ba 100644 --- a/paddle/fluid/operators/top_k_v2_op_npu.cc +++ b/paddle/fluid/operators/top_k_v2_op_npu.cc @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/trace_op.cc b/paddle/fluid/operators/trace_op.cc index c6c0fa3c001..36ad2d74869 100644 --- a/paddle/fluid/operators/trace_op.cc +++ b/paddle/fluid/operators/trace_op.cc @@ -118,19 +118,16 @@ REGISTER_OPERATOR(trace_grad, ops::TraceGradOp, ops::TraceGradNoNeedBufferVarsInferer); /* ========================== register checkpoint ===========================*/ -REGISTER_OP_VERSION(trace) - .AddCheckpoint( - R"ROC(Upgrade trace add a new attribute [axis2])ROC", - paddle::framework::compatible::OpVersionDesc() - .NewAttr("axis1", - "The added attribute 'axis1' is not yet registered.", - std::vector{0.0f}) - .NewAttr("axis2", - "The added attribute 'axis2' is not yet registered.", - std::vector{1.0f}) - .DeleteAttr("dim1", - "The attribute 'dim1' is not recommend according to " - "the specification 2.0.") - .DeleteAttr("dim2", - "The attribute 'dim2' is not recommend according to " - "the specification 2.0.")); +REGISTER_OP_VERSION(trace).AddCheckpoint( + R"ROC(Upgrade trace add a new attribute [axis2])ROC", + paddle::framework::compatible::OpVersionDesc() + .NewAttr("axis1", "The added attribute 'axis1' is not yet registered.", + std::vector{0.0f}) + .NewAttr("axis2", "The added attribute 'axis2' is not yet registered.", + std::vector{1.0f}) + .DeleteAttr("dim1", + "The attribute 'dim1' is not recommend according to " + "the specification 2.0.") + .DeleteAttr("dim2", + "The attribute 'dim2' is not recommend according to " + "the specification 2.0.")); diff --git a/paddle/fluid/operators/transfer_layout_op.cc b/paddle/fluid/operators/transfer_layout_op.cc index f26bcdca4a7..3b55631900d 100644 --- a/paddle/fluid/operators/transfer_layout_op.cc +++ b/paddle/fluid/operators/transfer_layout_op.cc @@ -146,7 +146,7 @@ REGISTER_OPERATOR( REGISTER_OP_CPU_KERNEL_FUNCTOR(transfer_layout, float, ops::TransferLayoutKernel); REGISTER_OP_VERSION(transfer_layout) - .AddCheckpoint( - R"ROC(refine transfer_layout, add src_layout attribute)ROC", - paddle::framework::compatible::OpVersionDesc().NewAttr( - "src_layout", "(int, the layout of the input tensor", -1)); + .AddCheckpoint(R"ROC(refine transfer_layout, add src_layout attribute)ROC", + paddle::framework::compatible::OpVersionDesc().NewAttr( + "src_layout", "(int, the layout of the input tensor", + -1)); diff --git a/paddle/fluid/operators/transpose_op.cc b/paddle/fluid/operators/transpose_op.cc index a45d32b34b9..4eceb69e8ce 100644 --- a/paddle/fluid/operators/transpose_op.cc +++ b/paddle/fluid/operators/transpose_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/transpose_op.h" + #include #include #include diff --git a/paddle/fluid/operators/transpose_op.cu.h b/paddle/fluid/operators/transpose_op.cu.h index a31ac28c991..40a967b11f7 100644 --- a/paddle/fluid/operators/transpose_op.cu.h +++ b/paddle/fluid/operators/transpose_op.cu.h @@ -96,12 +96,15 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input, Dim3 input_dims, int x = threadIdx.x; Dim3 output_dims = { - input_dims[0], input_dims[2], input_dims[1], + input_dims[0], + input_dims[2], + input_dims[1], }; // Align dim to Tiles Dim3 tile_aligned_input_dim = { - input_dims[0], (input_dims[1] + TileX - 1) / TileX, + input_dims[0], + (input_dims[1] + TileX - 1) / TileX, (input_dims[2] + TileY - 1) / TileY, }; @@ -111,7 +114,8 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input, Dim3 input_dims, // Compute real index align to tile:0, 32, 64... Index3 block_tile_index_in_input = { - input_block_tile_index[0], input_block_tile_index[1] * TileX, + input_block_tile_index[0], + input_block_tile_index[1] * TileX, input_block_tile_index[2] * TileY, }; @@ -165,12 +169,14 @@ __global__ void TilingSwapDim1And2(const T* __restrict__ input, Dim3 input_dims, // Store sm value back to out Index3 output_block_tile_index = { - input_block_tile_index[0], input_block_tile_index[2], + input_block_tile_index[0], + input_block_tile_index[2], input_block_tile_index[1], }; Index3 block_tile_index_in_output = { - output_block_tile_index[0], output_block_tile_index[1] * TileY, + output_block_tile_index[0], + output_block_tile_index[1] * TileY, output_block_tile_index[2] * TileX, }; @@ -265,15 +271,13 @@ void LaunchNarrowDims2TransposeKernel(const phi::GPUContext& d, int tile_size_i, T* output) { constexpr int NumThreads = tile_long; if (tile_size_i <= tile_long && tile_size_j <= tile_short) { - TilingSwapDim1And2< - T, NumThreads, tile_long, - tile_short><<>>( - input, input_dims, output); + TilingSwapDim1And2 + <<>>(input, input_dims, + output); } else { - TilingSwapDim1And2< - T, NumThreads, tile_short, - tile_long><<>>( - input, input_dims, output); + TilingSwapDim1And2 + <<>>(input, input_dims, + output); } } @@ -392,10 +396,10 @@ void SwapDim1And2InNarrow(const phi::GPUContext& d, const T* input, // data may not aligned to tile, so some threads wasted, we need // to find least wasted threads, which means we need to find tile // can split input properly, in another words: num_wasted_threads=0. - int num_wasted_threads = input_long_edge - - framework::CeilOrFloor( - input_long_edge, proposed_tile_long_edge) * - proposed_tile_long_edge; + int num_wasted_threads = + input_long_edge - framework::CeilOrFloor( + input_long_edge, proposed_tile_long_edge) * + proposed_tile_long_edge; int num_full_tiles = framework::CeilOrFloor( input_long_edge, proposed_tile_long_edge); @@ -499,10 +503,9 @@ void SendSwapDim1And2InTranspose(const phi::GPUContext& d, const T* input, int total_tiles_count = input_dims_aligned[0] * input_dims_aligned[1] * input_dims_aligned[2]; - TilingSwapDim1And2< - T, kNumThreads, kTileSize, - kTileSize><<>>( - input, input_dims, output); + TilingSwapDim1And2 + <<>>(input, input_dims, + output); } else if (narrow_tile) { // If input shape is like Rect, such as 2X100, use Narrow tile size. 
@@ -513,9 +516,9 @@ void SendSwapDim1And2InTranspose(const phi::GPUContext& d, const T* input, // If input shape is small, such as 8X8, just do simple copy int total_elements = input_dims[0] * input_dims[1] * input_dims[2]; auto config = phi::backends::gpu::GetGpuLaunchConfig1D(d, total_elements); - TransposeSimpleKernel<<< - config.block_per_grid.x, config.thread_per_block.x, 0, d.stream()>>>( - total_elements, input, input_dims, output); + TransposeSimpleKernel + <<>>( + total_elements, input, input_dims, output); } } @@ -543,9 +546,9 @@ struct SwapDim0And2InTranspose { size_t total_size = combined_dims[0] * combined_dims[1] * combined_dims[2]; auto config = phi::backends::gpu::GetGpuLaunchConfig1D(d, total_size); - TransposeSimpleKernel<<< - config.block_per_grid.x, config.thread_per_block.x, 0, d.stream()>>>( - total_size, in, input_dims, out); + TransposeSimpleKernel + <<>>( + total_size, in, input_dims, out); } }; diff --git a/paddle/fluid/operators/transpose_op.h b/paddle/fluid/operators/transpose_op.h index a9e4876cc82..891aa312f69 100644 --- a/paddle/fluid/operators/transpose_op.h +++ b/paddle/fluid/operators/transpose_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/transpose_op_mlu.cc b/paddle/fluid/operators/transpose_op_mlu.cc index 40cb22bab50..38f6114e48d 100644 --- a/paddle/fluid/operators/transpose_op_mlu.cc +++ b/paddle/fluid/operators/transpose_op_mlu.cc @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/transpose_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/transpose_op_xpu.cc b/paddle/fluid/operators/transpose_op_xpu.cc index 00a43c74d87..32b303238ab 100644 --- a/paddle/fluid/operators/transpose_op_xpu.cc +++ b/paddle/fluid/operators/transpose_op_xpu.cc @@ -13,10 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/transpose_op.h" #include #include #include + +#include "paddle/fluid/operators/transpose_op.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" namespace paddle { diff --git a/paddle/fluid/operators/tree_conv_op.h b/paddle/fluid/operators/tree_conv_op.h index afe5379dc3f..8c479076175 100644 --- a/paddle/fluid/operators/tree_conv_op.h +++ b/paddle/fluid/operators/tree_conv_op.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/tree2col.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/fluid/operators/tril_indices_op.cc b/paddle/fluid/operators/tril_indices_op.cc index be42f53dd23..63b5c1a2431 100644 --- a/paddle/fluid/operators/tril_indices_op.cc +++ b/paddle/fluid/operators/tril_indices_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/tril_triu_op.cc b/paddle/fluid/operators/tril_triu_op.cc index b941fa3d03a..8ca83ed8810 100644 --- a/paddle/fluid/operators/tril_triu_op.cc +++ b/paddle/fluid/operators/tril_triu_op.cc @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" - #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/truncated_gaussian_random_op.cc b/paddle/fluid/operators/truncated_gaussian_random_op.cc index dc5a66dce16..21e2061e73b 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/operators/truncated_gaussian_random_op.h" + #include #include #include @@ -19,7 +21,6 @@ limitations under the License. */ #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/truncated_gaussian_random_op.h" #include "paddle/phi/infermeta/nullary.h" namespace paddle { diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc index 261d9cee2d5..363d909d84d 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_npu.cc @@ -12,10 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/truncated_gaussian_random_op.h" #include #include + #include "paddle/fluid/framework/convert_utils.h" +#include "paddle/fluid/operators/truncated_gaussian_random_op.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" namespace paddle { diff --git a/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc b/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc index 803b61fbe81..45a4b6a3bab 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc +++ b/paddle/fluid/operators/truncated_gaussian_random_op_xpu.cc @@ -14,11 +14,12 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/truncated_gaussian_random_op.h" #include #include + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/truncated_gaussian_random_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unbind_op.cc b/paddle/fluid/operators/unbind_op.cc index f2fc08308c6..739fc98f3f0 100644 --- a/paddle/fluid/operators/unbind_op.cc +++ b/paddle/fluid/operators/unbind_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/unbind_op.h" + #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/phi/core/infermeta_utils.h" #include "paddle/phi/infermeta/unary.h" diff --git a/paddle/fluid/operators/unbind_op.h b/paddle/fluid/operators/unbind_op.h index 6e35f262de4..8e6cd391578 100644 --- a/paddle/fluid/operators/unbind_op.h +++ b/paddle/fluid/operators/unbind_op.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/strided_memcpy.h" diff --git a/paddle/fluid/operators/uniform_random_op.h b/paddle/fluid/operators/uniform_random_op.h index 3e27402c869..a988c684389 100644 --- a/paddle/fluid/operators/uniform_random_op.h +++ b/paddle/fluid/operators/uniform_random_op.h @@ -16,10 +16,12 @@ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #if defined(__NVCC__) || defined(__HIPCC__) #include + #include "paddle/fluid/framework/generator.h" #include "paddle/phi/kernels/full_kernel.h" #include "paddle/phi/kernels/funcs/distribution_helper.h" @@ -102,8 +104,9 @@ inline std::vector GetNewDataFromShapeTensorList( "Expected dtype of ShapeTensorList of %d-th must be int32, int64. " "But got " "unsupport dtype: %s.", - i, paddle::framework::DataTypeToString( - framework::TransToProtoVarType(tensor->dtype())))); + i, + paddle::framework::DataTypeToString( + framework::TransToProtoVarType(tensor->dtype())))); } } diff --git a/paddle/fluid/operators/uniform_random_op_mlu.cc b/paddle/fluid/operators/uniform_random_op_mlu.cc index 2c5f13f5a93..fdf1252eb0d 100644 --- a/paddle/fluid/operators/uniform_random_op_mlu.cc +++ b/paddle/fluid/operators/uniform_random_op_mlu.cc @@ -12,9 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/uniform_random_op.h" #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/operators/mlu/mlu_baseop.h" +#include "paddle/fluid/operators/uniform_random_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/uniform_random_op_xpu.cc b/paddle/fluid/operators/uniform_random_op_xpu.cc index ae2adf83419..23d0f61c2bd 100644 --- a/paddle/fluid/operators/uniform_random_op_xpu.cc +++ b/paddle/fluid/operators/uniform_random_op_xpu.cc @@ -14,11 +14,12 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/uniform_random_op.h" #include + #include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/operators/uniform_random_op.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/unique_consecutive_op.cc b/paddle/fluid/operators/unique_consecutive_op.cc index 24ef3a85ee2..567f7bac34b 100644 --- a/paddle/fluid/operators/unique_consecutive_op.cc +++ b/paddle/fluid/operators/unique_consecutive_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/unique_consecutive_op.h" + #include "paddle/fluid/framework/op_version_registry.h" namespace paddle { diff --git a/paddle/fluid/operators/unique_consecutive_op.cu b/paddle/fluid/operators/unique_consecutive_op.cu index fbffb01ed19..9db14e82b25 100644 --- a/paddle/fluid/operators/unique_consecutive_op.cu +++ b/paddle/fluid/operators/unique_consecutive_op.cu @@ -18,8 +18,10 @@ limitations under the License. */ #include #include #include + #include #include + #include "paddle/fluid/framework/tensor_util.h" // TensorToVector() #include "paddle/fluid/operators/unique_consecutive_op.h" // TransComute() diff --git a/paddle/fluid/operators/unique_consecutive_op.h b/paddle/fluid/operators/unique_consecutive_op.h index b31c2aa67a5..4dc1871b5d1 100644 --- a/paddle/fluid/operators/unique_consecutive_op.h +++ b/paddle/fluid/operators/unique_consecutive_op.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/transpose_op.h" diff --git a/paddle/fluid/operators/unique_op.cc b/paddle/fluid/operators/unique_op.cc index 5c103e088b5..fbbd562c1b8 100644 --- a/paddle/fluid/operators/unique_op.cc +++ b/paddle/fluid/operators/unique_op.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/unique_op.h" + #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/fluid/operators/unique_op.h b/paddle/fluid/operators/unique_op.h index 01439d21824..d59e6590a88 100644 --- a/paddle/fluid/operators/unique_op.h +++ b/paddle/fluid/operators/unique_op.h @@ -20,6 +20,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/transpose_op.h" diff --git a/paddle/fluid/operators/unique_with_counts_op.h b/paddle/fluid/operators/unique_with_counts_op.h index af8bfe813a6..227fdef2224 100644 --- a/paddle/fluid/operators/unique_with_counts_op.h +++ b/paddle/fluid/operators/unique_with_counts_op.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/unique_op.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/unpool_op.cc b/paddle/fluid/operators/unpool_op.cc index 36e9d894541..b18c4e4de44 100644 --- a/paddle/fluid/operators/unpool_op.cc +++ b/paddle/fluid/operators/unpool_op.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/operators/unpool_op.h" + #include #include #include diff --git a/paddle/fluid/operators/unpool_op.h b/paddle/fluid/operators/unpool_op.h index 35aeb4e0d61..062008f95ea 100644 --- a/paddle/fluid/operators/unpool_op.h +++ b/paddle/fluid/operators/unpool_op.h @@ -16,6 +16,7 @@ limitations under the License. 
*/ #include #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/unpooling.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc index 445e8cd468b..82edcd5a9fc 100644 --- a/paddle/fluid/operators/unsqueeze_op.cc +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -101,9 +101,10 @@ class UnsqueezeOp : public framework::OperatorWithKernel { for (int axis : unsqz_dims) { int cur = axis < 0 ? axis + cur_output_size + 1 : axis; // Vaildity Check: the axis bound - PADDLE_ENFORCE_GE(cur, 0, platform::errors::InvalidArgument( - "The insert dimension value should " - "not be less than 0")); + PADDLE_ENFORCE_GE( + cur, 0, + platform::errors::InvalidArgument("The insert dimension value should " + "not be less than 0")); PADDLE_ENFORCE_LE(cur, cur_output_size, platform::errors::InvalidArgument( "The insert dimension value shoud not be larger " diff --git a/paddle/fluid/operators/unsqueeze_op.h b/paddle/fluid/operators/unsqueeze_op.h index f6112fb59c1..86038aced38 100644 --- a/paddle/fluid/operators/unsqueeze_op.h +++ b/paddle/fluid/operators/unsqueeze_op.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device_context.h" @@ -72,9 +73,10 @@ class UnsqueezeKernel : public framework::OpKernel { for (int axis : unsqz_dims) { int cur = axis < 0 ? axis + cur_output_size + 1 : axis; // Vaildity Check: the axis bound - PADDLE_ENFORCE_GE(cur, 0, platform::errors::InvalidArgument( - "The insert dimension value should " - "not be less than 0")); + PADDLE_ENFORCE_GE( + cur, 0, + platform::errors::InvalidArgument("The insert dimension value should " + "not be less than 0")); PADDLE_ENFORCE_LE(cur, cur_output_size, platform::errors::InvalidArgument( "The insert dimension value shoule not be larger " diff --git a/paddle/fluid/operators/unstack_op.cc b/paddle/fluid/operators/unstack_op.cc index 8c8684bf4b0..df2325f5dc5 100644 --- a/paddle/fluid/operators/unstack_op.cc +++ b/paddle/fluid/operators/unstack_op.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/for_range.h" diff --git a/paddle/fluid/operators/utils.h b/paddle/fluid/operators/utils.h index d84f7b165fd..009e883ccb6 100644 --- a/paddle/fluid/operators/utils.h +++ b/paddle/fluid/operators/utils.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include #include diff --git a/paddle/fluid/operators/var_conv_2d_op.cc b/paddle/fluid/operators/var_conv_2d_op.cc index 3dffa0be2e2..977cd99984c 100644 --- a/paddle/fluid/operators/var_conv_2d_op.cc +++ b/paddle/fluid/operators/var_conv_2d_op.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/var_conv_2d_op.h" + #include #include + #include "paddle/fluid/platform/dynload/mklml.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/fluid/platform/aligned_vector.h b/paddle/fluid/platform/aligned_vector.h index 6d48917ba1f..b42ae15405e 100644 --- a/paddle/fluid/platform/aligned_vector.h +++ b/paddle/fluid/platform/aligned_vector.h @@ -43,11 +43,11 @@ HOSTDEVICE inline void Store(const AlignedVector& vec, T* addr) { } /* -* Only the address of input data is the multiplier of 1,2,4, vectorized load -* with corresponding multiplier-value is possible. Moreover, the maximum length -* of vectorized load is 128 bits once. Hence, valid length of vectorized load -* shall be determined under both former constraints. -*/ + * Only the address of input data is the multiplier of 1,2,4, vectorized load + * with corresponding multiplier-value is possible. Moreover, the maximum length + * of vectorized load is 128 bits once. Hence, valid length of vectorized load + * shall be determined under both former constraints. + */ template int GetVectorizedSize(const T* pointer) { constexpr int max_load_bits = 128; @@ -58,11 +58,11 @@ int GetVectorizedSize(const T* pointer) { constexpr int vec2 = std::alignment_of>::value; // NOLINT if (address % vec8 == 0) { /* - * Currently, decide to deal with no more than 4 data once while adopting - * vectorization load/store, if performance test shows that dealing with - * 8 data once in vectorization load/store does get optimized, return code - * below can be changed into " return std::min(8, valid_vec_size); " . - */ + * Currently, decide to deal with no more than 4 data once while adopting + * vectorization load/store, if performance test shows that dealing with + * 8 data once in vectorization load/store does get optimized, return code + * below can be changed into " return std::min(8, valid_vec_size); " . + */ return std::min(4, valid_vec_size); } else if (address % vec4 == 0) { return std::min(4, valid_vec_size); diff --git a/paddle/fluid/platform/bfloat16_test.cc b/paddle/fluid/platform/bfloat16_test.cc index 794c1ff684c..f824716ab92 100644 --- a/paddle/fluid/platform/bfloat16_test.cc +++ b/paddle/fluid/platform/bfloat16_test.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/bfloat16.h" + #include "paddle/phi/kernels/funcs/eigen/extensions.h" #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h diff --git a/paddle/fluid/platform/bfloat16_test.cu b/paddle/fluid/platform/bfloat16_test.cu index 391b91487fa..c5f38cf94ee 100644 --- a/paddle/fluid/platform/bfloat16_test.cu +++ b/paddle/fluid/platform/bfloat16_test.cu @@ -17,7 +17,9 @@ limitations under the License. */ #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #include #include + #include + #include "paddle/fluid/framework/lod_tensor.h" #if defined(PADDLE_CUDA_BF16) diff --git a/paddle/fluid/platform/collective_helper.cc b/paddle/fluid/platform/collective_helper.cc index d05de900e5e..8f0e4204772 100644 --- a/paddle/fluid/platform/collective_helper.cc +++ b/paddle/fluid/platform/collective_helper.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/platform/collective_helper.h" + #include #include "paddle/fluid/memory/allocation/allocator_facade.h" diff --git a/paddle/fluid/platform/complex_test.cc b/paddle/fluid/platform/complex_test.cc index c7ded758717..3547631064d 100644 --- a/paddle/fluid/platform/complex_test.cc +++ b/paddle/fluid/platform/complex_test.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/platform/complex.h" + #include + #include "paddle/phi/kernels/funcs/eigen/extensions.h" #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h diff --git a/paddle/fluid/platform/complex_test.cu b/paddle/fluid/platform/complex_test.cu index 08ec75878b8..b814bcde684 100644 --- a/paddle/fluid/platform/complex_test.cu +++ b/paddle/fluid/platform/complex_test.cu @@ -18,6 +18,7 @@ #include #include #include + #include #include diff --git a/paddle/fluid/platform/cpu_info.cc b/paddle/fluid/platform/cpu_info.cc index e4860444865..c32af3b37a4 100644 --- a/paddle/fluid/platform/cpu_info.cc +++ b/paddle/fluid/platform/cpu_info.cc @@ -31,6 +31,7 @@ limitations under the License. */ #endif // _WIN32 #include + #include "paddle/fluid/platform/flags.h" DECLARE_double(fraction_of_cpu_memory_to_use); diff --git a/paddle/fluid/platform/cuda_graph_with_memory_pool.cc b/paddle/fluid/platform/cuda_graph_with_memory_pool.cc index c40a43dbfb8..4ef2a9709a5 100644 --- a/paddle/fluid/platform/cuda_graph_with_memory_pool.cc +++ b/paddle/fluid/platform/cuda_graph_with_memory_pool.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" + #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/platform/cuda_graph_with_memory_pool.h b/paddle/fluid/platform/cuda_graph_with_memory_pool.h index 81b68a5c678..b8831126be0 100644 --- a/paddle/fluid/platform/cuda_graph_with_memory_pool.h +++ b/paddle/fluid/platform/cuda_graph_with_memory_pool.h @@ -24,10 +24,9 @@ namespace paddle { namespace platform { #ifdef PADDLE_WITH_CUDA -#define PD_RECORD_CUDA_GRAPH_RANDOM_KERNEL(__cond, __kernel_func, __grid, \ - __block, __sm_size, __stream, \ - __seed_inc, __seed_expr, \ - __offset_expr, ...) \ +#define PD_RECORD_CUDA_GRAPH_RANDOM_KERNEL( \ + __cond, __kernel_func, __grid, __block, __sm_size, __stream, __seed_inc, \ + __seed_expr, __offset_expr, ...) \ do { \ if (::paddle::platform::CUDAGraph::IsThisThreadCapturing() && (__cond)) { \ using __Helper = \ @@ -36,9 +35,9 @@ namespace platform { auto *dev_ctx = \ ::paddle::platform::DeviceContextPool::Instance().GetByPlace( \ ::paddle::platform::CUDAGraph::CapturingPlace()); \ - auto __set_seed_func = [=]( \ - ::paddle::platform::CUDAKernelParams *__params, \ - bool __check_only) -> bool { \ + auto __set_seed_func = \ + [=](::paddle::platform::CUDAKernelParams *__params, \ + bool __check_only) -> bool { \ if (__check_only) { \ return __params->func() == &__kernel_func && \ __Helper::Compare(*__params, __VA_ARGS__); \ @@ -56,12 +55,11 @@ namespace platform { __kernel_func<<<__grid, __block, __sm_size, __stream>>>(__VA_ARGS__); \ } while (0) #else -#define PD_RECORD_CUDA_GRAPH_RANDOM_KERNEL(__cond, __kernel_func, __grid, \ - __block, __sm_size, __stream, \ - __seed_inc, __seed_expr, \ - __offset_expr, ...) 
\ - do { \ - __kernel_func<<<__grid, __block, __sm_size, __stream>>>(__VA_ARGS__); \ +#define PD_RECORD_CUDA_GRAPH_RANDOM_KERNEL( \ + __cond, __kernel_func, __grid, __block, __sm_size, __stream, __seed_inc, \ + __seed_expr, __offset_expr, ...) \ + do { \ + __kernel_func<<<__grid, __block, __sm_size, __stream>>>(__VA_ARGS__); \ } while (0) #endif diff --git a/paddle/fluid/platform/denormal.cc b/paddle/fluid/platform/denormal.cc index 4af156d1577..4cfb0825443 100644 --- a/paddle/fluid/platform/denormal.cc +++ b/paddle/fluid/platform/denormal.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/platform/denormal.h" + #include #include diff --git a/paddle/fluid/platform/device/gpu/cuda/cuda_graph.cc b/paddle/fluid/platform/device/gpu/cuda/cuda_graph.cc index ae6d90510f4..c5a515ce436 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cuda_graph.cc +++ b/paddle/fluid/platform/device/gpu/cuda/cuda_graph.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/platform/device/gpu/cuda/cuda_graph.h" + #include #include #include diff --git a/paddle/fluid/platform/device/gpu/cuda/cuda_graph.h b/paddle/fluid/platform/device/gpu/cuda/cuda_graph.h index 083478ed7e6..b3704fc628a 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cuda_graph.h +++ b/paddle/fluid/platform/device/gpu/cuda/cuda_graph.h @@ -20,10 +20,10 @@ #include #include #include + #include "cuda.h" // NOLINT #include "cuda_runtime.h" // NOLINT #include "paddle/fluid/platform/device/gpu/gpu_types.h" - #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/place.h" diff --git a/paddle/fluid/platform/device/gpu/cuda/cuda_helper.h b/paddle/fluid/platform/device/gpu/cuda/cuda_helper.h index a32db3a9921..7185d2356aa 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cuda_helper.h +++ b/paddle/fluid/platform/device/gpu/cuda/cuda_helper.h @@ -68,7 +68,7 @@ namespace platform { * } * } * -*/ + */ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ int64_t __index__ = blockIdx.x * blockDim.x + threadIdx.x; \ diff --git a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc index 851d0d18c60..86c72769eb5 100644 --- a/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc +++ b/paddle/fluid/platform/device/gpu/cuda/cudnn_helper_test.cc @@ -15,13 +15,13 @@ limitations under the License. 
*/ #define GLOG_NO_ABBREVIATED_SEVERITIES #define GOOGLE_GLOG_DLL_DECL -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" - #include +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" + TEST(CudnnHelper, ScopedTensorDescriptor) { - using paddle::platform::ScopedTensorDescriptor; using paddle::platform::DataLayout; + using paddle::platform::ScopedTensorDescriptor; ScopedTensorDescriptor tensor_desc; std::vector shape = {2, 4, 6, 6}; @@ -65,8 +65,8 @@ TEST(CudnnHelper, ScopedTensorDescriptor) { } TEST(CudnnHelper, ScopedFilterDescriptor) { - using paddle::platform::ScopedFilterDescriptor; using paddle::platform::DataLayout; + using paddle::platform::ScopedFilterDescriptor; ScopedFilterDescriptor filter_desc; std::vector shape = {2, 3, 3}; @@ -129,8 +129,8 @@ TEST(CudnnHelper, ScopedConvolutionDescriptor) { } TEST(CudnnHelper, ScopedPoolingDescriptor) { - using paddle::platform::ScopedPoolingDescriptor; using paddle::platform::PoolingMode; + using paddle::platform::ScopedPoolingDescriptor; ScopedPoolingDescriptor pool_desc; std::vector src_kernel = {2, 2, 5}; diff --git a/paddle/fluid/platform/device/gpu/cuda_helper_test.cu b/paddle/fluid/platform/device/gpu/cuda_helper_test.cu index ab8bb2cad8c..28c0e0ef9ac 100644 --- a/paddle/fluid/platform/device/gpu/cuda_helper_test.cu +++ b/paddle/fluid/platform/device/gpu/cuda_helper_test.cu @@ -13,6 +13,7 @@ // limitations under the License. #include + #include #include #ifdef _WIN32 @@ -22,13 +23,12 @@ #define PADDLE_CUDA_FP16 #include "paddle/fluid/platform/device/gpu/gpu_device_function.h" +#include "paddle/fluid/platform/device/gpu/gpu_helper.h" #include "paddle/fluid/platform/device/gpu/gpu_primitives.h" #include "paddle/fluid/platform/float16.h" -#include "paddle/fluid/platform/device/gpu/gpu_helper.h" - -using paddle::platform::PADDLE_CUDA_NUM_THREADS; using paddle::platform::float16; +using paddle::platform::PADDLE_CUDA_NUM_THREADS; template __global__ void AddKernel(const T* data_a, T* data_b, size_t num) { diff --git a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc index 8ea30027e8a..2e58e71cc2c 100644 --- a/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc +++ b/paddle/fluid/platform/device/gpu/cudnn_desc_test.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" - #include +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" + namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device/gpu/gpu_info.cc b/paddle/fluid/platform/device/gpu/gpu_info.cc index 8c04e935134..6b302d2449d 100644 --- a/paddle/fluid/platform/device/gpu/gpu_info.cc +++ b/paddle/fluid/platform/device/gpu/gpu_info.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include #include #include + #include "gflags/gflags.h" #include "paddle/fluid/memory/memory.h" #include "paddle/fluid/platform/cuda_device_guard.h" @@ -100,8 +101,9 @@ static size_t GpuAllocSize(bool realloc) { size_t flag_mb = realloc ? FLAGS_reallocate_gpu_memory_in_mb : FLAGS_initial_gpu_memory_in_mb; size_t alloc_bytes = - (flag_mb > 0ul ? flag_mb << 20 : available_to_alloc * - FLAGS_fraction_of_gpu_memory_to_use); + (flag_mb > 0ul + ? 
flag_mb << 20 + : available_to_alloc * FLAGS_fraction_of_gpu_memory_to_use); PADDLE_ENFORCE_GE( available_to_alloc, alloc_bytes, platform::errors::ResourceExhausted("Not enough available GPU memory.")); diff --git a/paddle/fluid/platform/device/gpu/gpu_info.h b/paddle/fluid/platform/device/gpu/gpu_info.h index 94b47cca948..3a97797c982 100644 --- a/paddle/fluid/platform/device/gpu/gpu_info.h +++ b/paddle/fluid/platform/device/gpu/gpu_info.h @@ -14,6 +14,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include + #include #include #include diff --git a/paddle/fluid/platform/device/gpu/gpu_launch_config.h b/paddle/fluid/platform/device/gpu/gpu_launch_config.h index 80d60ca95bf..5cacdfcb12f 100644 --- a/paddle/fluid/platform/device/gpu/gpu_launch_config.h +++ b/paddle/fluid/platform/device/gpu/gpu_launch_config.h @@ -25,9 +25,11 @@ #endif #include + #include #include #include + #include "paddle/fluid/platform/device_context.h" #ifdef __HIPCC__ @@ -93,9 +95,9 @@ struct GpuLaunchConfig { }; /* According to NVIDIA, if number of threads per block is 64/128/256/512, - * cuda performs better. And number of blocks should be greater (at least - * 2x~4x) than number of SMs. Hence, SM count is took into account within - * this function to determine the right number of threads per block. */ + * cuda performs better. And number of blocks should be greater (at least + * 2x~4x) than number of SMs. Hence, SM count is took into account within + * this function to determine the right number of threads per block. */ inline GpuLaunchConfig GetGpuLaunchConfig1D( const platform::CUDADeviceContext& context, int64_t numel, int vec_size = 1) { @@ -143,14 +145,16 @@ inline GpuLaunchConfig GetGpuLaunchConfig1D( inline GpuLaunchConfig GetGpuLaunchConfig2D( const platform::CUDADeviceContext& context, int x_dim, int y_dim) { - PADDLE_ENFORCE_GT(x_dim, 0, platform::errors::InvalidArgument( - "x dim number should greater than 0," - " but received value is: %d", - x_dim)); - PADDLE_ENFORCE_GT(y_dim, 0, platform::errors::InvalidArgument( - "y dim number should greater than 0," - " but received value is: %d", - y_dim)); + PADDLE_ENFORCE_GT( + x_dim, 0, + platform::errors::InvalidArgument("x dim number should greater than 0," + " but received value is: %d", + x_dim)); + PADDLE_ENFORCE_GT( + y_dim, 0, + platform::errors::InvalidArgument("y dim number should greater than 0," + " but received value is: %d", + y_dim)); const int kThreadsPerBlock = 256; int block_cols = (std::min)(x_dim, kThreadsPerBlock); diff --git a/paddle/fluid/platform/device/gpu/gpu_primitives.h b/paddle/fluid/platform/device/gpu/gpu_primitives.h index 803674779e7..a0e9d459721 100644 --- a/paddle/fluid/platform/device/gpu/gpu_primitives.h +++ b/paddle/fluid/platform/device/gpu/gpu_primitives.h @@ -20,6 +20,7 @@ limitations under the License. 
*/ #include #endif #include + #include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/float16.h" diff --git a/paddle/fluid/platform/device/gpu/gpu_resource_pool.cc b/paddle/fluid/platform/device/gpu/gpu_resource_pool.cc index 2c55eb972b7..56fdb0da340 100644 --- a/paddle/fluid/platform/device/gpu/gpu_resource_pool.cc +++ b/paddle/fluid/platform/device/gpu/gpu_resource_pool.cc @@ -14,6 +14,7 @@ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include "paddle/fluid/platform/device/gpu/gpu_resource_pool.h" + #include "paddle/fluid/platform/device/gpu/gpu_info.h" namespace paddle { diff --git a/paddle/fluid/platform/device/gpu/gpu_types.h b/paddle/fluid/platform/device/gpu/gpu_types.h index d0b48eca502..2cadd55d2dc 100644 --- a/paddle/fluid/platform/device/gpu/gpu_types.h +++ b/paddle/fluid/platform/device/gpu/gpu_types.h @@ -19,11 +19,13 @@ #ifdef PADDLE_WITH_HIP #include + #include "paddle/fluid/platform/dynload/miopen.h" #include "paddle/fluid/platform/dynload/rocblas.h" #else #include + #include "paddle/fluid/platform/dynload/cublas.h" #include "paddle/fluid/platform/dynload/cublasLt.h" #include "paddle/fluid/platform/dynload/cudnn.h" diff --git a/paddle/fluid/platform/device/gpu/nccl_helper.h b/paddle/fluid/platform/device/gpu/nccl_helper.h index d0cb9c953a5..b9e612b98de 100644 --- a/paddle/fluid/platform/device/gpu/nccl_helper.h +++ b/paddle/fluid/platform/device/gpu/nccl_helper.h @@ -16,6 +16,7 @@ #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #include + #include #include #include // NOLINT @@ -264,7 +265,7 @@ class NCCLCommunicator { *allreduce ophandle and sync_batch_norm_op use ncclallreduce parallelly. So *create a new nccl comm for sync_batch_norm_op. And these codes should be *polished with a unified nccl management. - */ + */ NCCLContextMap *GetSyncBatchNormCtx( framework::Scope *scope, const std::vector &places) { auto *nccl_id_var = scope->FindVar(NCCL_ID_VARNAME); diff --git a/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc b/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc index 13cf52dc2c6..e99fc7f37a8 100644 --- a/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc +++ b/paddle/fluid/platform/device/gpu/rocm/miopen_helper_test.cc @@ -15,13 +15,13 @@ limitations under the License. 
*/ #define GLOG_NO_ABBREVIATED_SEVERITIES #define GOOGLE_GLOG_DLL_DECL -#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" - #include +#include "paddle/fluid/platform/device/gpu/gpu_dnn.h" + TEST(MIOpenHelper, ScopedTensorDescriptor) { - using paddle::platform::ScopedTensorDescriptor; using paddle::platform::DataLayout; + using paddle::platform::ScopedTensorDescriptor; ScopedTensorDescriptor tensor_desc; std::vector shape = {2, 4, 6, 6}; diff --git a/paddle/fluid/platform/device/gpu/rocm/rocm_helper.h b/paddle/fluid/platform/device/gpu/rocm/rocm_helper.h index a0f3fb0f73b..c0f6f173a79 100644 --- a/paddle/fluid/platform/device/gpu/rocm/rocm_helper.h +++ b/paddle/fluid/platform/device/gpu/rocm/rocm_helper.h @@ -65,7 +65,7 @@ namespace platform { * } * } * -*/ + */ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ int64_t __index__ = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; \ diff --git a/paddle/fluid/platform/device/ipu/ipu_device.cc b/paddle/fluid/platform/device/ipu/ipu_device.cc index 2d0381cb8b3..f6de526c900 100644 --- a/paddle/fluid/platform/device/ipu/ipu_device.cc +++ b/paddle/fluid/platform/device/ipu/ipu_device.cc @@ -45,9 +45,10 @@ int GetNumDevices() { } int num_devices = popart::DeviceManager::createDeviceManager().enumerateDevices().size(); - PADDLE_ENFORCE_GT(num_devices, 0, platform::errors::Unavailable( - "Do not found any IPU devices, please " - "make sure Poplar sdk is enabled")); + PADDLE_ENFORCE_GT( + num_devices, 0, + platform::errors::Unavailable("Do not found any IPU devices, please " + "make sure Poplar sdk is enabled")); return num_devices; } diff --git a/paddle/fluid/platform/device/ipu/ipu_executor.cc b/paddle/fluid/platform/device/ipu/ipu_executor.cc index d490334ee33..30c9bc2094a 100644 --- a/paddle/fluid/platform/device/ipu/ipu_executor.cc +++ b/paddle/fluid/platform/device/ipu/ipu_executor.cc @@ -243,7 +243,8 @@ void Executor::AcquireDevice() { VLOG(10) << "Create IPU model device..."; std::map deviceOpts{ { - "numIPUs", std::to_string(ipu_strategy_->num_ipus), + "numIPUs", + std::to_string(ipu_strategy_->num_ipus), }, {"ipuVersion", "ipu2"}, }; @@ -254,7 +255,8 @@ void Executor::AcquireDevice() { VLOG(10) << "Create offline device..."; std::map deviceOpts{ { - "numIPUs", std::to_string(ipu_strategy_->num_ipus), + "numIPUs", + std::to_string(ipu_strategy_->num_ipus), }, {"ipuVersion", "ipu2"}, }; diff --git a/paddle/fluid/platform/device/ipu/ipu_info.h b/paddle/fluid/platform/device/ipu/ipu_info.h index fe7076e0b50..06ef070ed65 100644 --- a/paddle/fluid/platform/device/ipu/ipu_info.h +++ b/paddle/fluid/platform/device/ipu/ipu_info.h @@ -13,6 +13,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_IPU #include #include + #include "glog/logging.h" namespace paddle { diff --git a/paddle/fluid/platform/device/ipu/ipu_strategy.h b/paddle/fluid/platform/device/ipu/ipu_strategy.h index da08c76fb90..0e17a485afb 100644 --- a/paddle/fluid/platform/device/ipu/ipu_strategy.h +++ b/paddle/fluid/platform/device/ipu/ipu_strategy.h @@ -17,6 +17,7 @@ limitations under the License. 
*/ #include #include #include + #include "paddle/fluid/platform/device/ipu/ipu_utils.h" #include "paddle/fluid/platform/enforce.h" @@ -143,10 +144,11 @@ class IpuStrategy { std::map> &options, // NOLINT const std::string &type_str) { auto it = options.find(key); - PADDLE_ENFORCE_NE(it, options.end(), platform::errors::InvalidArgument( - "Cannot find option: %s, type: %s " - "when setting IpuStrategy options", - key, type_str)); + PADDLE_ENFORCE_NE( + it, options.end(), + platform::errors::InvalidArgument("Cannot find option: %s, type: %s " + "when setting IpuStrategy options", + key, type_str)); it->second(value); } diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc index 254e5665674..1d5fe8c329f 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/activation_ops.cc @@ -57,14 +57,14 @@ Node *gelu_handler(Graph *graph, Node *node) { {{"value", std::vector{1.4142135623730951}}, {"dims", std::vector{1}}, {"dtype", GetOutputVarDType(node)}}); - auto zero_point_five = - CreateConst(graph, node, {}, {}, {{"value", std::vector{0.5}}, - {"dims", std::vector{1}}, - {"dtype", GetOutputVarDType(node)}}); - auto one = - CreateConst(graph, node, {}, {}, {{"value", std::vector{1}}, - {"dims", std::vector{1}}, - {"dtype", GetOutputVarDType(node)}}); + auto zero_point_five = CreateConst(graph, node, {}, {}, + {{"value", std::vector{0.5}}, + {"dims", std::vector{1}}, + {"dtype", GetOutputVarDType(node)}}); + auto one = CreateConst(graph, node, {}, {}, + {{"value", std::vector{1}}, + {"dims", std::vector{1}}, + {"dtype", GetOutputVarDType(node)}}); auto div = CreateBaseOp(graph, node, "popart_div", {GetInputVarNode("X", node), sqrt2->outputs[0]}, {}, {}); diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc index af72f84c9d7..9b91abc4a67 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/math_ops.cc @@ -44,9 +44,10 @@ Node *pow_handler(Graph *graph, Node *node) { MakeConstAttrMapFromValue(value_, {1}, GetOutputVarDType(node)); auto new_node_const = CreateConst(graph, node, {}, {}, attrs); - return CreateBaseOp(graph, node, "popart_pow", {GetInputVarNode("X", node), - new_node_const->outputs[0]}, - node->outputs); + return CreateBaseOp( + graph, node, "popart_pow", + {GetInputVarNode("X", node), new_node_const->outputs[0]}, + node->outputs); } } @@ -380,10 +381,10 @@ Node *cumsum_handler(Graph *graph, Node *node) { auto reverse = BOOST_GET_CONST(bool, op->GetAttr("reverse")); int64_t popart_reverse = 1 ? 
reverse : 0; auto axis = BOOST_GET_CONST(int, op->GetAttr("axis")); - auto axis_node = - CreateConst(graph, node, {}, {}, {{"value", std::vector{axis}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::INT64}}); + auto axis_node = CreateConst(graph, node, {}, {}, + {{"value", std::vector{axis}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); return CreateBaseOp( graph, node, "popart_cumsum", {GetInputVarNode("X", node), axis_node->outputs[0]}, diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc index 2e9913f58ef..bce6bac88e2 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/nn_ops.cc @@ -35,20 +35,20 @@ Node *conv2d_handler(Graph *graph, Node *node) { auto stride_ = BOOST_GET_CONST(std::vector, op->GetAttr("strides")); auto stride = std::vector{stride_.begin(), stride_.end()}; if (!op->Input("Bias").empty()) { - return CreateConv( - graph, node, - { - GetInputVarNode("Input", node), GetInputVarNode("Filter", node), - GetInputVarNode("Bias", node), - }, - node->outputs, dilations, group_, {}, pads, stride); + return CreateConv(graph, node, + { + GetInputVarNode("Input", node), + GetInputVarNode("Filter", node), + GetInputVarNode("Bias", node), + }, + node->outputs, dilations, group_, {}, pads, stride); } else { - return CreateConv( - graph, node, - { - GetInputVarNode("Input", node), GetInputVarNode("Filter", node), - }, - node->outputs, dilations, group_, {}, pads, stride); + return CreateConv(graph, node, + { + GetInputVarNode("Input", node), + GetInputVarNode("Filter", node), + }, + node->outputs, dilations, group_, {}, pads, stride); } } @@ -148,15 +148,16 @@ Node *pool2d_handler(Graph *graph, Node *node) { auto dilations = std::vector{}; int64_t storage_order = 0; return CreateBaseOp(graph, node, "popart_maxpool", node->inputs, - node->outputs, { - {"num_outputs", num_outputs}, - {"kernel_shape", kernel_shape}, - {"ceil_mode", ceil_mode}, - {"dilations", dilations}, - {"pads", pads}, - {"storage_order", storage_order}, - {"strides", strides}, - }); + node->outputs, + { + {"num_outputs", num_outputs}, + {"kernel_shape", kernel_shape}, + {"ceil_mode", ceil_mode}, + {"dilations", dilations}, + {"pads", pads}, + {"storage_order", storage_order}, + {"strides", strides}, + }); } else if (pooling_type == "avg") { int64_t count_include_pad = 0; return CreateBaseOp(graph, node, "popart_averagepool", node->inputs, diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc index 0525bb66f16..b51d923bfcf 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/op_builder.cc @@ -173,8 +173,9 @@ Node *CreateConv(Graph *graph, Node *node, const std::vector &inputs, Node *CreateSoftmaxOpset11(Graph *graph, Node *node, const std::vector &inputs, const std::vector &outputs, int64_t axis) { - PADDLE_ENFORCE_EQ(inputs.size(), 1, platform::errors::InvalidArgument( - "Softmax op only support one input")); + PADDLE_ENFORCE_EQ( + inputs.size(), 1, + platform::errors::InvalidArgument("Softmax op only support one input")); auto x_shape = inputs[0]->Var()->GetShape(); int x_rank = x_shape.size(); if (axis < 0) { diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc 
b/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc index aec89a1cf0d..77ce2f31669 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/search_ops.cc @@ -69,10 +69,10 @@ Node *topk_handler(Graph *graph, Node *node) { var_k = GetInputVarNode("K", node); } else { auto k = BOOST_GET_CONST(int, op->GetAttr("k")); - auto *op_k = - CreateConst(graph, node, {}, {}, {{"value", std::vector{k}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::INT64}}); + auto *op_k = CreateConst(graph, node, {}, {}, + {{"value", std::vector{k}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); var_k = op_k->outputs[0]; } diff --git a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc index 00926ee7a0b..bf32744d5a5 100644 --- a/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc +++ b/paddle/fluid/platform/device/ipu/popart_canonicalization/tensor_ops.cc @@ -61,7 +61,9 @@ Node *fill_constant_handler(Graph *graph, Node *node) { } return CreateConst(graph, node, node->inputs, node->outputs, AttributeMap{ - {"value", value}, {"dims", dims}, {"dtype", dtype}, + {"value", value}, + {"dims", dims}, + {"dtype", dtype}, }); } @@ -76,13 +78,14 @@ Node *gaussian_random_handler(Graph *graph, Node *node) { auto seed_ = BOOST_GET_CONST(int, op->GetAttr("seed")); auto seed = static_cast(seed_); return CreateBaseOp(graph, node, "popart_randomnormal", node->inputs, - node->outputs, { - {"shape", shape}, - {"dtype", dtype}, - {"mean", mean}, - {"scale", scale}, - {"seed", seed}, - }); + node->outputs, + { + {"shape", shape}, + {"dtype", dtype}, + {"mean", mean}, + {"scale", scale}, + {"seed", seed}, + }); } Node *uniform_random_handler(Graph *graph, Node *node) { @@ -96,13 +99,14 @@ Node *uniform_random_handler(Graph *graph, Node *node) { auto seed_ = BOOST_GET_CONST(int, op->GetAttr("seed")); auto seed = static_cast(seed_); return CreateBaseOp(graph, node, "popart_randomuniform", node->inputs, - node->outputs, { - {"shape", shape}, - {"dtype", dtype}, - {"high", high}, - {"low", low}, - {"seed", seed}, - }); + node->outputs, + { + {"shape", shape}, + {"dtype", dtype}, + {"high", high}, + {"low", low}, + {"seed", seed}, + }); } Node *transpose_handler(Graph *graph, Node *node) { @@ -204,32 +208,33 @@ Node *lookup_table_op_handler(Graph *graph, Node *node, if (padding_idx_ >= 0 && padding_idx_ < table_size_) { std::vector const_value_(emb_size_, 0); std::vector const_shape_{1, emb_size_}; - auto concat_const = - CreateConst(graph, node, {}, {}, {{"value", const_value_}, - {"dims", const_shape_}, - {"dtype", GetOutputVarDType(node)}}); - auto axes = - CreateConst(graph, node, {}, {}, {{"value", std::vector{0}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::INT64}}); - auto step = - CreateConst(graph, node, {}, {}, {{"value", std::vector{1}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::INT64}}); - - auto left_start = - CreateConst(graph, node, {}, {}, {{"value", std::vector{0}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::INT64}}); + auto concat_const = CreateConst(graph, node, {}, {}, + {{"value", const_value_}, + {"dims", const_shape_}, + {"dtype", GetOutputVarDType(node)}}); + auto axes = CreateConst(graph, node, {}, {}, + {{"value", std::vector{0}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); + auto step = CreateConst(graph, node, {}, {}, + 
{{"value", std::vector{1}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); + + auto left_start = CreateConst(graph, node, {}, {}, + {{"value", std::vector{0}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); auto left_end = CreateConst(graph, node, {}, {}, {{"value", std::vector{padding_idx_}}, {"dims", std::vector{1}}, {"dtype", ONNXDataType::INT64}}); - auto right_start = CreateConst( - graph, node, {}, {}, {{"value", std::vector{padding_idx_ + 1}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::INT64}}); + auto right_start = + CreateConst(graph, node, {}, {}, + {{"value", std::vector{padding_idx_ + 1}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT64}}); auto right_end = CreateConst(graph, node, {}, {}, {{"value", std::vector{table_size_}}, {"dims", std::vector{1}}, @@ -471,7 +476,9 @@ Node *assign_value_handler(Graph *graph, Node *node) { } return CreateConst(graph, node, node->inputs, node->outputs, AttributeMap{ - {"value", values}, {"dims", dims}, {"dtype", dtype}, + {"value", values}, + {"dims", dims}, + {"dtype", dtype}, }); } @@ -529,10 +536,10 @@ Node *one_hot_handler(Graph *graph, Node *node) { {{"value", std::vector{depth}}, {"dims", std::vector{1}}, {"dtype", ONNXDataType::INT64}}); - auto value_tensor = - CreateConst(graph, node, {}, {}, {{"value", std::vector{0, 1}}, - {"dims", std::vector{2}}, - {"dtype", ONNXDataType::FLOAT}}); + auto value_tensor = CreateConst(graph, node, {}, {}, + {{"value", std::vector{0, 1}}, + {"dims", std::vector{2}}, + {"dtype", ONNXDataType::FLOAT}}); return CreateBaseOp(graph, node, "popart_onehot", {GetInputVarNode("X", node), depth_tensor->outputs[0], value_tensor->outputs[0]}, @@ -550,21 +557,21 @@ Node *one_hot_v2_handler(Graph *graph, Node *node) { PADDLE_THROW(platform::errors::Unimplemented( "Do not support allow_out_of_range=True")); } else { - auto depth_tensor = - CreateConst(graph, node, {}, {}, {{"value", std::vector{depth}}, - {"dims", std::vector{1}}, - {"dtype", ONNXDataType::INT32}}); + auto depth_tensor = CreateConst(graph, node, {}, {}, + {{"value", std::vector{depth}}, + {"dims", std::vector{1}}, + {"dtype", ONNXDataType::INT32}}); Node *value_tensor = nullptr; if (GetOutputVarNode("Out", node)->Var()->GetDataType() == VarType::FP16) { - value_tensor = - CreateConst(graph, node, {}, {}, {{"value", std::vector{0, 1}}, - {"dims", std::vector{2}}, - {"dtype", ONNXDataType::FLOAT16}}); + value_tensor = CreateConst(graph, node, {}, {}, + {{"value", std::vector{0, 1}}, + {"dims", std::vector{2}}, + {"dtype", ONNXDataType::FLOAT16}}); } else { - value_tensor = - CreateConst(graph, node, {}, {}, {{"value", std::vector{0, 1}}, - {"dims", std::vector{2}}, - {"dtype", ONNXDataType::FLOAT}}); + value_tensor = CreateConst(graph, node, {}, {}, + {{"value", std::vector{0, 1}}, + {"dims", std::vector{2}}, + {"dtype", ONNXDataType::FLOAT}}); } return CreateBaseOp(graph, node, "popart_onehot", diff --git a/paddle/fluid/platform/device/mlu/cncl_helper.h b/paddle/fluid/platform/device/mlu/cncl_helper.h index 2f9bed01426..634e420d5ce 100644 --- a/paddle/fluid/platform/device/mlu/cncl_helper.h +++ b/paddle/fluid/platform/device/mlu/cncl_helper.h @@ -16,8 +16,8 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_CNCL #include - #include + #include #include #include // NOLINT diff --git a/paddle/fluid/platform/device/mlu/device_context.h b/paddle/fluid/platform/device/mlu/device_context.h index 120916b4f5c..d607b1e12f5 100644 --- a/paddle/fluid/platform/device/mlu/device_context.h +++ b/paddle/fluid/platform/device/mlu/device_context.h @@ -12,6 +12,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_MLU #include + #include "paddle/fluid/platform/device/mlu/enforce.h" #include "paddle/fluid/platform/device/mlu/mlu_stream.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/platform/device/mlu/device_context_test.cc b/paddle/fluid/platform/device/mlu/device_context_test.cc index 5caaa9dec1e..41f79c7092e 100644 --- a/paddle/fluid/platform/device/mlu/device_context_test.cc +++ b/paddle/fluid/platform/device/mlu/device_context_test.cc @@ -20,9 +20,9 @@ limitations under the License. */ TEST(Device, Init) { using paddle::platform::DeviceContext; + using paddle::platform::MLUContext; using paddle::platform::MLUDeviceContext; using paddle::platform::MLUPlace; - using paddle::platform::MLUContext; int count = paddle::platform::GetMLUDeviceCount(); for (int i = 0; i < count; i++) { @@ -34,9 +34,9 @@ TEST(Device, Init) { } TEST(Device, MLUDeviceContext) { + using paddle::mluCnnlHandle; using paddle::platform::MLUDeviceContext; using paddle::platform::MLUPlace; - using paddle::mluCnnlHandle; int count = paddle::platform::GetMLUDeviceCount(); for (int i = 0; i < count; i++) { @@ -48,9 +48,9 @@ TEST(Device, MLUDeviceContext) { } TEST(Device, MLUStream) { + using paddle::mluStream; using paddle::platform::MLUDeviceContext; using paddle::platform::MLUPlace; - using paddle::mluStream; int count = paddle::platform::GetMLUDeviceCount(); for (int i = 0; i < count; i++) { @@ -62,11 +62,11 @@ TEST(Device, MLUStream) { } TEST(Device, DeviceContextPool) { + using paddle::platform::CPUPlace; using paddle::platform::DeviceContextPool; using paddle::platform::MLUDeviceContext; - using paddle::platform::Place; - using paddle::platform::CPUPlace; using paddle::platform::MLUPlace; + using paddle::platform::Place; DeviceContextPool& pool = DeviceContextPool::Instance(); auto cpu_dev_ctx1 = pool.Get(CPUPlace()); diff --git a/paddle/fluid/platform/device/mlu/mlu_collective_helper.cc b/paddle/fluid/platform/device/mlu/mlu_collective_helper.cc index 7708267c1bc..4051caac1c8 100644 --- a/paddle/fluid/platform/device/mlu/mlu_collective_helper.cc +++ b/paddle/fluid/platform/device/mlu/mlu_collective_helper.cc @@ -14,6 +14,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_CNCL) #include + #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/mlu/enforce.h" diff --git a/paddle/fluid/platform/device/mlu/mlu_info.cc b/paddle/fluid/platform/device/mlu/mlu_info.cc index 7cad99bf5d2..e3672707210 100644 --- a/paddle/fluid/platform/device/mlu/mlu_info.cc +++ b/paddle/fluid/platform/device/mlu/mlu_info.cc @@ -13,8 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/mlu/mlu_info.h" + #include #include + #include "gflags/gflags.h" #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/device/mlu/enforce.h" @@ -187,8 +189,9 @@ static size_t MLUAllocSize(bool realloc) { size_t flag_mb = realloc ? FLAGS_reallocate_gpu_memory_in_mb : FLAGS_initial_gpu_memory_in_mb; size_t alloc_bytes = - (flag_mb > 0ul ? 
flag_mb << 20 : available_to_alloc * - FLAGS_fraction_of_gpu_memory_to_use); + (flag_mb > 0ul + ? flag_mb << 20 + : available_to_alloc * FLAGS_fraction_of_gpu_memory_to_use); PADDLE_ENFORCE_GE( available_to_alloc, alloc_bytes, platform::errors::ResourceExhausted("Not enough available MLU memory.")); diff --git a/paddle/fluid/platform/device/mlu/mlu_stream.cc b/paddle/fluid/platform/device/mlu/mlu_stream.cc index 7a27a49250a..f570cc77e5a 100644 --- a/paddle/fluid/platform/device/mlu/mlu_stream.cc +++ b/paddle/fluid/platform/device/mlu/mlu_stream.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/mlu/mlu_stream.h" + #include "paddle/fluid/platform/device/mlu/device_context.h" namespace paddle { diff --git a/paddle/fluid/platform/device/npu/ascend_npu_info.cc b/paddle/fluid/platform/device/npu/ascend_npu_info.cc index c100b2d0a17..a9204ac3fca 100644 --- a/paddle/fluid/platform/device/npu/ascend_npu_info.cc +++ b/paddle/fluid/platform/device/npu/ascend_npu_info.cc @@ -12,7 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/npu/ascend_npu_info.h" + #include + #include "acl/acl_rt.h" namespace paddle { diff --git a/paddle/fluid/platform/device/npu/dynload/hccl.h b/paddle/fluid/platform/device/npu/dynload/hccl.h index 3d7587bfa26..ae140dd2950 100644 --- a/paddle/fluid/platform/device/npu/dynload/hccl.h +++ b/paddle/fluid/platform/device/npu/dynload/hccl.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include "paddle/fluid/platform/dynload/dynamic_loader.h" diff --git a/paddle/fluid/platform/device/npu/enforce_npu.h b/paddle/fluid/platform/device/npu/enforce_npu.h index 3887ee4866a..24392686863 100644 --- a/paddle/fluid/platform/device/npu/enforce_npu.h +++ b/paddle/fluid/platform/device/npu/enforce_npu.h @@ -17,10 +17,9 @@ limitations under the License. 
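The MLUAllocSize hunk above (and the matching NPUAllocSize one below) only re-breaks a conditional expression: once a ternary overflows the line, clang-format now starts the ? and : branches on their own lines. A sketch of the underlying size rule, with local stand-ins for the FLAGS_* globals (parameter names are mine, not Paddle's):

#include <cstddef>

// If an explicit flag in MB is set, use it (<< 20 converts MB to bytes);
// otherwise take a fraction of the memory currently available.
std::size_t AllocBytes(std::size_t flag_mb, std::size_t available_to_alloc,
                       double fraction) {
  return flag_mb > 0ul
             ? flag_mb << 20
             : static_cast<std::size_t>(available_to_alloc * fraction);
}

int main() { return AllocBytes(1, 0, 0.0) == (1u << 20) ? 0 : 1; }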
*/ #ifdef PADDLE_WITH_ASCEND_CL #include -#include "paddle/fluid/platform/enforce.h" - #include "acl/acl.h" #include "hccl/hccl_types.h" +#include "paddle/fluid/platform/enforce.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device/npu/hccl_helper.h b/paddle/fluid/platform/device/npu/hccl_helper.h index 134ec04030d..107fe5989dd 100644 --- a/paddle/fluid/platform/device/npu/hccl_helper.h +++ b/paddle/fluid/platform/device/npu/hccl_helper.h @@ -17,6 +17,7 @@ #ifdef PADDLE_WITH_ASCEND_CL #include + #include #include #include // NOLINT @@ -24,11 +25,10 @@ #include #include -#include "paddle/fluid/platform/device/npu/dynload/hccl.h" -#include "paddle/fluid/platform/device/npu/enforce_npu.h" - #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/platform/collective_helper.h" +#include "paddle/fluid/platform/device/npu/dynload/hccl.h" +#include "paddle/fluid/platform/device/npu/enforce_npu.h" #include "paddle/fluid/platform/float16.h" #define HCCL_ID_VARNAME "HCCLID" diff --git a/paddle/fluid/platform/device/npu/npu_collective_helper.cc b/paddle/fluid/platform/device/npu/npu_collective_helper.cc index cdec3519a23..77528fe19fc 100644 --- a/paddle/fluid/platform/device/npu/npu_collective_helper.cc +++ b/paddle/fluid/platform/device/npu/npu_collective_helper.cc @@ -14,6 +14,7 @@ #if defined(PADDLE_WITH_ASCEND_CL) #include + #include "paddle/fluid/platform/collective_helper.h" #include "paddle/fluid/platform/device/npu/enforce_npu.h" diff --git a/paddle/fluid/platform/device/npu/npu_info.cc b/paddle/fluid/platform/device/npu/npu_info.cc index b5516944b75..2688c88f557 100644 --- a/paddle/fluid/platform/device/npu/npu_info.cc +++ b/paddle/fluid/platform/device/npu/npu_info.cc @@ -13,12 +13,12 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/npu/npu_info.h" + #include #include #include #include "gflags/gflags.h" - #include "paddle/fluid/platform/lock_guard_ptr.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/monitor.h" @@ -153,8 +153,9 @@ static size_t NPUAllocSize(bool realloc) { size_t flag_mb = realloc ? FLAGS_reallocate_gpu_memory_in_mb : FLAGS_initial_gpu_memory_in_mb; size_t alloc_bytes = - (flag_mb > 0ul ? flag_mb << 20 : available_to_alloc * - FLAGS_fraction_of_gpu_memory_to_use); + (flag_mb > 0ul + ? flag_mb << 20 + : available_to_alloc * FLAGS_fraction_of_gpu_memory_to_use); PADDLE_ENFORCE_GE( available_to_alloc, alloc_bytes, platform::errors::ResourceExhausted("Not enough available NPU memory.")); diff --git a/paddle/fluid/platform/device/npu/npu_op_runner.cc b/paddle/fluid/platform/device/npu/npu_op_runner.cc index 72169ae303b..d38443acca3 100644 --- a/paddle/fluid/platform/device/npu/npu_op_runner.cc +++ b/paddle/fluid/platform/device/npu/npu_op_runner.cc @@ -23,7 +23,6 @@ limitations under the License. 
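The NPU header hunks above are pure include regrouping: each block is alphabetized, and the blank lines inserted after the bare #include lines separate C system headers, C++ standard headers, and project headers into distinct groups, per the Google-derived style the repo's clang-format config appears to follow. An illustrative translation unit (the specific headers are my choice, not taken from the patch):

#include <stdio.h>  // C system headers first

#include <string>  // C++ standard headers next, alphabetized
#include <thread>  // NOLINT (mirrors the marker used throughout the patch)

// Project headers such as "paddle/fluid/platform/enforce.h" would follow
// as a third group.

int main() {
  std::string group = "include blocks";
  printf("%zu\n", group.size());
  return 0;
}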
*/ #include "acl/acl.h" #include "acl/acl_op_compiler.h" - #include "paddle/fluid/framework/framework.pb.h" DECLARE_string(npu_precision_mode); diff --git a/paddle/fluid/platform/device/npu/npu_resource_pool.cc b/paddle/fluid/platform/device/npu/npu_resource_pool.cc index d837e90c3c4..e7c302289db 100644 --- a/paddle/fluid/platform/device/npu/npu_resource_pool.cc +++ b/paddle/fluid/platform/device/npu/npu_resource_pool.cc @@ -14,6 +14,7 @@ #ifdef PADDLE_WITH_ASCEND_CL #include "paddle/fluid/platform/device/npu/npu_resource_pool.h" + #include "paddle/fluid/platform/device/npu/npu_info.h" namespace paddle { diff --git a/paddle/fluid/platform/device/npu/npu_stream.cc b/paddle/fluid/platform/device/npu/npu_stream.cc index 0b15a0d937e..55a73146815 100644 --- a/paddle/fluid/platform/device/npu/npu_stream.cc +++ b/paddle/fluid/platform/device/npu/npu_stream.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/npu/npu_stream.h" + #include "paddle/fluid/platform/enforce.h" namespace paddle { diff --git a/paddle/fluid/platform/device/xpu/bkcl_helper.h b/paddle/fluid/platform/device/xpu/bkcl_helper.h index 24fd8b5faa4..a7a3e4f0605 100644 --- a/paddle/fluid/platform/device/xpu/bkcl_helper.h +++ b/paddle/fluid/platform/device/xpu/bkcl_helper.h @@ -17,6 +17,7 @@ #pragma once #include + #include #include #include // NOLINT @@ -217,7 +218,7 @@ class BKCLCommunicator { *bkcl_all_reduce *parallelly. So create a new bkcl comm for sync_batch_norm_op. And these *codes should be polished with a unified bkcl management. - */ + */ BKCLContextMap *GetSyncBatchNormCtx( framework::Scope *scope, const std::vector &places) { auto *bkcl_id_var = scope->FindVar(BKCL_ID_VARNAME); diff --git a/paddle/fluid/platform/device/xpu/enforce_xpu.h b/paddle/fluid/platform/device/xpu/enforce_xpu.h index c55d91c3015..77d14aa712e 100644 --- a/paddle/fluid/platform/device/xpu/enforce_xpu.h +++ b/paddle/fluid/platform/device/xpu/enforce_xpu.h @@ -15,7 +15,6 @@ limitations under the License. */ #pragma once #include "paddle/fluid/platform/device/xpu/xpu_header.h" - #include "paddle/phi/backends/xpu/enforce_xpu.h" namespace paddle { diff --git a/paddle/fluid/platform/device/xpu/tests/enforce_xpu_test.cc b/paddle/fluid/platform/device/xpu/tests/enforce_xpu_test.cc index 8cba98f3fb3..0b528c3999e 100644 --- a/paddle/fluid/platform/device/xpu/tests/enforce_xpu_test.cc +++ b/paddle/fluid/platform/device/xpu/tests/enforce_xpu_test.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device/xpu/enforce_xpu.h" + #include "gtest/gtest.h" template diff --git a/paddle/fluid/platform/device/xpu/xpu_info.cc b/paddle/fluid/platform/device/xpu/xpu_info.cc index cdd7ee7f806..dbc8ed4a51a 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.cc +++ b/paddle/fluid/platform/device/xpu/xpu_info.cc @@ -13,14 +13,13 @@ limitations under the License. 
*/ #include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/fluid/platform/device/xpu/enforce_xpu.h" #include "paddle/fluid/platform/device/xpu/xpu_header.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/place.h" - #include "paddle/phi/backends/xpu/xpu_info.h" namespace paddle { diff --git a/paddle/fluid/platform/device/xpu/xpu_info.h b/paddle/fluid/platform/device/xpu/xpu_info.h index 38b4defadc6..2dd0f327530 100644 --- a/paddle/fluid/platform/device/xpu/xpu_info.h +++ b/paddle/fluid/platform/device/xpu/xpu_info.h @@ -12,6 +12,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include + #include "paddle/fluid/platform/place.h" #include "paddle/phi/backends/xpu/xpu_info.h" #include "xpu/runtime.h" diff --git a/paddle/fluid/platform/device/xpu/xpu_op_list.cc b/paddle/fluid/platform/device/xpu/xpu_op_list.cc index 07385143362..8ace4d1a32c 100644 --- a/paddle/fluid/platform/device/xpu/xpu_op_list.cc +++ b/paddle/fluid/platform/device/xpu/xpu_op_list.cc @@ -9,6 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/device/xpu/xpu_op_list.h" + #include #include #include @@ -17,7 +19,6 @@ limitations under the License. */ #include "paddle/fluid/platform/device/xpu/xpu2_op_list.h" #include "paddle/fluid/platform/device/xpu/xpu_info.h" #include "paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h" -#include "paddle/fluid/platform/device/xpu/xpu_op_list.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device_code.cc b/paddle/fluid/platform/device_code.cc index a4226dabf9d..4ee32ad5a03 100644 --- a/paddle/fluid/platform/device_code.cc +++ b/paddle/fluid/platform/device_code.cc @@ -12,12 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/platform/device_code.h" + #include + #include #include #include -#include "paddle/fluid/platform/device_code.h" #include "paddle/fluid/platform/enforce.h" DECLARE_string(cuda_dir); diff --git a/paddle/fluid/platform/device_code_test.cc b/paddle/fluid/platform/device_code_test.cc index 7da8c561385..cb2649686ec 100644 --- a/paddle/fluid/platform/device_code_test.cc +++ b/paddle/fluid/platform/device_code_test.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/device_code.h" + #include + #include "gtest/gtest.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/platform/init.h" diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 09a29c3429c..0bd606257f5 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -12,9 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
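xpu_op_list.cc and device_code.cc above both get the related-header-first treatment: the .cc file's own header moves to the very top, on a line of its own. A sketch of the resulting layout with hypothetical file names (kept entirely in comments so the sketch stays self-contained):

// widget.cc -- layout after the same transformation:
//
//   #include "widget.h"         // 1. the file's own header, first
//
//   #include <map>              // 2. standard headers
//   #include <string>
//
//   #include "gflags/gflags.h"  // 3. other project/third-party headers
//
// Putting the own header first makes the compiler verify that widget.h
// compiles stand-alone instead of riding on headers included before it.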
*/ #include "paddle/fluid/platform/device_context.h" + #include #include #include + #include "paddle/fluid/platform/place.h" #include "paddle/fluid/platform/stream/cuda_stream.h" #include "paddle/phi/backends/gpu/gpu_context.h" diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index a63d41405f1..d0dae706ba5 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -21,13 +21,12 @@ limitations under the License. */ #include #include +#include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/device/gpu/gpu_types.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/backends/custom/custom_context.h" #include "paddle/phi/backends/gpu/gpu_decls.h" #include "paddle/phi/core/device_context.h" - -#include "paddle/fluid/memory/malloc.h" #ifdef PADDLE_WITH_CUDA #include "paddle/fluid/platform/device/gpu/gpu_helper.h" #include "paddle/fluid/platform/dynload/cublas.h" diff --git a/paddle/fluid/platform/device_context_test.cu b/paddle/fluid/platform/device_context_test.cu index 08a04a9565a..2db29dc11ad 100644 --- a/paddle/fluid/platform/device_context_test.cu +++ b/paddle/fluid/platform/device_context_test.cu @@ -11,18 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/device_context.h" - #include #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" +#include "paddle/fluid/platform/device_context.h" TEST(Device, Init) { - using paddle::platform::DeviceContext; using paddle::platform::CUDADeviceContext; using paddle::platform::CUDAPlace; + using paddle::platform::DeviceContext; int count = paddle::platform::GetGPUDeviceCount(); for (int i = 0; i < count; i++) { @@ -94,11 +93,11 @@ TEST(Device, CUDADeviceContext) { } TEST(Device, DeviceContextPool) { - using paddle::platform::DeviceContextPool; - using paddle::platform::CUDADeviceContext; - using paddle::platform::Place; using paddle::platform::CPUPlace; + using paddle::platform::CUDADeviceContext; using paddle::platform::CUDAPlace; + using paddle::platform::DeviceContextPool; + using paddle::platform::Place; DeviceContextPool& pool = DeviceContextPool::Instance(); auto cpu_dev_ctx1 = pool.Get(CPUPlace()); diff --git a/paddle/fluid/platform/device_context_xpu_test.cc b/paddle/fluid/platform/device_context_xpu_test.cc index 3de2e3957a9..50cb0f98d33 100644 --- a/paddle/fluid/platform/device_context_xpu_test.cc +++ b/paddle/fluid/platform/device_context_xpu_test.cc @@ -11,12 +11,11 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/platform/device_context.h" - #include #include "glog/logging.h" #include "gtest/gtest.h" +#include "paddle/fluid/platform/device_context.h" TEST(Device, Init) { using paddle::platform::DeviceContext; @@ -33,10 +32,10 @@ TEST(Device, Init) { } TEST(Device, DeviceContextPool) { + using paddle::platform::CPUPlace; using paddle::platform::DeviceContextPool; - using paddle::platform::XPUDeviceContext; using paddle::platform::Place; - using paddle::platform::CPUPlace; + using paddle::platform::XPUDeviceContext; using paddle::platform::XPUPlace; DeviceContextPool& pool = DeviceContextPool::Instance(); diff --git a/paddle/fluid/platform/device_event.h b/paddle/fluid/platform/device_event.h index 463329d32c9..82d93dee398 100644 --- a/paddle/fluid/platform/device_event.h +++ b/paddle/fluid/platform/device_event.h @@ -23,8 +23,8 @@ * for USE_PASS from pass_library. */ -using ::paddle::platform::kCUDA; using ::paddle::platform::kCPU; +using ::paddle::platform::kCUDA; USE_EVENT(kCPU) USE_EVENT_WAIT(kCPU, kCPU) diff --git a/paddle/fluid/platform/device_event_base.cc b/paddle/fluid/platform/device_event_base.cc index 67fad3857f2..374de7d923f 100644 --- a/paddle/fluid/platform/device_event_base.cc +++ b/paddle/fluid/platform/device_event_base.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/platform/device_event_base.h" + #include "paddle/fluid/platform/device_event_cpu.h" #include "paddle/fluid/platform/event.h" diff --git a/paddle/fluid/platform/device_event_base.h b/paddle/fluid/platform/device_event_base.h index 8fe5ef9fcb1..4e751aa6d13 100644 --- a/paddle/fluid/platform/device_event_base.h +++ b/paddle/fluid/platform/device_event_base.h @@ -13,6 +13,7 @@ // limitations under the License. #pragma once #include + #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/platform/device_event_cpu.h b/paddle/fluid/platform/device_event_cpu.h index 6e2bf4c7ad1..1620dffdabd 100644 --- a/paddle/fluid/platform/device_event_cpu.h +++ b/paddle/fluid/platform/device_event_cpu.h @@ -16,6 +16,7 @@ #include #include #include + #include "paddle/fluid/platform/device_event_base.h" namespace paddle { diff --git a/paddle/fluid/platform/device_event_gpu.cc b/paddle/fluid/platform/device_event_gpu.cc index f42ccc5a1db..f176d1a0d5d 100644 --- a/paddle/fluid/platform/device_event_gpu.cc +++ b/paddle/fluid/platform/device_event_gpu.cc @@ -101,8 +101,8 @@ void EventResetCUDA(const DeviceEvent* event) { } // namespace platform } // namespace paddle -using ::paddle::platform::kCUDA; using ::paddle::platform::kCPU; +using ::paddle::platform::kCUDA; REGISTER_EVENT_CREATE_FUNCTION(kCUDA, paddle::platform::DeviceEventCreateCUDA) REGISTER_EVENT_RECORD_FUNCTION(kCUDA, paddle::platform::DeviceEventRecordCUDA) REGISTER_EVENT_QUERY_FUNCTION(kCUDA, paddle::platform::DeviceEventQueryCUDA) diff --git a/paddle/fluid/platform/device_event_test.cc b/paddle/fluid/platform/device_event_test.cc index d9f744b2625..92fe7c02bd0 100644 --- a/paddle/fluid/platform/device_event_test.cc +++ b/paddle/fluid/platform/device_event_test.cc @@ -13,15 +13,16 @@ // limitations under the License. 
#include "paddle/fluid/platform/device_event.h" + #include "glog/logging.h" #include "gtest/gtest.h" #include "paddle/fluid/platform/place.h" -using ::paddle::platform::kCUDA; using ::paddle::platform::kCPU; +using ::paddle::platform::kCUDA; -using paddle::platform::DeviceEvent; using paddle::platform::DeviceContextPool; +using paddle::platform::DeviceEvent; #ifdef PADDLE_WITH_CUDA #include diff --git a/paddle/fluid/platform/device_tracer.cc b/paddle/fluid/platform/device_tracer.cc index 73847ce24aa..fa345ed31cb 100644 --- a/paddle/fluid/platform/device_tracer.cc +++ b/paddle/fluid/platform/device_tracer.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/platform/device_tracer.h" + #include #include #include @@ -20,7 +22,6 @@ limitations under the License. */ #include // NOLINT #include "glog/logging.h" -#include "paddle/fluid/platform/device_tracer.h" DECLARE_bool(enable_host_event_recorder_hook); @@ -255,7 +256,9 @@ void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, } break; } - default: { break; } + default: { + break; + } } } else if (status == CUPTI_ERROR_MAX_LIMIT_REACHED) { // Seems not an error in this case. diff --git a/paddle/fluid/platform/dynload/cublas.h b/paddle/fluid/platform/dynload/cublas.h index d7d43cecc25..496b253dff5 100644 --- a/paddle/fluid/platform/dynload/cublas.h +++ b/paddle/fluid/platform/dynload/cublas.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include // NOLINT #include diff --git a/paddle/fluid/platform/dynload/cublasLt.h b/paddle/fluid/platform/dynload/cublasLt.h index 5157cfdad2e..3a1d28072c5 100644 --- a/paddle/fluid/platform/dynload/cublasLt.h +++ b/paddle/fluid/platform/dynload/cublasLt.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include diff --git a/paddle/fluid/platform/dynload/cuda_driver.cc b/paddle/fluid/platform/dynload/cuda_driver.cc index a0f9647f089..c6851594b80 100644 --- a/paddle/fluid/platform/dynload/cuda_driver.cc +++ b/paddle/fluid/platform/dynload/cuda_driver.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/dynload/cuda_driver.h" + #include "paddle/phi/backends/dynload/cuda_driver.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/cuda_driver.h b/paddle/fluid/platform/dynload/cuda_driver.h index f5550e9f9fe..b696ffc1a3b 100644 --- a/paddle/fluid/platform/dynload/cuda_driver.h +++ b/paddle/fluid/platform/dynload/cuda_driver.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/cuda_driver.h" diff --git a/paddle/fluid/platform/dynload/cudnn.cc b/paddle/fluid/platform/dynload/cudnn.cc index 553792d3bbf..05cacb74c86 100644 --- a/paddle/fluid/platform/dynload/cudnn.cc +++ b/paddle/fluid/platform/dynload/cudnn.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/dynload/cudnn.h" + #include "paddle/phi/backends/dynload/cudnn.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/cudnn.h b/paddle/fluid/platform/dynload/cudnn.h index bf4bb08a696..9af1e8065c4 100644 --- a/paddle/fluid/platform/dynload/cudnn.h +++ b/paddle/fluid/platform/dynload/cudnn.h @@ -16,6 +16,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/cudnn.h" diff --git a/paddle/fluid/platform/dynload/cufft.cc b/paddle/fluid/platform/dynload/cufft.cc index 1996ab16167..6a06c4bdb6a 100644 --- a/paddle/fluid/platform/dynload/cufft.cc +++ b/paddle/fluid/platform/dynload/cufft.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/dynload/cufft.h" + #include "paddle/phi/backends/dynload/cufft.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/cufft.h b/paddle/fluid/platform/dynload/cufft.h index 6c3a0992d75..d79603a5a01 100644 --- a/paddle/fluid/platform/dynload/cufft.h +++ b/paddle/fluid/platform/dynload/cufft.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/cufft.h" diff --git a/paddle/fluid/platform/dynload/cupti.h b/paddle/fluid/platform/dynload/cupti.h index 854e5a7b9f0..8e08785f209 100644 --- a/paddle/fluid/platform/dynload/cupti.h +++ b/paddle/fluid/platform/dynload/cupti.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/cupti.h" diff --git a/paddle/fluid/platform/dynload/curand.h b/paddle/fluid/platform/dynload/curand.h index 1fdd9240284..f4065a196d3 100644 --- a/paddle/fluid/platform/dynload/curand.h +++ b/paddle/fluid/platform/dynload/curand.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/curand.h" diff --git a/paddle/fluid/platform/dynload/cusolver.h b/paddle/fluid/platform/dynload/cusolver.h index 212c350ebb2..854de23150c 100644 --- a/paddle/fluid/platform/dynload/cusolver.h +++ b/paddle/fluid/platform/dynload/cusolver.h @@ -15,6 +15,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/cusolver.h" diff --git a/paddle/fluid/platform/dynload/cusparse.h b/paddle/fluid/platform/dynload/cusparse.h index b4b93521678..925852bb415 100644 --- a/paddle/fluid/platform/dynload/cusparse.h +++ b/paddle/fluid/platform/dynload/cusparse.h @@ -15,6 +15,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/cusparse.h" diff --git a/paddle/fluid/platform/dynload/dynamic_loader.cc b/paddle/fluid/platform/dynload/dynamic_loader.cc index 5ce63b244ef..2f24e1b87da 100644 --- a/paddle/fluid/platform/dynload/dynamic_loader.cc +++ b/paddle/fluid/platform/dynload/dynamic_loader.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include #include + #include "gflags/gflags.h" #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/fluid/platform/dynload/hiprtc.cc b/paddle/fluid/platform/dynload/hiprtc.cc index 6c4a4bfd0de..d9bb3fd2c42 100644 --- a/paddle/fluid/platform/dynload/hiprtc.cc +++ b/paddle/fluid/platform/dynload/hiprtc.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/dynload/hiprtc.h" + #include "paddle/phi/backends/dynload/hiprtc.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/hiprtc.h b/paddle/fluid/platform/dynload/hiprtc.h index 851dadbac63..f27d5d808f7 100644 --- a/paddle/fluid/platform/dynload/hiprtc.h +++ b/paddle/fluid/platform/dynload/hiprtc.h @@ -15,7 +15,9 @@ limitations under the License. */ #pragma once #include + #include // NOLINT + #include "paddle/phi/backends/dynload/hiprtc.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/miopen.cc b/paddle/fluid/platform/dynload/miopen.cc index 9660188b68d..15012531b4c 100644 --- a/paddle/fluid/platform/dynload/miopen.cc +++ b/paddle/fluid/platform/dynload/miopen.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/dynload/miopen.h" + #include "paddle/phi/backends/dynload/cudnn.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/miopen.h b/paddle/fluid/platform/dynload/miopen.h index b99cd5ebb6e..20b92b17051 100644 --- a/paddle/fluid/platform/dynload/miopen.h +++ b/paddle/fluid/platform/dynload/miopen.h @@ -14,10 +14,11 @@ limitations under the License. */ #pragma once #include - #include #include + #include // NOLINT + #include "paddle/phi/backends/dynload/miopen.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/mklml.h b/paddle/fluid/platform/dynload/mklml.h index 1c7d0c17a0f..78cae9a0821 100644 --- a/paddle/fluid/platform/dynload/mklml.h +++ b/paddle/fluid/platform/dynload/mklml.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/mklml.h" diff --git a/paddle/fluid/platform/dynload/mklrt.h b/paddle/fluid/platform/dynload/mklrt.h index 334b98a1c3d..e1a2bedfa8e 100644 --- a/paddle/fluid/platform/dynload/mklrt.h +++ b/paddle/fluid/platform/dynload/mklrt.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/fluid/platform/dynload/nccl.h b/paddle/fluid/platform/dynload/nccl.h index a38d1d4272e..c2052719dd5 100644 --- a/paddle/fluid/platform/dynload/nccl.h +++ b/paddle/fluid/platform/dynload/nccl.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/nccl.h" diff --git a/paddle/fluid/platform/dynload/nvjpeg.h b/paddle/fluid/platform/dynload/nvjpeg.h index 8aaf672fe67..026a3b64886 100644 --- a/paddle/fluid/platform/dynload/nvjpeg.h +++ b/paddle/fluid/platform/dynload/nvjpeg.h @@ -12,6 +12,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_CUDA #include + #include // NOLINT #include "paddle/phi/backends/dynload/nvjpeg.h" diff --git a/paddle/fluid/platform/dynload/nvrtc.cc b/paddle/fluid/platform/dynload/nvrtc.cc index a0322998277..242aa912ad8 100644 --- a/paddle/fluid/platform/dynload/nvrtc.cc +++ b/paddle/fluid/platform/dynload/nvrtc.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/platform/dynload/nvrtc.h" + #include "paddle/phi/backends/dynload/nvrtc.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/nvrtc.h b/paddle/fluid/platform/dynload/nvrtc.h index 5ca8860c5ac..e03235e116f 100644 --- a/paddle/fluid/platform/dynload/nvrtc.h +++ b/paddle/fluid/platform/dynload/nvrtc.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/nvrtc.h" diff --git a/paddle/fluid/platform/dynload/nvtx.h b/paddle/fluid/platform/dynload/nvtx.h index 3f974eca1d0..c3dc9e31df3 100644 --- a/paddle/fluid/platform/dynload/nvtx.h +++ b/paddle/fluid/platform/dynload/nvtx.h @@ -15,6 +15,7 @@ limitations under the License. */ #ifndef _WIN32 #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/nvtx.h" diff --git a/paddle/fluid/platform/dynload/rccl.h b/paddle/fluid/platform/dynload/rccl.h index 7bb4992c89c..2f874bb59f5 100644 --- a/paddle/fluid/platform/dynload/rccl.h +++ b/paddle/fluid/platform/dynload/rccl.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include // NOLINT + #include "paddle/phi/backends/dynload/rccl.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/rocblas.h b/paddle/fluid/platform/dynload/rocblas.h index 04f4fdd9506..5cec6fb4879 100644 --- a/paddle/fluid/platform/dynload/rocblas.h +++ b/paddle/fluid/platform/dynload/rocblas.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include diff --git a/paddle/fluid/platform/dynload/rocm_driver.cc b/paddle/fluid/platform/dynload/rocm_driver.cc index 088129f3f8d..4fa20c5c4bb 100644 --- a/paddle/fluid/platform/dynload/rocm_driver.cc +++ b/paddle/fluid/platform/dynload/rocm_driver.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/dynload/rocm_driver.h" + #include "paddle/phi/backends/dynload/rocm_driver.h" namespace paddle { diff --git a/paddle/fluid/platform/dynload/rocm_driver.h b/paddle/fluid/platform/dynload/rocm_driver.h index 5a902239fef..5c8e18611c4 100644 --- a/paddle/fluid/platform/dynload/rocm_driver.h +++ b/paddle/fluid/platform/dynload/rocm_driver.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/rocm_driver.h" diff --git a/paddle/fluid/platform/dynload/tensorrt.cc b/paddle/fluid/platform/dynload/tensorrt.cc index 8153877b7bb..8d700faac0c 100644 --- a/paddle/fluid/platform/dynload/tensorrt.cc +++ b/paddle/fluid/platform/dynload/tensorrt.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/fluid/platform/dynload/tensorrt.h" + #include namespace paddle { diff --git a/paddle/fluid/platform/enforce.h b/paddle/fluid/platform/enforce.h index 772a7750fe9..1106eef4559 100644 --- a/paddle/fluid/platform/enforce.h +++ b/paddle/fluid/platform/enforce.h @@ -36,6 +36,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/platform/external_error.pb.h" #endif // PADDLE_WITH_CUDA @@ -77,6 +78,7 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/cusolver.h" #if !defined(__APPLE__) && defined(PADDLE_WITH_NCCL) #include + #include "paddle/phi/backends/dynload/nccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_CUDA @@ -88,6 +90,7 @@ limitations under the License. 
*/ #include "paddle/phi/backends/dynload/rocblas.h" #if !defined(__APPLE__) && defined(PADDLE_WITH_RCCL) #include // NOLINT + #include "paddle/phi/backends/dynload/rccl.h" #endif // __APPLE__ #endif // PADDLE_WITH_HIP diff --git a/paddle/fluid/platform/enforce_test.cc b/paddle/fluid/platform/enforce_test.cc index b9e42392991..771c4853f6f 100644 --- a/paddle/fluid/platform/enforce_test.cc +++ b/paddle/fluid/platform/enforce_test.cc @@ -63,19 +63,22 @@ TEST(ENFORCE, FAILED) { TEST(ENFORCE, NO_ARG_OK) { int a = 2; int b = 2; - PADDLE_ENFORCE_EQ(a, b, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_EQ tests failed.")); + PADDLE_ENFORCE_EQ( + a, b, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_EQ tests failed.")); // test enforce with extra message. - PADDLE_ENFORCE_EQ(a, b, paddle::platform::errors::Unavailable( - "Some %s wrong in PADDLE_ENFORCE_EQ.", "info")); + PADDLE_ENFORCE_EQ(a, b, + paddle::platform::errors::Unavailable( + "Some %s wrong in PADDLE_ENFORCE_EQ.", "info")); } TEST(ENFORCE_EQ, NO_EXTRA_MSG_FAIL) { int a = 2; bool caught_exception = false; try { - PADDLE_ENFORCE_EQ(a, 1 + 3, paddle::platform::errors::InvalidArgument( - "The result is not equal correct result.")); + PADDLE_ENFORCE_EQ(a, 1 + 3, + paddle::platform::errors::InvalidArgument( + "The result is not equal correct result.")); } catch (paddle::platform::EnforceNotMet& error) { caught_exception = true; std::string ex_msg = error.what(); @@ -89,8 +92,9 @@ TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { int a = 2; bool caught_exception = false; try { - PADDLE_ENFORCE_EQ(a, 1 + 3, paddle::platform::errors::InvalidArgument( - "The result is not equal correct result.")); + PADDLE_ENFORCE_EQ(a, 1 + 3, + paddle::platform::errors::InvalidArgument( + "The result is not equal correct result.")); } catch (paddle::platform::EnforceNotMet& error) { caught_exception = true; std::string ex_msg = error.what(); @@ -102,10 +106,12 @@ TEST(ENFORCE_EQ, EXTRA_MSG_FAIL) { } TEST(ENFORCE_NE, OK) { - PADDLE_ENFORCE_NE(1, 2, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_NE tests failed.")); - PADDLE_ENFORCE_NE(1.0, 2UL, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_NE tests failed.")); + PADDLE_ENFORCE_NE( + 1, 2, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_NE tests failed.")); + PADDLE_ENFORCE_NE( + 1.0, 2UL, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_NE tests failed.")); } TEST(ENFORCE_NE, FAIL) { bool caught_exception = false; @@ -125,14 +131,16 @@ TEST(ENFORCE_NE, FAIL) { } TEST(ENFORCE_GT, OK) { - PADDLE_ENFORCE_GT(2, 1, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_GT tests failed.")); + PADDLE_ENFORCE_GT( + 2, 1, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_GT tests failed.")); } TEST(ENFORCE_GT, FAIL) { bool caught_exception = false; try { - PADDLE_ENFORCE_GT(1, 2, paddle::platform::errors::InvalidArgument( - "Expected 1 > 2, but received 1:1 <= 2:2.")); + PADDLE_ENFORCE_GT(1, 2, + paddle::platform::errors::InvalidArgument( + "Expected 1 > 2, but received 1:1 <= 2:2.")); } catch (paddle::platform::EnforceNotMet& error) { caught_exception = true; std::string ex_msg = error.what(); @@ -143,18 +151,22 @@ TEST(ENFORCE_GT, FAIL) { } TEST(ENFORCE_GE, OK) { - PADDLE_ENFORCE_GE(2, 2, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_GE tests failed.")); - PADDLE_ENFORCE_GE(3, 2, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_GE tests failed.")); - PADDLE_ENFORCE_GE(3.21, 2.0, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_GE tests failed.")); + 
PADDLE_ENFORCE_GE( + 2, 2, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_GE tests failed.")); + PADDLE_ENFORCE_GE( + 3, 2, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_GE tests failed.")); + PADDLE_ENFORCE_GE( + 3.21, 2.0, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_GE tests failed.")); } TEST(ENFORCE_GE, FAIL) { bool caught_exception = false; try { - PADDLE_ENFORCE_GE(1, 2, paddle::platform::errors::InvalidArgument( - "Expected 1 >= 2, but received 1:1 < 2:2.")); + PADDLE_ENFORCE_GE(1, 2, + paddle::platform::errors::InvalidArgument( + "Expected 1 >= 2, but received 1:1 < 2:2.")); } catch (paddle::platform::EnforceNotMet& error) { caught_exception = true; std::string ex_msg = error.what(); @@ -165,22 +177,28 @@ TEST(ENFORCE_GE, FAIL) { } TEST(ENFORCE_LE, OK) { - PADDLE_ENFORCE_LE(1, 1, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LE tests failed.")); - PADDLE_ENFORCE_LE(1UL, 1UL, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LE tests failed.")); - PADDLE_ENFORCE_LE(2, 3, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LE tests failed.")); - PADDLE_ENFORCE_LE(2UL, 3UL, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LE tests failed.")); - PADDLE_ENFORCE_LE(2.0, 3.2, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LE tests failed.")); + PADDLE_ENFORCE_LE( + 1, 1, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LE tests failed.")); + PADDLE_ENFORCE_LE( + 1UL, 1UL, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LE tests failed.")); + PADDLE_ENFORCE_LE( + 2, 3, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LE tests failed.")); + PADDLE_ENFORCE_LE( + 2UL, 3UL, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LE tests failed.")); + PADDLE_ENFORCE_LE( + 2.0, 3.2, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LE tests failed.")); } TEST(ENFORCE_LE, FAIL) { bool caught_exception = false; try { - PADDLE_ENFORCE_GT(1, 2, paddle::platform::errors::InvalidArgument( - "Expected 1 > 2, but received 1:1 <= 2:2.")); + PADDLE_ENFORCE_GT(1, 2, + paddle::platform::errors::InvalidArgument( + "Expected 1 > 2, but received 1:1 <= 2:2.")); } catch (paddle::platform::EnforceNotMet& error) { caught_exception = true; std::string ex_msg = error.what(); @@ -191,12 +209,15 @@ TEST(ENFORCE_LE, FAIL) { } TEST(ENFORCE_LT, OK) { - PADDLE_ENFORCE_LT(3, 10, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LT tests failed.")); - PADDLE_ENFORCE_LT(2UL, 3UL, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LT tests failed.")); - PADDLE_ENFORCE_LT(2, 3, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_LT tests failed.")); + PADDLE_ENFORCE_LT( + 3, 10, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LT tests failed.")); + PADDLE_ENFORCE_LT( + 2UL, 3UL, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LT tests failed.")); + PADDLE_ENFORCE_LT( + 2, 3, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_LT tests failed.")); } TEST(ENFORCE_LT, FAIL) { bool caught_exception = false; @@ -263,16 +284,18 @@ std::ostream& operator<<(std::ostream& os, const Dims& d) { TEST(ENFORCE_USER_DEFINED_CLASS, EQ) { Dims a{{1, 2, 3, 4}}, b{{1, 2, 3, 4}}; - PADDLE_ENFORCE_EQ(a, b, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_EQ tests failed.")); + PADDLE_ENFORCE_EQ( + a, b, + paddle::platform::errors::Unavailable("PADDLE_ENFORCE_EQ tests failed.")); } TEST(ENFORCE_USER_DEFINED_CLASS, NE) { Dims a{{1, 2, 3, 4}}, b{{5, 6, 7, 8}}; bool caught_exception = false; try { - 
PADDLE_ENFORCE_EQ(a, b, paddle::platform::errors::Unavailable( - "PADDLE_ENFORCE_EQ tests failed.")); + PADDLE_ENFORCE_EQ(a, b, + paddle::platform::errors::Unavailable( + "PADDLE_ENFORCE_EQ tests failed.")); } catch (paddle::platform::EnforceNotMet&) { caught_exception = true; } @@ -481,10 +504,12 @@ TEST(enforce, cannot_to_string_type) { "int can be converted to string"); CannotToStringType obj1(3), obj2(4), obj3(3); - PADDLE_ENFORCE_NE(obj1, obj2, paddle::platform::errors::InvalidArgument( - "Object 1 is not equal to Object 2")); - PADDLE_ENFORCE_EQ(obj1, obj3, paddle::platform::errors::InvalidArgument( - "Object 1 is equal to Object 3")); + PADDLE_ENFORCE_NE(obj1, obj2, + paddle::platform::errors::InvalidArgument( + "Object 1 is not equal to Object 2")); + PADDLE_ENFORCE_EQ(obj1, obj3, + paddle::platform::errors::InvalidArgument( + "Object 1 is equal to Object 3")); std::string msg = "Compare obj1 with obj2"; try { diff --git a/paddle/fluid/platform/errors.h b/paddle/fluid/platform/errors.h index 57f5b3a7c93..758af3e2d91 100644 --- a/paddle/fluid/platform/errors.h +++ b/paddle/fluid/platform/errors.h @@ -18,5 +18,5 @@ namespace paddle { namespace platform { namespace errors = ::phi::errors; using error = ::phi::ErrorCode; -} -} +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/errors_test.cc b/paddle/fluid/platform/errors_test.cc index 712b67a654c..8b11c1d2d24 100644 --- a/paddle/fluid/platform/errors_test.cc +++ b/paddle/fluid/platform/errors_test.cc @@ -12,11 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/platform/errors.h" + #include #include "gtest/gtest.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/errors.h" using namespace paddle::platform::errors; // NOLINT diff --git a/paddle/fluid/platform/fast_divmod.h b/paddle/fluid/platform/fast_divmod.h index bef551078b3..f2a150c3012 100644 --- a/paddle/fluid/platform/fast_divmod.h +++ b/paddle/fluid/platform/fast_divmod.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/phi/kernels/funcs/aligned_vector.h" #define INT_BITS 32 diff --git a/paddle/fluid/platform/flags.h b/paddle/fluid/platform/flags.h index b9d78c2e9dc..0a38d612939 100644 --- a/paddle/fluid/platform/flags.h +++ b/paddle/fluid/platform/flags.h @@ -18,6 +18,7 @@ #include #include #include + #include "gflags/gflags.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/variant.h" diff --git a/paddle/fluid/platform/float16_test.cu b/paddle/fluid/platform/float16_test.cu index 2c00854e082..dc7fdc6b443 100644 --- a/paddle/fluid/platform/float16_test.cu +++ b/paddle/fluid/platform/float16_test.cu @@ -14,6 +14,7 @@ limitations under the License. */ #define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h #include #include + #include #include diff --git a/paddle/fluid/platform/gen_comm_id_helper.cc b/paddle/fluid/platform/gen_comm_id_helper.cc index bbec743d26f..45ca4a6f277 100644 --- a/paddle/fluid/platform/gen_comm_id_helper.cc +++ b/paddle/fluid/platform/gen_comm_id_helper.cc @@ -22,6 +22,7 @@ limitations under the License. 
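Two recurring fixes end here: PADDLE_ENFORCE_* call sites are re-wrapped so the errors::...() argument stays whole on its own indented lines instead of being split mid-call, and errors.h gets closing-namespace comments (clang-format's FixNamespaceComments, also on in Google style). A self-contained sketch of both, using a stand-in macro and error helper rather than Paddle's real ones:

#include <stdexcept>
#include <string>

namespace demo {
std::string Unavailable(const std::string& msg) { return msg; }
// FixNamespaceComments generates the annotation on the brace below:
}  // namespace demo

#define DEMO_ENFORCE_EQ(a, b, err)                 \
  do {                                             \
    if ((a) != (b)) throw std::runtime_error(err); \
  } while (0)

int main() {
  // New wrapping: break after '(' and keep the error expression intact.
  DEMO_ENFORCE_EQ(
      2, 2,
      demo::Unavailable("DEMO_ENFORCE_EQ tests failed."));
  return 0;
}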
*/ #include #include #include + #include #include #include // NOLINT diff --git a/paddle/fluid/platform/init_test.cc b/paddle/fluid/platform/init_test.cc index 5301dd30759..bc5bd274bf8 100644 --- a/paddle/fluid/platform/init_test.cc +++ b/paddle/fluid/platform/init_test.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/init.h" + #include "gtest/gtest.h" #include "paddle/fluid/platform/device_context.h" #ifdef PADDLE_WITH_MLU diff --git a/paddle/fluid/platform/lock_guard_ptr.h b/paddle/fluid/platform/lock_guard_ptr.h index bff24e74a70..66d6e446d3f 100644 --- a/paddle/fluid/platform/lock_guard_ptr.h +++ b/paddle/fluid/platform/lock_guard_ptr.h @@ -14,6 +14,7 @@ #pragma once #include + #include #include // NOLINT namespace paddle { diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h index 5476d244f60..382f96e83bf 100644 --- a/paddle/fluid/platform/mkldnn_reuse.h +++ b/paddle/fluid/platform/mkldnn_reuse.h @@ -1061,16 +1061,18 @@ static void SetDstMemoryQuantized( const size_t dst_dims = dst_tz.size(); MKLDNNMemoryFormat dst_fmt; - PADDLE_ENFORCE_LE(dst_dims, 5, platform::errors::InvalidArgument( - "Dst memory for quantization can not have " - "dims > 5. But received dst_dims is %d.", - dst_dims)); + PADDLE_ENFORCE_LE(dst_dims, 5, + platform::errors::InvalidArgument( + "Dst memory for quantization can not have " + "dims > 5. But received dst_dims is %d.", + dst_dims)); dst_fmt = platform::MKLDNNFormatForSize(dst_dims, output_format); - auto tmp_dst_md = platform::MKLDNNMemDesc( - {dst_tz}, paddle::framework::ToMKLDNNDataType( - framework::DataTypeTrait<T>::DataType()), - dst_fmt); + auto tmp_dst_md = + platform::MKLDNNMemDesc({dst_tz}, + paddle::framework::ToMKLDNNDataType( + framework::DataTypeTrait<T>::DataType()), + dst_fmt); dst_md.reset(new dnnl::memory::desc(tmp_dst_md)); dst_memory.reset( new dnnl::memory(*dst_md, engine, to_void_cast<T>(output_data))); diff --git a/paddle/fluid/platform/monitor.h b/paddle/fluid/platform/monitor.h index dc9abaf36d8..e7612f6dcb6 100644 --- a/paddle/fluid/platform/monitor.h +++ b/paddle/fluid/platform/monitor.h @@ -15,6 +15,7 @@ #pragma once #include + #include #include #include // NOLINT diff --git a/paddle/fluid/platform/os_info.cc b/paddle/fluid/platform/os_info.cc index 36dd7891d55..694f701b5ad 100644 --- a/paddle/fluid/platform/os_info.cc +++ b/paddle/fluid/platform/os_info.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/os_info.h" + #include #include #include diff --git a/paddle/fluid/platform/os_info_test.cc b/paddle/fluid/platform/os_info_test.cc index b3311f1d19e..149da6ba27a 100644 --- a/paddle/fluid/platform/os_info_test.cc +++ b/paddle/fluid/platform/os_info_test.cc @@ -12,13 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License.
#include "paddle/fluid/platform/os_info.h" + #include + #include "gtest/gtest.h" TEST(ThreadInfo, TestThreadIdUtils) { - using paddle::platform::GetCurrentThreadStdId; - using paddle::platform::GetCurrentThreadId; using paddle::platform::GetAllThreadIds; + using paddle::platform::GetCurrentThreadId; + using paddle::platform::GetCurrentThreadStdId; EXPECT_EQ(std::hash()(std::this_thread::get_id()), GetCurrentThreadId().std_tid); auto ids = GetAllThreadIds(); @@ -26,10 +28,10 @@ TEST(ThreadInfo, TestThreadIdUtils) { } TEST(ThreadInfo, TestThreadNameUtils) { - using paddle::platform::GetCurrentThreadStdId; + using paddle::platform::GetAllThreadNames; using paddle::platform::GetCurrentThreadName; + using paddle::platform::GetCurrentThreadStdId; using paddle::platform::SetCurrentThreadName; - using paddle::platform::GetAllThreadNames; SetCurrentThreadName("MainThread"); EXPECT_FALSE(SetCurrentThreadName("MainThread")); auto names = GetAllThreadNames(); diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc index 75abf36e676..c573650f179 100644 --- a/paddle/fluid/platform/profiler.cc +++ b/paddle/fluid/platform/profiler.cc @@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/platform/profiler.h" + #include // NOLINT #include #include @@ -20,7 +22,6 @@ limitations under the License. */ #include "paddle/fluid/platform/device_tracer.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/profiler.h" #include "paddle/fluid/platform/profiler/common_event.h" #include "paddle/fluid/platform/profiler/host_event_recorder.h" #include "paddle/fluid/platform/profiler/host_tracer.h" diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.cc b/paddle/fluid/platform/profiler/chrometracing_logger.cc index 4ee95a530fb..f728a820bd7 100644 --- a/paddle/fluid/platform/profiler/chrometracing_logger.cc +++ b/paddle/fluid/platform/profiler/chrometracing_logger.cc @@ -12,15 +12,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/platform/profiler/chrometracing_logger.h" + #include #include #include #include "glog/logging.h" - #include "paddle/fluid/platform/device/gpu/gpu_info.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/profiler/chrometracing_logger.h" #include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/utils.h" @@ -304,9 +304,10 @@ void ChromeTracingLogger::HandleTypeKernel( blocks_per_sm = static_cast(kernel_info.grid_x * kernel_info.grid_y * kernel_info.grid_z) / device_property.multiProcessorCount; - warps_per_sm = blocks_per_sm * (kernel_info.block_x * kernel_info.block_y * - kernel_info.block_z) / - threads_per_warp; + warps_per_sm = + blocks_per_sm * + (kernel_info.block_x * kernel_info.block_y * kernel_info.block_z) / + threads_per_warp; occupancy = CalculateEstOccupancy( device_node.DeviceId(), kernel_info.registers_per_thread, kernel_info.static_shared_memory, kernel_info.dynamic_shared_memory, diff --git a/paddle/fluid/platform/profiler/chrometracing_logger.h b/paddle/fluid/platform/profiler/chrometracing_logger.h index 8977ab748c6..12d98d1ef0c 100644 --- a/paddle/fluid/platform/profiler/chrometracing_logger.h +++ b/paddle/fluid/platform/profiler/chrometracing_logger.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/platform/profiler/output_logger.h" namespace paddle { diff --git a/paddle/fluid/platform/profiler/common_event.h b/paddle/fluid/platform/profiler/common_event.h index cfdc3be110a..8fe3b150523 100644 --- a/paddle/fluid/platform/profiler/common_event.h +++ b/paddle/fluid/platform/profiler/common_event.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/platform/event.h" // import EventRole, TODO(TIEXING): remove later #include "paddle/fluid/platform/profiler/trace_event.h" diff --git a/paddle/fluid/platform/profiler/cpu_utilization.cc b/paddle/fluid/platform/profiler/cpu_utilization.cc index d507153d3f5..4319841c8a9 100644 --- a/paddle/fluid/platform/profiler/cpu_utilization.cc +++ b/paddle/fluid/platform/profiler/cpu_utilization.cc @@ -54,12 +54,13 @@ void CpuUtilization::RecordBeginTimeInfo() { if (stat_file != nullptr) { char temp_str[200]; uint64_t temp_lu; - int retval = fscanf( - stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 - "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, - temp_str, &system_tms_start_.tms_utime, &nice_time_start_, - &system_tms_start_.tms_stime, &idle_start_, &iowait_start_, &irq_start_, - &softirq_start_, &steal_start_, &temp_lu, &temp_lu); + int retval = + fscanf(stat_file, + "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 + "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, + temp_str, &system_tms_start_.tms_utime, &nice_time_start_, + &system_tms_start_.tms_stime, &idle_start_, &iowait_start_, + &irq_start_, &softirq_start_, &steal_start_, &temp_lu, &temp_lu); if (retval != 11) { LOG(WARNING) << "Failed to read cpu utilization information at record beginning." 
@@ -87,12 +88,13 @@ void CpuUtilization::RecordEndTimeInfo() { if (stat_file != nullptr) { char temp_str[200]; uint64_t temp_lu; - int retval = fscanf( - stat_file, "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 - "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, - temp_str, &system_tms_end_.tms_utime, &nice_time_end_, - &system_tms_end_.tms_stime, &idle_end_, &iowait_end_, &irq_end_, - &softirq_end_, &steal_end_, &temp_lu, &temp_lu); + int retval = + fscanf(stat_file, + "%s %" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 + "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64 "%" PRIu64, + temp_str, &system_tms_end_.tms_utime, &nice_time_end_, + &system_tms_end_.tms_stime, &idle_end_, &iowait_end_, &irq_end_, + &softirq_end_, &steal_end_, &temp_lu, &temp_lu); if (retval != 11) { LOG(WARNING) diff --git a/paddle/fluid/platform/profiler/cpu_utilization.h b/paddle/fluid/platform/profiler/cpu_utilization.h index 7b05a6302cd..aa25ae5a43c 100644 --- a/paddle/fluid/platform/profiler/cpu_utilization.h +++ b/paddle/fluid/platform/profiler/cpu_utilization.h @@ -15,8 +15,10 @@ #pragma once #include + #include #include + #include "glog/logging.h" #ifdef _MSC_VER #include diff --git a/paddle/fluid/platform/profiler/cuda_tracer.cc b/paddle/fluid/platform/profiler/cuda_tracer.cc index 2d3e354dc27..9e32f7bbf19 100644 --- a/paddle/fluid/platform/profiler/cuda_tracer.cc +++ b/paddle/fluid/platform/profiler/cuda_tracer.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/platform/profiler/cuda_tracer.h" + #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" #include "paddle/fluid/platform/os_info.h" diff --git a/paddle/fluid/platform/profiler/cuda_tracer.h b/paddle/fluid/platform/profiler/cuda_tracer.h index 20a60521266..36c5ab4eb55 100644 --- a/paddle/fluid/platform/profiler/cuda_tracer.h +++ b/paddle/fluid/platform/profiler/cuda_tracer.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/platform/dynload/cupti.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/profiler/tracer_base.h" diff --git a/paddle/fluid/platform/profiler/cupti_data_process.cc b/paddle/fluid/platform/profiler/cupti_data_process.cc index da12dccb749..7cb8b597dcd 100644 --- a/paddle/fluid/platform/profiler/cupti_data_process.cc +++ b/paddle/fluid/platform/profiler/cupti_data_process.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/platform/profiler/cupti_data_process.h" + #include + #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/os_info.h" diff --git a/paddle/fluid/platform/profiler/cupti_data_process.h b/paddle/fluid/platform/profiler/cupti_data_process.h index 01b2e72ade4..7b800464734 100644 --- a/paddle/fluid/platform/profiler/cupti_data_process.h +++ b/paddle/fluid/platform/profiler/cupti_data_process.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/platform/dynload/cupti.h" #include "paddle/fluid/platform/profiler/trace_event_collector.h" diff --git a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc index de3411579d3..82363fcff63 100644 --- a/paddle/fluid/platform/profiler/dump/deserialization_reader.cc +++ b/paddle/fluid/platform/profiler/dump/deserialization_reader.cc @@ -9,7 +9,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" + #include + #include "paddle/fluid/platform/profiler/extra_info.h" namespace paddle { diff --git a/paddle/fluid/platform/profiler/dump/serialization_logger.cc b/paddle/fluid/platform/profiler/dump/serialization_logger.cc index 73021f4362a..b8afe2af0e7 100644 --- a/paddle/fluid/platform/profiler/dump/serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/serialization_logger.cc @@ -9,9 +9,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "glog/logging.h" - #include "paddle/fluid/platform/profiler/dump/serialization_logger.h" + +#include "glog/logging.h" #include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/extra_info.h" #include "paddle/fluid/platform/profiler/utils.h" diff --git a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc index d294bfee58c..5253ecc505d 100644 --- a/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc +++ b/paddle/fluid/platform/profiler/dump/test_serialization_logger.cc @@ -13,26 +13,25 @@ // limitations under the License. #include "gtest/gtest.h" - #include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" #include "paddle/fluid/platform/profiler/dump/serialization_logger.h" #include "paddle/fluid/platform/profiler/event_node.h" #include "paddle/fluid/platform/profiler/event_python.h" -using paddle::platform::SerializationLogger; -using paddle::platform::DeserializationReader; -using paddle::platform::NodeTrees; -using paddle::platform::HostTraceEventNode; using paddle::platform::CudaRuntimeTraceEventNode; +using paddle::platform::DeserializationReader; +using paddle::platform::DeviceTraceEvent; using paddle::platform::DeviceTraceEventNode; using paddle::platform::HostTraceEvent; -using paddle::platform::RuntimeTraceEvent; -using paddle::platform::DeviceTraceEvent; -using paddle::platform::TracerEventType; +using paddle::platform::HostTraceEventNode; using paddle::platform::KernelEventInfo; using paddle::platform::MemcpyEventInfo; using paddle::platform::MemsetEventInfo; +using paddle::platform::NodeTrees; using paddle::platform::ProfilerResult; +using paddle::platform::RuntimeTraceEvent; +using paddle::platform::SerializationLogger; +using paddle::platform::TracerEventType; TEST(SerializationLoggerTest, dump_case0) { std::list<HostTraceEvent> host_events; diff --git a/paddle/fluid/platform/profiler/event_node.cc b/paddle/fluid/platform/profiler/event_node.cc index b909fb5f25a..e1af63ad890 100644 --- a/paddle/fluid/platform/profiler/event_node.cc +++ b/paddle/fluid/platform/profiler/event_node.cc @@ -12,6 +12,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler/event_node.h" #include + #include #include #include diff --git a/paddle/fluid/platform/profiler/event_python.cc b/paddle/fluid/platform/profiler/event_python.cc index 5c42c8e8bf6..abde62c6b14 100644 --- a/paddle/fluid/platform/profiler/event_python.cc +++ b/paddle/fluid/platform/profiler/event_python.cc @@ -10,6 +10,7 @@ See the License for the specific language governing permissions and limitations under the License.
*/ #include "paddle/fluid/platform/profiler/event_python.h" + #include "paddle/fluid/platform/profiler/chrometracing_logger.h" #include "paddle/fluid/platform/profiler/dump/deserialization_reader.h" #include "paddle/fluid/platform/profiler/dump/serialization_logger.h" diff --git a/paddle/fluid/platform/profiler/event_tracing.h b/paddle/fluid/platform/profiler/event_tracing.h index fcaba9a43ca..fd81c15f92a 100644 --- a/paddle/fluid/platform/profiler/event_tracing.h +++ b/paddle/fluid/platform/profiler/event_tracing.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/fluid/platform/event.h" #include "paddle/fluid/platform/profiler/trace_event.h" @@ -70,10 +71,11 @@ class RecordEvent { * @param level: Used to filter events, works like glog VLOG(level). * RecordEvent will works if HostTraceLevel >= level. */ - explicit RecordEvent(const char* name, const TracerEventType type = - TracerEventType::UserDefined, - uint32_t level = kDefaultTraceLevel, - const EventRole role = EventRole::kOrdinary); + explicit RecordEvent( + const char* name, + const TracerEventType type = TracerEventType::UserDefined, + uint32_t level = kDefaultTraceLevel, + const EventRole role = EventRole::kOrdinary); RecordEvent(const std::string& name, const std::string& attr, const TracerEventType type = TracerEventType::UserDefined, diff --git a/paddle/fluid/platform/profiler/host_event_recorder.h b/paddle/fluid/platform/profiler/host_event_recorder.h index afd41352465..1359c3b85a0 100644 --- a/paddle/fluid/platform/profiler/host_event_recorder.h +++ b/paddle/fluid/platform/profiler/host_event_recorder.h @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/os_info.h" @@ -58,7 +59,7 @@ class EventContainer { public: // Record an event template - void Record(Args &&... args) { + void Record(Args &&...args) { DoRecord(ContainsStdString(), std::forward(args)...); } @@ -112,7 +113,7 @@ class EventContainer { // Record an event with string arguments template - void DoRecord(std::true_type, Args &&... args) { + void DoRecord(std::true_type, Args &&...args) { auto *storage = GetEventStorage(); std::function allocator = [this](size_t size) { return GetStrBufFromArena(size); @@ -122,7 +123,7 @@ class EventContainer { // Record an event without any string argument template - void DoRecord(std::false_type, Args &&... args) { + void DoRecord(std::false_type, Args &&...args) { auto *storage = GetEventStorage(); new (storage) EventType(std::forward(args)...); } @@ -199,7 +200,7 @@ class ThreadEventRecorder { public: // Forward call to EventContainer::Record template - void RecordEvent(Args &&... args) { + void RecordEvent(Args &&...args) { base_evt_cntr_.Record(std::forward(args)...); } @@ -237,7 +238,7 @@ class HostEventRecorder { // Do your best to avoid using 'std::string' as the argument type. // It will cause deep-copy to harm performance. template - void RecordEvent(Args &&... args) { + void RecordEvent(Args &&...args) { GetThreadLocalRecorder()->RecordEvent(std::forward(args)...); } diff --git a/paddle/fluid/platform/profiler/host_tracer.cc b/paddle/fluid/platform/profiler/host_tracer.cc index b7eb53331b7..8a36a3a8bab 100644 --- a/paddle/fluid/platform/profiler/host_tracer.cc +++ b/paddle/fluid/platform/profiler/host_tracer.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/fluid/platform/profiler/host_tracer.h" + #include "glog/logging.h" #include "paddle/fluid/platform/flags.h" #include "paddle/fluid/platform/profiler/common_event.h" diff --git a/paddle/fluid/platform/profiler/mlu/cnpapi_data_process.cc b/paddle/fluid/platform/profiler/mlu/cnpapi_data_process.cc index 36abf77279d..7afdb5eb2a3 100644 --- a/paddle/fluid/platform/profiler/mlu/cnpapi_data_process.cc +++ b/paddle/fluid/platform/profiler/mlu/cnpapi_data_process.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/fluid/platform/profiler/mlu/cnpapi_data_process.h" + #include + #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/os_info.h" diff --git a/paddle/fluid/platform/profiler/mlu/mlu_tracer.cc b/paddle/fluid/platform/profiler/mlu/mlu_tracer.cc index 2d719a8bbfd..bbaafa3faa6 100644 --- a/paddle/fluid/platform/profiler/mlu/mlu_tracer.cc +++ b/paddle/fluid/platform/profiler/mlu/mlu_tracer.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/fluid/platform/profiler/mlu/mlu_tracer.h" + #include #include + #include "glog/logging.h" #include "paddle/fluid/framework/new_executor/workqueue/workqueue_utils.h" #include "paddle/fluid/platform/os_info.h" diff --git a/paddle/fluid/platform/profiler/profiler.cc b/paddle/fluid/platform/profiler/profiler.cc index a417eda1509..8bcf856c01a 100644 --- a/paddle/fluid/platform/profiler/profiler.cc +++ b/paddle/fluid/platform/profiler/profiler.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/platform/profiler/profiler.h" + #include "glog/logging.h" #ifdef PADDLE_WITH_CUDA #include diff --git a/paddle/fluid/platform/profiler/profiler.h b/paddle/fluid/platform/profiler/profiler.h index ea346a4fb74..65a3bcc02d8 100644 --- a/paddle/fluid/platform/profiler/profiler.h +++ b/paddle/fluid/platform/profiler/profiler.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/profiler/cpu_utilization.h" #include "paddle/fluid/platform/profiler/event_node.h" diff --git a/paddle/fluid/platform/profiler/profiler_test.cc b/paddle/fluid/platform/profiler/profiler_test.cc index f2c867ffff2..1f1fbcb71ec 100644 --- a/paddle/fluid/platform/profiler/profiler_test.cc +++ b/paddle/fluid/platform/profiler/profiler_test.cc @@ -14,6 +14,7 @@ #include #include + #include "glog/logging.h" #include "gtest/gtest.h" #ifdef PADDLE_WITH_CUDA @@ -27,11 +28,11 @@ #include "paddle/fluid/platform/profiler/profiler.h" TEST(ProfilerTest, TestHostTracer) { - using paddle::platform::ProfilerOptions; using paddle::platform::Profiler; + using paddle::platform::ProfilerOptions; + using paddle::platform::ProfilerResult; using paddle::platform::RecordInstantEvent; using paddle::platform::TracerEventType; - using paddle::platform::ProfilerResult; ProfilerOptions options; options.trace_level = 2; options.trace_switch = 3; @@ -58,8 +59,8 @@ TEST(ProfilerTest, TestHostTracer) { } TEST(ProfilerTest, TestCudaTracer) { - using paddle::platform::ProfilerOptions; using paddle::platform::Profiler; + using paddle::platform::ProfilerOptions; using paddle::platform::ProfilerResult; ProfilerOptions options; options.trace_level = 0; diff --git a/paddle/fluid/platform/profiler/test_event_node.cc b/paddle/fluid/platform/profiler/test_event_node.cc index b8d1306ad07..23ad917b57d 100644 --- a/paddle/fluid/platform/profiler/test_event_node.cc +++ b/paddle/fluid/platform/profiler/test_event_node.cc @@ -13,22 +13,21 @@ // limitations under the License. 
#include "gtest/gtest.h" - #include "paddle/fluid/platform/profiler/chrometracing_logger.h" #include "paddle/fluid/platform/profiler/event_node.h" using paddle::platform::ChromeTracingLogger; -using paddle::platform::NodeTrees; -using paddle::platform::HostTraceEventNode; using paddle::platform::CudaRuntimeTraceEventNode; +using paddle::platform::DeviceTraceEvent; using paddle::platform::DeviceTraceEventNode; using paddle::platform::HostTraceEvent; -using paddle::platform::RuntimeTraceEvent; -using paddle::platform::DeviceTraceEvent; -using paddle::platform::TracerEventType; +using paddle::platform::HostTraceEventNode; using paddle::platform::KernelEventInfo; using paddle::platform::MemcpyEventInfo; using paddle::platform::MemsetEventInfo; +using paddle::platform::NodeTrees; +using paddle::platform::RuntimeTraceEvent; +using paddle::platform::TracerEventType; TEST(NodeTreesTest, LogMe_case0) { std::list host_events; std::list runtime_events; @@ -194,8 +193,10 @@ TEST(NodeTreesTest, HandleTrees_case0) { } std::function host_event_node_handle( [&](HostTraceEventNode* a) { logger.LogHostTraceEventNode(*a); }); - std::function runtime_event_node_handle([&]( - CudaRuntimeTraceEventNode* a) { logger.LogRuntimeTraceEventNode(*a); }); + std::function runtime_event_node_handle( + [&](CudaRuntimeTraceEventNode* a) { + logger.LogRuntimeTraceEventNode(*a); + }); std::function device_event_node_handle( [&](DeviceTraceEventNode* a) { logger.LogDeviceTraceEventNode(*a); }); tree.HandleTrees(host_event_node_handle, runtime_event_node_handle, diff --git a/paddle/fluid/platform/profiler/trace_event_collector.h b/paddle/fluid/platform/profiler/trace_event_collector.h index 5f2bc9dc90d..d1593bc1bfc 100644 --- a/paddle/fluid/platform/profiler/trace_event_collector.h +++ b/paddle/fluid/platform/profiler/trace_event_collector.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include "paddle/fluid/platform/profiler/trace_event.h" namespace paddle { diff --git a/paddle/fluid/platform/profiler/utils.h b/paddle/fluid/platform/profiler/utils.h index 06d1636c461..433fd0b825a 100644 --- a/paddle/fluid/platform/profiler/utils.h +++ b/paddle/fluid/platform/profiler/utils.h @@ -15,6 +15,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/platform/dynload/cupti.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/os_info.h" @@ -26,8 +27,9 @@ template std::string string_format(const std::string& format, Args... args) { int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) 
+ 1; // Extra space for '\0' - PADDLE_ENFORCE_GE(size_s, 0, platform::errors::Fatal( - "Error during profiler data formatting.")); + PADDLE_ENFORCE_GE( + size_s, 0, + platform::errors::Fatal("Error during profiler data formatting.")); auto size = static_cast(size_s); auto buf = std::make_unique(size); std::snprintf(buf.get(), size, format.c_str(), args...); diff --git a/paddle/fluid/platform/profiler_helper.h b/paddle/fluid/platform/profiler_helper.h index f64e05504aa..ae856044f8f 100644 --- a/paddle/fluid/platform/profiler_helper.h +++ b/paddle/fluid/platform/profiler_helper.h @@ -390,8 +390,8 @@ void SetEvent(bool merge_thread, const Event &analyze_event, index++; } if (split_pos == -1 && !main_thread_event_name.count(rit->name())) { - event_name = "thread" + std::to_string(rit->thread_id()) + "::" + - rit->name(); + event_name = "thread" + std::to_string(rit->thread_id()) + + "::" + rit->name(); } else { if (!main_thread_event_name.count(rit->name())) { event_name = diff --git a/paddle/fluid/platform/profiler_test.cc b/paddle/fluid/platform/profiler_test.cc index e9f84a49246..18d4b4dc834 100644 --- a/paddle/fluid/platform/profiler_test.cc +++ b/paddle/fluid/platform/profiler_test.cc @@ -36,24 +36,24 @@ TEST(Event, CpuElapsedTime) { TEST(RecordEvent, RecordEvent) { using paddle::platform::Event; + using paddle::platform::EventRole; + using paddle::platform::EventSortingKey; using paddle::platform::EventType; - using paddle::platform::RecordEvent; - using paddle::platform::PushEvent; using paddle::platform::PopEvent; using paddle::platform::ProfilerState; - using paddle::platform::EventSortingKey; - using paddle::platform::EventRole; + using paddle::platform::PushEvent; + using paddle::platform::RecordEvent; ProfilerState state = ProfilerState::kCPU; EnableProfiler(state); /* Usage 1: - * PushEvent(evt_name); - * ... - * code to be analyzed - * ... - * PopEvent(evt_name); - */ + * PushEvent(evt_name); + * ... + * code to be analyzed + * ... + * PopEvent(evt_name); + */ LOG(INFO) << "Usage 1: PushEvent & PopEvent"; for (int loop = 0; loop < 3; ++loop) { for (int i = 1; i < 5; ++i) { diff --git a/paddle/fluid/platform/resource_pool.h b/paddle/fluid/platform/resource_pool.h index f01d006d5b2..737001a50ab 100644 --- a/paddle/fluid/platform/resource_pool.h +++ b/paddle/fluid/platform/resource_pool.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/macros.h" diff --git a/paddle/fluid/platform/stream/cuda_stream.cc b/paddle/fluid/platform/stream/cuda_stream.cc index e3e735d03ab..d7f60e4019d 100644 --- a/paddle/fluid/platform/stream/cuda_stream.cc +++ b/paddle/fluid/platform/stream/cuda_stream.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/platform/stream/cuda_stream.h" + #include "paddle/fluid/platform/cuda_device_guard.h" #include "paddle/fluid/platform/device/gpu/gpu_types.h" #include "paddle/fluid/platform/device_context.h" diff --git a/paddle/fluid/platform/stream_callback_manager.cc b/paddle/fluid/platform/stream_callback_manager.cc index 6fa326d57bc..bb9a405798b 100644 --- a/paddle/fluid/platform/stream_callback_manager.cc +++ b/paddle/fluid/platform/stream_callback_manager.cc @@ -13,6 +13,7 @@ // limitations under the License. 
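For reference, the string_format helper whose PADDLE_ENFORCE_GE call is reflowed above uses the standard two-pass snprintf idiom: a first call with a null buffer measures the output, then the real call writes into an exactly-sized buffer. A self-contained sketch that keeps the same logic but swaps the Paddle error macro for a plain exception:

// Two-pass snprintf sizing, as in profiler/utils.h, with simplified errors.
#include <cstdio>
#include <memory>
#include <stdexcept>
#include <string>

template <typename... Args>
std::string string_format(const std::string& format, Args... args) {
  int size_s = std::snprintf(nullptr, 0, format.c_str(), args...) +
               1;  // extra space for '\0'
  if (size_s <= 0) throw std::runtime_error("formatting error");
  auto size = static_cast<size_t>(size_s);
  auto buf = std::make_unique<char[]>(size);
  std::snprintf(buf.get(), size, format.c_str(), args...);
  return std::string(buf.get(), size - 1);  // drop the trailing '\0'
}

int main() { std::printf("%s\n", string_format("%d ms", 42).c_str()); }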
#include "paddle/fluid/platform/stream_callback_manager.h" + #include "paddle/fluid/platform/device/device_wrapper.h" namespace paddle { diff --git a/paddle/fluid/platform/transform.h b/paddle/fluid/platform/transform.h index 6f714a67703..32c759d0102 100644 --- a/paddle/fluid/platform/transform.h +++ b/paddle/fluid/platform/transform.h @@ -25,6 +25,7 @@ limitations under the License. */ #if defined(__NVCC__) || defined(__HIPCC__) #include #include + #include "paddle/fluid/platform/details/cuda_transform_iterator_cast.h" #endif diff --git a/paddle/fluid/platform/transform_test.cu b/paddle/fluid/platform/transform_test.cu index 2e7b8b402f6..1caa2e87707 100644 --- a/paddle/fluid/platform/transform_test.cu +++ b/paddle/fluid/platform/transform_test.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include + #include "paddle/fluid/memory/allocation/allocator_facade.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/memory/memory.h" @@ -38,10 +39,10 @@ class Multiply { using paddle::memory::Alloc; using paddle::memory::Copy; -using paddle::platform::CPUPlace; -using paddle::platform::CUDAPlace; using paddle::platform::CPUDeviceContext; +using paddle::platform::CPUPlace; using paddle::platform::CUDADeviceContext; +using paddle::platform::CUDAPlace; using paddle::platform::Transform; diff --git a/paddle/fluid/pybind/ascend_wrapper_py.cc b/paddle/fluid/pybind/ascend_wrapper_py.cc index fdf3a12a81f..8c1eb2c1b90 100644 --- a/paddle/fluid/pybind/ascend_wrapper_py.cc +++ b/paddle/fluid/pybind/ascend_wrapper_py.cc @@ -26,11 +26,13 @@ limitations under the License. */ #include #include #include + #include #include #include #include #include + #include "paddle/fluid/framework/fleet/ascend_wrapper.h" #include "paddle/fluid/platform/device/npu/ascend_npu_info.h" #include "paddle/fluid/platform/enforce.h" @@ -78,8 +80,9 @@ ge::Status ge_initialize( py::gil_scoped_release release; auto init_options = convert_map(options); ge::Status res = ge::GEInitialize(init_options); - PADDLE_ENFORCE_EQ(res, ge::SUCCESS, platform::errors::Fatal( - "ge initialize not success:%d", res)); + PADDLE_ENFORCE_EQ( + res, ge::SUCCESS, + platform::errors::Fatal("ge initialize not success:%d", res)); py::gil_scoped_acquire acquire; return res; } @@ -253,7 +256,7 @@ void BindAscendGraph(py::module *m) { return std::unique_ptr( new ge::Session(convert_map(options))); })) - .def("add_graph", (ge::Status (Session::*)(uint32_t, const Graph &)) & + .def("add_graph", (ge::Status(Session::*)(uint32_t, const Graph &)) & Session::AddGraph) .def("add_graph", [](Session &ss, uint32_t index, const Graph &graph, @@ -261,14 +264,15 @@ void BindAscendGraph(py::module *m) { return ss.AddGraph(index, graph, convert_map(options)); }) .def("remove_graph", &Session::RemoveGraph) - .def("run_graph", - [](Session &ss, uint32_t graphId, - const std::vector &inputs) -> py::tuple { - std::vector outputs; - ge::Status res = ss.RunGraph(graphId, inputs, outputs); - return py::make_tuple(outputs, res); - }, - py::call_guard()) + .def( + "run_graph", + [](Session &ss, uint32_t graphId, + const std::vector &inputs) -> py::tuple { + std::vector outputs; + ge::Status res = ss.RunGraph(graphId, inputs, outputs); + return py::make_tuple(outputs, res); + }, + py::call_guard()) .def("build_graph", &Session::BuildGraph) .def("run_graph_async", &Session::RunGraphAsync) #ifdef PADDLE_WITH_ASCEND_STRING @@ -385,7 +389,7 @@ void BindAscendGraph(py::module *m) { }) #ifdef 
PADDLE_WITH_ASCEND_STRING .def("get_input_desc", - (TensorDesc (Operator::*)(uint32_t) const) & Operator::GetInputDesc) + (TensorDesc(Operator::*)(uint32_t) const) & Operator::GetInputDesc) .def("get_input_desc", [](Operator &op, const std::string &name) { return op.GetInputDescByName(name.c_str()); @@ -420,7 +424,7 @@ void BindAscendGraph(py::module *m) { return op.GetOutputDescByName(name.c_str()); }) .def("get_output_desc", - (TensorDesc (Operator::*)(uint32_t) const) & Operator::GetOutputDesc) + (TensorDesc(Operator::*)(uint32_t) const) & Operator::GetOutputDesc) .def("update_output_desc", static_cast(&Operator::UpdateOutputDesc)) @@ -779,19 +783,18 @@ void BindAscendGraph(py::module *m) { .def("get_tensor_desc", &Tensor::GetTensorDesc) // .def("set_data", (graphStatus(Tensor::*)(std::vector &&)) & // Tensor::SetData) - .def("set_data", (graphStatus (Tensor::*)(const std::vector &)) & + .def("set_data", (graphStatus(Tensor::*)(const std::vector &)) & Tensor::SetData) .def("set_data", - (graphStatus (Tensor::*)(const uint8_t *, size_t)) & Tensor::SetData) + (graphStatus(Tensor::*)(const uint8_t *, size_t)) & Tensor::SetData) #ifdef PADDLE_WITH_ASCEND_STRING - .def("set_data", - (graphStatus (Tensor::*)(const char *)) & Tensor::SetData) + .def("set_data", (graphStatus(Tensor::*)(const char *)) & Tensor::SetData) #else .def("set_data", (graphStatus (Tensor::*)(const std::string &)) & Tensor::SetData) #endif .def("set_data", - (graphStatus (Tensor::*)(const std::vector &)) & + (graphStatus(Tensor::*)(const std::vector &)) & Tensor::SetData) .def("get_data", @@ -813,8 +816,9 @@ void BindAscendGraph(py::module *m) { .def(py::init(), py::arg("shape"), py::arg("format") = FORMAT_ND, py::arg("dt") = DT_FLOAT) .def(py::init()) - .def("update", (void (TensorDesc::*)(const Shape &, Format, DataType)) & - TensorDesc::Update, + .def("update", + (void(TensorDesc::*)(const Shape &, Format, DataType)) & + TensorDesc::Update, py::arg("shape"), py::arg("format") = FORMAT_ND, py::arg("dt") = DT_FLOAT) .def("set_shape", &TensorDesc::SetShape) diff --git a/paddle/fluid/pybind/bind_cost_model.cc b/paddle/fluid/pybind/bind_cost_model.cc index a4a40f1fd02..ef2fe0dd3d4 100644 --- a/paddle/fluid/pybind/bind_cost_model.cc +++ b/paddle/fluid/pybind/bind_cost_model.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/pybind/bind_cost_model.h" #include + #include "paddle/fluid/framework/ir/cost_model.h" #include "paddle/fluid/framework/program_desc.h" diff --git a/paddle/fluid/pybind/bind_fleet_executor.cc b/paddle/fluid/pybind/bind_fleet_executor.cc index 8491d1e2249..6bd03203744 100644 --- a/paddle/fluid/pybind/bind_fleet_executor.cc +++ b/paddle/fluid/pybind/bind_fleet_executor.cc @@ -13,10 +13,13 @@ // limitations under the License. 
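The repeated edits above, e.g. "(TensorDesc (Operator::*)(uint32_t) const) &" becoming "(TensorDesc(Operator::*)(uint32_t) const) &", only delete a space. The cast itself is what lets the binding pick one overload of an overloaded member function, since a bare "&Operator::GetInputDesc" would be ambiguous. A standalone illustration with a hypothetical Widget class:

// Casting to an exact pointer-to-member type resolves the overload set;
// the clang-format change above only touches the whitespace in the cast.
#include <cassert>
#include <string>

struct Widget {
  int desc(unsigned index) const { return static_cast<int>(index); }
  int desc(const std::string& name) const {
    return static_cast<int>(name.size());
  }
};

int main() {
  auto by_index = (int(Widget::*)(unsigned) const) & Widget::desc;
  Widget w;
  assert((w.*by_index)(3u) == 3);  // the unsigned overload was selected
  return 0;
}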
#include "paddle/fluid/pybind/bind_fleet_executor.h" + #include #include + #include #include + #include "paddle/fluid/distributed/fleet_executor/dist_model.h" #include "paddle/fluid/distributed/fleet_executor/dist_model_tensor_wrapper.h" #include "paddle/fluid/distributed/fleet_executor/fleet_executor.h" @@ -62,13 +65,13 @@ struct npy_format_descriptor { namespace paddle { namespace pybind { -using paddle::distributed::FleetExecutor; -using paddle::distributed::TaskNode; -using paddle::distributed::DistModelConfig; using paddle::distributed::DistModel; +using paddle::distributed::DistModelConfig; using paddle::distributed::DistModelDataBuf; -using paddle::distributed::DistModelTensor; using paddle::distributed::DistModelDataType; +using paddle::distributed::DistModelTensor; +using paddle::distributed::FleetExecutor; +using paddle::distributed::TaskNode; using paddle::framework::OpDesc; using paddle::framework::ProgramDesc; @@ -217,33 +220,34 @@ void BindFleetExecutor(py::module* m) { .def("reset", &DistModelDataBufReset) .def("reset", &DistModelDataBufReset) .def("length", &DistModelDataBuf::length) - .def("tolist", [](DistModelDataBuf& self, - const std::string& dtype) -> py::list { - py::list l; - if (dtype == "int32") { - auto* data = static_cast(self.data()); - auto size = self.length() / sizeof(int32_t); - l = py::cast(std::vector(data, data + size)); - } else if (dtype == "int64") { - auto* data = static_cast(self.data()); - auto size = self.length() / sizeof(int64_t); - l = py::cast(std::vector(data, data + size)); - } else if (dtype == "float32") { - auto* data = static_cast(self.data()); - auto size = self.length() / sizeof(float); - l = py::cast(std::vector(data, data + size)); - } else if (dtype == "float16") { - auto* data = static_cast(self.data()); - auto size = self.length() / sizeof(paddle::platform::float16); - l = py::cast( - std::vector(data, data + size)); - } else { - PADDLE_THROW(platform::errors::Unimplemented( - "Unsupported data type. Now only supports INT32, INT64, " - "FLOAT16 and FLOAT32.")); - } - return l; - }); + .def("tolist", + [](DistModelDataBuf& self, const std::string& dtype) -> py::list { + py::list l; + if (dtype == "int32") { + auto* data = static_cast(self.data()); + auto size = self.length() / sizeof(int32_t); + l = py::cast(std::vector(data, data + size)); + } else if (dtype == "int64") { + auto* data = static_cast(self.data()); + auto size = self.length() / sizeof(int64_t); + l = py::cast(std::vector(data, data + size)); + } else if (dtype == "float32") { + auto* data = static_cast(self.data()); + auto size = self.length() / sizeof(float); + l = py::cast(std::vector(data, data + size)); + } else if (dtype == "float16") { + auto* data = + static_cast(self.data()); + auto size = self.length() / sizeof(paddle::platform::float16); + l = py::cast( + std::vector(data, data + size)); + } else { + PADDLE_THROW(platform::errors::Unimplemented( + "Unsupported data type. Now only supports INT32, INT64, " + "FLOAT16 and FLOAT32.")); + } + return l; + }); py::class_(*m, "DistModelTensor") .def(py::init<>()) diff --git a/paddle/fluid/pybind/communication.cc b/paddle/fluid/pybind/communication.cc index aef02d65b4d..418804df028 100644 --- a/paddle/fluid/pybind/communication.cc +++ b/paddle/fluid/pybind/communication.cc @@ -12,16 +12,18 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "paddle/fluid/pybind/communication.h" + #include #include #include #include #include + #include #include #include "paddle/fluid/distributed/store/tcp_store.h" -#include "paddle/fluid/pybind/communication.h" namespace py = pybind11; @@ -35,22 +37,24 @@ void BindTCPStore(py::module *m) { py::class_>( *m, "Store") .def(py::init<>()) - .def("set", - [](distributed::Store &self, const std::string &key, - const std::string &value) { - std::vector data(value.begin(), value.end()); - self.set(key, data); - }, - py::arg("key"), py::arg("value"), - py::call_guard()) - .def("get", - [](distributed::Store &self, - const std::string &key) -> py::bytes { - auto data = self.get(key); - return py::bytes(reinterpret_cast(data.data()), - data.size()); - }, - py::arg("key"), py::call_guard()) + .def( + "set", + [](distributed::Store &self, const std::string &key, + const std::string &value) { + std::vector data(value.begin(), value.end()); + self.set(key, data); + }, + py::arg("key"), py::arg("value"), + py::call_guard()) + .def( + "get", + [](distributed::Store &self, + const std::string &key) -> py::bytes { + auto data = self.get(key); + return py::bytes(reinterpret_cast(data.data()), + data.size()); + }, + py::arg("key"), py::call_guard()) .def("add", &distributed::Store::add, py::call_guard()) .def("wait", &distributed::Store::wait, diff --git a/paddle/fluid/pybind/communicator_py.cc b/paddle/fluid/pybind/communicator_py.cc index 723d7f31972..0cb5aa6ef70 100644 --- a/paddle/fluid/pybind/communicator_py.cc +++ b/paddle/fluid/pybind/communicator_py.cc @@ -15,16 +15,17 @@ limitations under the License. */ #include "paddle/fluid/pybind/communicator_py.h" #include + #include #include #include #include -#include "paddle/fluid/framework/program_desc.h" -#include "pybind11/pybind11.h" +#include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/operators/distributed/communicator.h" #include "paddle/fluid/operators/distributed/large_scale_kv.h" #include "paddle/fluid/operators/distributed/ps/service/communicator/communicator_common.h" +#include "pybind11/pybind11.h" namespace py = pybind11; diff --git a/paddle/fluid/pybind/compatible.cc b/paddle/fluid/pybind/compatible.cc index cfe87a86cf0..013d0cc0c60 100644 --- a/paddle/fluid/pybind/compatible.cc +++ b/paddle/fluid/pybind/compatible.cc @@ -13,23 +13,25 @@ // limitations under the License. 
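The BindTCPStore hunk above shows the transformation applied throughout this patch: clang-format now starts a lambda-based .def on its own line. In isolation, the pattern is a binding whose body runs with the GIL released through py::call_guard. A sketch assuming pybind11 is available, with KVStore as a hypothetical stand-in for distributed::Store:

// A lambda .def with keyword arguments and a GIL-releasing call guard,
// mirroring the reformatted set/get bindings above. Not Paddle code.
#include <pybind11/pybind11.h>

#include <map>
#include <string>

namespace py = pybind11;

struct KVStore {
  std::map<std::string, std::string> data;
};

PYBIND11_MODULE(demo, m) {
  py::class_<KVStore>(m, "KVStore")
      .def(py::init<>())
      .def(
          "set",
          [](KVStore& self, const std::string& key, const std::string& value) {
            self.data[key] = value;  // imagine a blocking network call here
          },
          py::arg("key"), py::arg("value"),
          py::call_guard<py::gil_scoped_release>());
}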
#include "paddle/fluid/pybind/compatible.h" + #include #include + #include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/pybind/pybind_boost_headers.h" namespace py = pybind11; -using paddle::framework::compatible::OpAttrVariantT; -using paddle::framework::compatible::OpUpdateInfo; using paddle::framework::compatible::OpAttrInfo; -using paddle::framework::compatible::OpInputOutputInfo; +using paddle::framework::compatible::OpAttrVariantT; using paddle::framework::compatible::OpBugfixInfo; -using paddle::framework::compatible::OpUpdateType; -using paddle::framework::compatible::OpUpdateBase; -using paddle::framework::compatible::OpVersionDesc; using paddle::framework::compatible::OpCheckpoint; +using paddle::framework::compatible::OpInputOutputInfo; +using paddle::framework::compatible::OpUpdateBase; +using paddle::framework::compatible::OpUpdateInfo; +using paddle::framework::compatible::OpUpdateType; using paddle::framework::compatible::OpVersion; +using paddle::framework::compatible::OpVersionDesc; namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc index 8b48d0b4e44..89a3904d000 100644 --- a/paddle/fluid/pybind/const_value.cc +++ b/paddle/fluid/pybind/const_value.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/pybind/const_value.h" + #include "paddle/fluid/framework/ir/node.h" #include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/operator.h" diff --git a/paddle/fluid/pybind/crypto.cc b/paddle/fluid/pybind/crypto.cc index 8fbf395bf18..07a9e4021ce 100644 --- a/paddle/fluid/pybind/crypto.cc +++ b/paddle/fluid/pybind/crypto.cc @@ -97,11 +97,12 @@ void BindAESCipher(py::module* m) { void BindCipherFactory(py::module* m) { py::class_(*m, "CipherFactory") .def(py::init<>()) - .def_static("create_cipher", - [](const std::string& config_file) { - return CipherFactory::CreateCipher(config_file); - }, - py::arg("config_file") = std::string()); + .def_static( + "create_cipher", + [](const std::string& config_file) { + return CipherFactory::CreateCipher(config_file); + }, + py::arg("config_file") = std::string()); } void BindCipherUtils(py::module* m) { diff --git a/paddle/fluid/pybind/cuda_streams_py.cc b/paddle/fluid/pybind/cuda_streams_py.cc index 64c145c94f9..54080d5e096 100644 --- a/paddle/fluid/pybind/cuda_streams_py.cc +++ b/paddle/fluid/pybind/cuda_streams_py.cc @@ -12,13 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/pybind/cuda_streams_py.h" + #include #include #include "paddle/fluid/platform/device_event_base.h" #include "paddle/fluid/platform/event.h" #include "paddle/fluid/platform/stream/cuda_stream.h" -#include "paddle/fluid/pybind/cuda_streams_py.h" namespace py = pybind11; @@ -28,29 +29,31 @@ void BindCudaStream(py::module *m_ptr) { auto &m = *m_ptr; // Bind Methods - m.def("_get_current_stream", - [](int deviceId) { + m.def( + "_get_current_stream", + [](int deviceId) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - return paddle::platform::stream::get_current_stream(deviceId); + return paddle::platform::stream::get_current_stream(deviceId); #else - PADDLE_THROW(platform::errors::Unavailable( - "Paddle is not compiled with CUDA. Cannot visit cuda current" - "stream.")); + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CUDA. 
Cannot visit cuda current" + "stream.")); #endif - }, - py::return_value_policy::reference); + }, + py::return_value_policy::reference); - m.def("_set_current_stream", - [](paddle::platform::stream::CUDAStream &stream) { + m.def( + "_set_current_stream", + [](paddle::platform::stream::CUDAStream &stream) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - return paddle::platform::stream::set_current_stream(&stream); + return paddle::platform::stream::set_current_stream(&stream); #else - PADDLE_THROW(platform::errors::Unavailable( - "Paddle is not compiled with CUDA. Cannot set cuda current " - "stream.")); + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CUDA. Cannot set cuda current " + "stream.")); #endif - }, - py::return_value_policy::reference); + }, + py::return_value_policy::reference); m.def("_device_synchronize", [](int device_id) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -94,12 +97,13 @@ void BindCudaStream(py::module *m_ptr) { )DOC") #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - .def("wait_event", - [](paddle::platform::stream::CUDAStream &self, - paddle::platform::CudaEvent &event) { - self.WaitEvent(event.GetRawCudaEvent()); - }, - R"DOC( + .def( + "wait_event", + [](paddle::platform::stream::CUDAStream &self, + paddle::platform::CudaEvent &event) { + self.WaitEvent(event.GetRawCudaEvent()); + }, + R"DOC( Makes all future work submitted to stream wait for all work captured in event. Parameters: @@ -115,15 +119,16 @@ void BindCudaStream(py::module *m_ptr) { s.wait_event(event) )DOC") - .def("wait_stream", - [](paddle::platform::stream::CUDAStream &self, - paddle::platform::stream::CUDAStream &stream) { - paddle::platform::CudaEvent event; - event.Record(stream.raw_stream()); - - self.WaitEvent(event.GetRawCudaEvent()); - }, - R"DOC( + .def( + "wait_stream", + [](paddle::platform::stream::CUDAStream &self, + paddle::platform::stream::CUDAStream &stream) { + paddle::platform::CudaEvent event; + event.Record(stream.raw_stream()); + + self.WaitEvent(event.GetRawCudaEvent()); + }, + R"DOC( Synchronizes with the given stream. Parameters: @@ -139,11 +144,12 @@ void BindCudaStream(py::module *m_ptr) { s1.wait_stream(s2) )DOC") - .def("query", - [](paddle::platform::stream::CUDAStream &self) { - return self.Query(); - }, - R"DOC( + .def( + "query", + [](paddle::platform::stream::CUDAStream &self) { + return self.Query(); + }, + R"DOC( Return the status whether if all operations in stream have completed. Returns: A boolean value. @@ -157,11 +163,12 @@ void BindCudaStream(py::module *m_ptr) { is_done = s.query() )DOC") - .def("synchronize", - [](paddle::platform::stream::CUDAStream &self) { - self.Synchronize(); - }, - R"DOC( + .def( + "synchronize", + [](paddle::platform::stream::CUDAStream &self) { + self.Synchronize(); + }, + R"DOC( Waits for stream tasks to complete. Examples: @@ -173,16 +180,17 @@ void BindCudaStream(py::module *m_ptr) { s.synchronize() )DOC") - .def("record_event", - [](paddle::platform::stream::CUDAStream &self, - paddle::platform::CudaEvent *event) { - if (event == nullptr) { - event = new paddle::platform::CudaEvent(); - } - event->Record(self.raw_stream()); - return event; - }, - R"DOC( + .def( + "record_event", + [](paddle::platform::stream::CUDAStream &self, + paddle::platform::CudaEvent *event) { + if (event == nullptr) { + event = new paddle::platform::CudaEvent(); + } + event->Record(self.raw_stream()); + return event; + }, + R"DOC( Record a CUDA event in the stream. 
Parameters: @@ -201,7 +209,7 @@ void BindCudaStream(py::module *m_ptr) { event = s.record_event() )DOC", - py::arg("event") = nullptr) + py::arg("event") = nullptr) .def_property_readonly( "cuda_stream", [](paddle::platform::stream::CUDAStream &self) { @@ -225,32 +233,33 @@ void BindCudaStream(py::module *m_ptr) { )DOC") #endif - .def("__init__", - [](paddle::platform::stream::CUDAStream &self, - platform::CUDAPlace *device, int priority) { + .def( + "__init__", + [](paddle::platform::stream::CUDAStream &self, + platform::CUDAPlace *device, int priority) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - if (priority != 1 && priority != 2) { - PADDLE_THROW(platform::errors::InvalidArgument( - "Priority should be 1(high) or 2(normal) ")); - } - auto prio = paddle::platform::stream::Priority(priority); - auto stream_flag = - paddle::platform::stream::StreamFlag::kStreamNonBlocking; - - if (device == nullptr) { - int curr_device_id = platform::GetCurrentDeviceId(); - auto device_tmp = platform::CUDAPlace(curr_device_id); - device = &device_tmp; - } - - new (&self) paddle::platform::stream::CUDAStream(*device, prio, - stream_flag); + if (priority != 1 && priority != 2) { + PADDLE_THROW(platform::errors::InvalidArgument( + "Priority should be 1(high) or 2(normal) ")); + } + auto prio = paddle::platform::stream::Priority(priority); + auto stream_flag = + paddle::platform::stream::StreamFlag::kStreamNonBlocking; + + if (device == nullptr) { + int curr_device_id = platform::GetCurrentDeviceId(); + auto device_tmp = platform::CUDAPlace(curr_device_id); + device = &device_tmp; + } + + new (&self) paddle::platform::stream::CUDAStream(*device, prio, + stream_flag); #else PADDLE_THROW(platform::errors::Unavailable( "Class CUDAStream can only be initialized on the GPU platform.")); #endif - }, - py::arg("device") = nullptr, py::arg("priority") = 2) + }, + py::arg("device") = nullptr, py::arg("priority") = 2) .def( "__init__", [](paddle::platform::stream::CUDAStream &self, int device, @@ -315,15 +324,16 @@ void BindCudaStream(py::module *m_ptr) { )DOC") #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - .def("record", - [](paddle::platform::CudaEvent &self, - paddle::platform::stream::CUDAStream *stream) { - if (stream == nullptr) { - stream = paddle::platform::stream::get_current_stream(-1); - } - self.Record(stream->raw_stream()); - }, - R"DOC( + .def( + "record", + [](paddle::platform::CudaEvent &self, + paddle::platform::stream::CUDAStream *stream) { + if (stream == nullptr) { + stream = paddle::platform::stream::get_current_stream(-1); + } + self.Record(stream->raw_stream()); + }, + R"DOC( Records the event in the given stream. Parameters: @@ -338,10 +348,11 @@ void BindCudaStream(py::module *m_ptr) { event.record() )DOC", - py::arg("stream") = nullptr) - .def("query", - [](paddle::platform::CudaEvent &self) { return self.Query(); }, - R"DOC( + py::arg("stream") = nullptr) + .def( + "query", + [](paddle::platform::CudaEvent &self) { return self.Query(); }, + R"DOC( Queries the event's status. Returns: A boolean which indicates all work currently captured by the event has been completed. @@ -355,8 +366,9 @@ void BindCudaStream(py::module *m_ptr) { is_done = event.query() )DOC") - .def("synchronize", - [](paddle::platform::CudaEvent &self) { self.Synchronize(); }, R"DOC( + .def( + "synchronize", + [](paddle::platform::CudaEvent &self) { self.Synchronize(); }, R"DOC( Waits for an event to complete. 
Examples: @@ -369,22 +381,23 @@ void BindCudaStream(py::module *m_ptr) { )DOC") #endif - .def("__init__", - [](paddle::platform::CudaEvent &self, bool enable_timing, - bool blocking, bool interprocess) { + .def( + "__init__", + [](paddle::platform::CudaEvent &self, bool enable_timing, + bool blocking, bool interprocess) { #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - unsigned int flags = platform::GenerateDeviceEventFlag( - enable_timing, blocking, interprocess); - new (&self) paddle::platform::CudaEvent(flags); + unsigned int flags = platform::GenerateDeviceEventFlag( + enable_timing, blocking, interprocess); + new (&self) paddle::platform::CudaEvent(flags); #else - PADDLE_THROW(platform::errors::Unavailable( - "Class CUDAEvent can only be initialized on the GPU " - "platform.")); + PADDLE_THROW(platform::errors::Unavailable( + "Class CUDAEvent can only be initialized on the GPU " + "platform.")); #endif - }, - py::arg("enable_timing") = false, py::arg("blocking") = false, - py::arg("interprocess") = false); + }, + py::arg("enable_timing") = false, py::arg("blocking") = false, + py::arg("interprocess") = false); } } // namespace pybind diff --git a/paddle/fluid/pybind/data_set_py.cc b/paddle/fluid/pybind/data_set_py.cc index 5e2274cb651..700bd458a58 100644 --- a/paddle/fluid/pybind/data_set_py.cc +++ b/paddle/fluid/pybind/data_set_py.cc @@ -24,6 +24,7 @@ limitations under the License. */ #include #include #include + #include "google/protobuf/io/zero_copy_stream_impl.h" #include "google/protobuf/text_format.h" #include "paddle/fluid/framework/async_executor.h" diff --git a/paddle/fluid/pybind/distributed_py.cc b/paddle/fluid/pybind/distributed_py.cc index 6636fc8aca5..3d1a81da6f3 100644 --- a/paddle/fluid/pybind/distributed_py.cc +++ b/paddle/fluid/pybind/distributed_py.cc @@ -109,132 +109,141 @@ void BindDistributed(py::module *m) { .def("rank", &distributed::ProcessGroup::GetRank) .def("size", &distributed::ProcessGroup::GetSize) .def("name", &distributed::ProcessGroup::GetBackendName) - .def("allreduce", - [](distributed::ProcessGroup &self, py::handle py_tensor, - distributed::ReduceOp op) { - auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); - distributed::AllreduceOptions opts; - opts.reduce_op = op; - auto dense = - std::dynamic_pointer_cast(tensor.impl()); - std::vector tensors = {*dense}; - return self.AllReduce(tensors, tensors, opts); - }, - py::arg("tensor"), py::arg("op") = distributed::ReduceOp::SUM, - py::call_guard()) - - .def("broadcast", - [](distributed::ProcessGroup &self, py::handle py_tensor, - int source_rank) { - auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); - distributed::BroadcastOptions opts; - opts.source_rank = source_rank; - auto dense = - std::dynamic_pointer_cast(tensor.impl()); - std::vector tensors = {*dense}; - return self.Broadcast(tensors, tensors, opts); - }, - py::arg("tensor"), py::arg("source_rank"), - py::call_guard()) - - .def("barrier", - [](distributed::ProcessGroup &self, std::vector place_ids) { - distributed::BarrierOptions opts; - opts.place_ids = place_ids; - return self.Barrier(opts); - }, - py::arg("place_ids") = std::vector{}, - py::call_guard()) - - .def("send", - [](distributed::ProcessGroup &self, py::handle py_tensor, - int dst) { - auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); - auto dense = - std::dynamic_pointer_cast(tensor.impl()); - std::vector tensors = {*dense}; - return self.Send(tensors, dst); - }, - py::arg("tensor"), py::arg("dst"), - py::call_guard()) - - .def("recv", - 
[](distributed::ProcessGroup &self, py::handle py_tensor, - int src) { - auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); - auto dense = - std::dynamic_pointer_cast(tensor.impl()); - std::vector tensors = {*dense}; - return self.Recv(tensors, src); - }, - py::arg("tensor"), py::arg("src"), - py::call_guard()) - - .def("all_gather", - [](distributed::ProcessGroup &self, py::handle py_in_tensor, - py::handle py_out_tensor) { - auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); - auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); - auto in_dense = std::dynamic_pointer_cast( - in_tensor.impl()); - auto out_dense = std::dynamic_pointer_cast( - out_tensor.impl()); - std::vector in_tensors = {*in_dense}; - std::vector out_tensors = {*out_dense}; - return self.AllGather(in_tensors, out_tensors); - }, - py::arg("in"), py::arg("out"), - py::call_guard()) - - .def("alltoall", - [](distributed::ProcessGroup &self, py::handle py_in_tensor, - py::handle py_out_tensor) { - auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); - auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); - auto in_dense = std::dynamic_pointer_cast( - in_tensor.impl()); - auto out_dense = std::dynamic_pointer_cast( - out_tensor.impl()); - std::vector in_tensors = {*in_dense}; - std::vector out_tensors = {*out_dense}; - return self.AllToAll(in_tensors, out_tensors); - }, - py::arg("in"), py::arg("out"), - py::call_guard()) - - .def("reduce", - [](distributed::ProcessGroup &self, py::handle py_in_tensor, - int dst, distributed::ReduceOp op) { - auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); - distributed::ReduceOptions opts; - opts.reduce_op = op; - opts.root_rank = dst; - auto dense = std::dynamic_pointer_cast( - in_tensor.impl()); - std::vector tensors = {*dense}; - return self.Reduce(tensors, tensors, opts); - }, - py::arg("tensor"), py::arg("dst"), - py::arg("op") = distributed::ReduceOp::SUM, - py::call_guard()) - - .def("scatter", - [](distributed::ProcessGroup &self, py::handle py_in_tensor, - py::handle py_out_tensor, int src) { - auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); - auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); - distributed::ScatterOptions opts; - opts.root_rank = src; - auto in_dense = std::dynamic_pointer_cast( - in_tensor.impl()); - auto out_dense = std::dynamic_pointer_cast( - out_tensor.impl()); - std::vector in_tensors = {*in_dense}; - std::vector out_tensors = {*out_dense}; - return self.Scatter(in_tensors, out_tensors, opts); - }, - py::arg("in"), py::arg("out"), py::arg("src"), - py::call_guard()); + .def( + "allreduce", + [](distributed::ProcessGroup &self, py::handle py_tensor, + distributed::ReduceOp op) { + auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); + distributed::AllreduceOptions opts; + opts.reduce_op = op; + auto dense = + std::dynamic_pointer_cast(tensor.impl()); + std::vector tensors = {*dense}; + return self.AllReduce(tensors, tensors, opts); + }, + py::arg("tensor"), py::arg("op") = distributed::ReduceOp::SUM, + py::call_guard()) + + .def( + "broadcast", + [](distributed::ProcessGroup &self, py::handle py_tensor, + int source_rank) { + auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); + distributed::BroadcastOptions opts; + opts.source_rank = source_rank; + auto dense = + std::dynamic_pointer_cast(tensor.impl()); + std::vector tensors = {*dense}; + return self.Broadcast(tensors, tensors, opts); + }, + py::arg("tensor"), py::arg("source_rank"), + py::call_guard()) + + .def( + "barrier", + 
[](distributed::ProcessGroup &self, std::vector place_ids) { + distributed::BarrierOptions opts; + opts.place_ids = place_ids; + return self.Barrier(opts); + }, + py::arg("place_ids") = std::vector{}, + py::call_guard()) + + .def( + "send", + [](distributed::ProcessGroup &self, py::handle py_tensor, + int dst) { + auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); + auto dense = + std::dynamic_pointer_cast(tensor.impl()); + std::vector tensors = {*dense}; + return self.Send(tensors, dst); + }, + py::arg("tensor"), py::arg("dst"), + py::call_guard()) + + .def( + "recv", + [](distributed::ProcessGroup &self, py::handle py_tensor, + int src) { + auto tensor = CastPyArg2Tensor(py_tensor.ptr(), 0); + auto dense = + std::dynamic_pointer_cast(tensor.impl()); + std::vector tensors = {*dense}; + return self.Recv(tensors, src); + }, + py::arg("tensor"), py::arg("src"), + py::call_guard()) + + .def( + "all_gather", + [](distributed::ProcessGroup &self, py::handle py_in_tensor, + py::handle py_out_tensor) { + auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); + auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); + auto in_dense = std::dynamic_pointer_cast( + in_tensor.impl()); + auto out_dense = std::dynamic_pointer_cast( + out_tensor.impl()); + std::vector in_tensors = {*in_dense}; + std::vector out_tensors = {*out_dense}; + return self.AllGather(in_tensors, out_tensors); + }, + py::arg("in"), py::arg("out"), + py::call_guard()) + + .def( + "alltoall", + [](distributed::ProcessGroup &self, py::handle py_in_tensor, + py::handle py_out_tensor) { + auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); + auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); + auto in_dense = std::dynamic_pointer_cast( + in_tensor.impl()); + auto out_dense = std::dynamic_pointer_cast( + out_tensor.impl()); + std::vector in_tensors = {*in_dense}; + std::vector out_tensors = {*out_dense}; + return self.AllToAll(in_tensors, out_tensors); + }, + py::arg("in"), py::arg("out"), + py::call_guard()) + + .def( + "reduce", + [](distributed::ProcessGroup &self, py::handle py_in_tensor, + int dst, distributed::ReduceOp op) { + auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); + distributed::ReduceOptions opts; + opts.reduce_op = op; + opts.root_rank = dst; + auto dense = std::dynamic_pointer_cast( + in_tensor.impl()); + std::vector tensors = {*dense}; + return self.Reduce(tensors, tensors, opts); + }, + py::arg("tensor"), py::arg("dst"), + py::arg("op") = distributed::ReduceOp::SUM, + py::call_guard()) + + .def( + "scatter", + [](distributed::ProcessGroup &self, py::handle py_in_tensor, + py::handle py_out_tensor, int src) { + auto in_tensor = CastPyArg2Tensor(py_in_tensor.ptr(), 0); + auto out_tensor = CastPyArg2Tensor(py_out_tensor.ptr(), 0); + distributed::ScatterOptions opts; + opts.root_rank = src; + auto in_dense = std::dynamic_pointer_cast( + in_tensor.impl()); + auto out_dense = std::dynamic_pointer_cast( + out_tensor.impl()); + std::vector in_tensors = {*in_dense}; + std::vector out_tensors = {*out_dense}; + return self.Scatter(in_tensors, out_tensors, opts); + }, + py::arg("in"), py::arg("out"), py::arg("src"), + py::call_guard()); #if defined(PADDLE_WITH_NCCL) py::class_def("eager_assign_group_by_size", - [](py::handle py_tensors, std::vector is_sparse_gradient, - std::vector group_size_limits, - std::vector tensor_indices) { - auto tensors = CastPyArg2VectorOfTensor(py_tensors.ptr(), 0); - return distributed::Eager_AssignGroupBySize( - tensors, is_sparse_gradient, group_size_limits, 
tensor_indices); - }, - py::arg("tensors"), py::arg("is_sparse_gradient"), - py::arg("group_size_limits") = std::vector{25 * 1024 * 1024}, - py::arg("tensor_indices") = std::vector{}, - py::call_guard()); + m->def( + "eager_assign_group_by_size", + [](py::handle py_tensors, std::vector is_sparse_gradient, + std::vector group_size_limits, + std::vector tensor_indices) { + auto tensors = CastPyArg2VectorOfTensor(py_tensors.ptr(), 0); + return distributed::Eager_AssignGroupBySize( + tensors, is_sparse_gradient, group_size_limits, tensor_indices); + }, + py::arg("tensors"), py::arg("is_sparse_gradient"), + py::arg("group_size_limits") = std::vector{25 * 1024 * 1024}, + py::arg("tensor_indices") = std::vector{}, + py::call_guard()); py::class_>(*m, "EagerReducer", R"DOC()DOC") .def(py::init(&CreateEagerReducer)) - .def("prepare_for_backward", - [](distributed::EagerReducer &self, py::handle py_tensors) { - auto params = CastPyArg2VectorOfTensor(py_tensors.ptr(), 0); - self.PrepareForBackward(params); - }, - py::arg("tensors"), py::call_guard()); + .def( + "prepare_for_backward", + [](distributed::EagerReducer &self, py::handle py_tensors) { + auto params = CastPyArg2VectorOfTensor(py_tensors.ptr(), 0); + self.PrepareForBackward(params); + }, + py::arg("tensors"), py::call_guard()); } } // end namespace pybind diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index c1b26ee0b79..f9325d1b9ca 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -9,6 +9,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // disable numpy compile error +#include "paddle/fluid/pybind/eager.h" + #include #include @@ -22,7 +24,6 @@ limitations under the License. */ #include "paddle/fluid/memory/allocation/allocator.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/eager_utils.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/compat/convert_utils.h" @@ -488,45 +489,45 @@ void AutoInitStringTensorByStringTensor( } /** We should have init function with signature: - * 1. - * def __init__ () - * 2. - * def __init__ ( - * ** dtype: paddle::framework::proto::VarType::Type, - * ** dims: vector, - * ** name: std::string, - * ** type: paddle::framework::proto::VarType::LodTensor, - * ** persistable: bool) - * 3. (multi-place) - * (should have at least one parameter, one parameter equals to case 4, zero - * parameter equals to case 1) - * def __init__ ( - * ** value: ndarray, - * ** place: paddle::platform::Place, - * ** persistable: bool, - * ** zero_copy: bool, - * ** name: std::string, - * ** stop_gradient: bool) - * 4. - * def __init__ ( - * ** value: ndarray) - * 5. - * def __init__ ( - * ** tensor: Tensor) - * 6. (multi-place) - * (should have at least one parameter, one parameter equals to case 5, zero - * parameter equals to case 1.) - * def __init__ ( - * ** tensor: Tensor, - * ** place: paddle::platform::Place, - * ** name: std::string) - * 7. (multi-place) (should have at least one parameter, one parameter similar - * to case 5, zero parameter equals to case 1.) - * def __init__ ( - * ** tensor: FrameworkTensor, - * ** place: paddle::platform::Place, - * ** name: std::string) - * **/ + * 1. + * def __init__ () + * 2. 
+ * def __init__ ( + * ** dtype: paddle::framework::proto::VarType::Type, + * ** dims: vector, + * ** name: std::string, + * ** type: paddle::framework::proto::VarType::LodTensor, + * ** persistable: bool) + * 3. (multi-place) + * (should have at least one parameter, one parameter equals to case 4, zero + * parameter equals to case 1) + * def __init__ ( + * ** value: ndarray, + * ** place: paddle::platform::Place, + * ** persistable: bool, + * ** zero_copy: bool, + * ** name: std::string, + * ** stop_gradient: bool) + * 4. + * def __init__ ( + * ** value: ndarray) + * 5. + * def __init__ ( + * ** tensor: Tensor) + * 6. (multi-place) + * (should have at least one parameter, one parameter equals to case 5, zero + * parameter equals to case 1.) + * def __init__ ( + * ** tensor: Tensor, + * ** place: paddle::platform::Place, + * ** name: std::string) + * 7. (multi-place) (should have at least one parameter, one parameter similar + * to case 5, zero parameter equals to case 1.) + * def __init__ ( + * ** tensor: FrameworkTensor, + * ** place: paddle::platform::Place, + * ** name: std::string) + * **/ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { EAGER_TRY // set a flag to record use kwargs or not @@ -828,37 +829,37 @@ int TensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { } /** We should have init function with signature: - * 1. - * def __init__ () - * - * 2. - * def __init__ ( - * ** dims: vector, - * ** name: std::string) - * - * 3. - * (should have at least one parameter, one parameter equals to case 4, zero - * parameter equals to case 1) - * def __init__ ( - * ** value: ndarray, - * ** zero_copy: bool, - * ** name: std::string) - * - * 4. - * def __init__ ( - * ** value: ndarray) - * - * 5. - * def __init__ ( - * ** tensor: Tensor) - * - * 6. - * (should have at least one parameter, one parameter equals to case 5, zero - * parameter equals to case 1.) - * def __init__ ( - * ** tensor: Tensor, - * ** name: std::string) - * **/ + * 1. + * def __init__ () + * + * 2. + * def __init__ ( + * ** dims: vector, + * ** name: std::string) + * + * 3. + * (should have at least one parameter, one parameter equals to case 4, zero + * parameter equals to case 1) + * def __init__ ( + * ** value: ndarray, + * ** zero_copy: bool, + * ** name: std::string) + * + * 4. + * def __init__ ( + * ** value: ndarray) + * + * 5. + * def __init__ ( + * ** tensor: Tensor) + * + * 6. + * (should have at least one parameter, one parameter equals to case 5, zero + * parameter equals to case 1.) + * def __init__ ( + * ** tensor: Tensor, + * ** name: std::string) + * **/ int StringTensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { // set a flag to record use kwargs or not bool flag_kwargs = false; @@ -916,8 +917,9 @@ int StringTensorInit(PyObject* self, PyObject* args, PyObject* kwargs) { // case 1 VLOG(6) << "Calling case1's string initializer."; EmptyStringTensorInitializer( - py_tensor_ptr, egr::Controller::Instance().GenerateUniqueName( - "generated_string_tensor"), + py_tensor_ptr, + egr::Controller::Instance().GenerateUniqueName( + "generated_string_tensor"), egr::Controller::Instance().GetExpectedPlace()); return 0; } else { diff --git a/paddle/fluid/pybind/eager.h b/paddle/fluid/pybind/eager.h index a3eac7ab470..db2b438c3bd 100644 --- a/paddle/fluid/pybind/eager.h +++ b/paddle/fluid/pybind/eager.h @@ -11,11 +11,11 @@ limitations under the License. 
*/ #pragma once #include -#include "pybind11/pybind11.h" -#include "pybind11/stl.h" #include "paddle/fluid/eager/pylayer/py_layer_node.h" #include "paddle/phi/core/dense_tensor.h" +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/eager_custom_python_api.h b/paddle/fluid/pybind/eager_custom_python_api.h index a3e996dbcbf..df4920a5e69 100644 --- a/paddle/fluid/pybind/eager_custom_python_api.h +++ b/paddle/fluid/pybind/eager_custom_python_api.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/phi/core/enforce.h" static PyObject *eager_api_run_program(PyObject *self, PyObject *args, diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 628e808ef99..c75ac0b52c5 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -20,9 +20,6 @@ typedef SSIZE_T ssize_t; #include #include -#include "pybind11/numpy.h" -#include "pybind11/pybind11.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" @@ -51,6 +48,8 @@ typedef SSIZE_T ssize_t; #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 7831530bff0..ab6b8edd52e 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -21,9 +21,6 @@ typedef SSIZE_T ssize_t; #include #include -#include "pybind11/numpy.h" -#include "pybind11/pybind11.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/fluid_generated/dygraph_forward_api.h" @@ -47,6 +44,8 @@ typedef SSIZE_T ssize_t; #include "paddle/phi/core/sparse_coo_tensor.h" #include "paddle/phi/core/sparse_csr_tensor.h" #include "pybind11/detail/internals.h" +#include "pybind11/numpy.h" +#include "pybind11/pybind11.h" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" #include "paddle/fluid/framework/python_headers.h" @@ -1007,10 +1006,11 @@ static PyObject* tensor_method__setitem_eager_tensor(TensorObject* self, PADDLE_ENFORCE_EQ( egr::egr_utils_api::IsLeafTensor(self->tensor) && !egr::EagerUtils::autograd_meta(&self->tensor)->StopGradient(), - false, platform::errors::InvalidArgument( - "Leaf Tensor (%s) that doesn't stop gradient can't use " - "inplace strategy.", - self->tensor.name())); + false, + platform::errors::InvalidArgument( + "Leaf Tensor (%s) that doesn't stop gradient can't use " + "inplace strategy.", + self->tensor.name())); } paddle::experimental::Tensor value_tensor; @@ -1232,9 +1232,10 @@ static PyObject* tensor_register_reduce_hook(TensorObject* self, PyObject* args, "Only can register backward hook for leaf Tensor.")); PADDLE_ENFORCE_EQ( !egr::EagerUtils::unsafe_autograd_meta(self->tensor)->StopGradient(), - true, platform::errors::InvalidArgument( - "Cannot register backward hook on a Tensor that stop " - "gradient.")); + true, + platform::errors::InvalidArgument( + "Cannot register backward hook on a Tensor that stop " + "gradient.")); PADDLE_ENFORCE( grad_node.get() != nullptr, paddle::platform::errors::Fatal("Detected 
NULL grad_node," @@ -1667,8 +1668,8 @@ PyMethodDef variable_methods[] = { (PyCFunction)(void (*)(void))tensor_method__is_initialized, METH_VARARGS | METH_KEYWORDS, NULL}, {"_is_dense_tensor_hold_allocation", - (PyCFunction)( - void (*)(void))tensor_method__is_dense_tensor_hold_allocation, + (PyCFunction)(void (*)( + void))tensor_method__is_dense_tensor_hold_allocation, METH_VARARGS | METH_KEYWORDS, NULL}, {"_copy_to", (PyCFunction)(void (*)(void))tensor_method__copy_to, METH_VARARGS | METH_KEYWORDS, NULL}, @@ -1793,8 +1794,8 @@ PyMethodDef string_tensor_variable_methods[] = { (PyCFunction)(void (*)(void))tensor_method__is_initialized, METH_VARARGS | METH_KEYWORDS, NULL}, {"_is_string_tensor_hold_allocation", - (PyCFunction)( - void (*)(void))tensor_method__is_string_tensor_hold_allocation, + (PyCFunction)(void (*)( + void))tensor_method__is_string_tensor_hold_allocation, METH_VARARGS | METH_KEYWORDS, NULL}, // TODO(zhoushunjie): Need to add _copy_to, copy_ for StringTensor. {NULL, NULL, 0, NULL}}; diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc index b546aa2d76b..f58f3ce9453 100644 --- a/paddle/fluid/pybind/eager_op_function_generator.cc +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -486,7 +486,8 @@ int main(int argc, char* argv[]) { "\"paddle/fluid/pybind/op_function_common.h\"", "\"paddle/fluid/eager/api/generated/fluid_generated/" "dygraph_forward_api.h\"", - "\"paddle/fluid/pybind/exception.h\"", ""}; + "\"paddle/fluid/pybind/exception.h\"", + ""}; std::ofstream out(argv[1], std::ios::out); diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc index 47a5309d691..a0cef6388c1 100644 --- a/paddle/fluid/pybind/eager_py_layer.cc +++ b/paddle/fluid/pybind/eager_py_layer.cc @@ -16,8 +16,6 @@ limitations under the License. */ #include #pragma GCC diagnostic ignored "-Wattributes" -#include "pybind11/pytypes.h" - #include "paddle/fluid/eager/accumulation/accumulation_node.h" #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" @@ -34,6 +32,7 @@ limitations under the License. 
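The PyMethodDef edits above only re-break the long double casts, but the casts themselves follow the CPython-documented pattern for storing a keyword-accepting method: its three-argument signature is routed through the generic "void (*)(void)" function type before narrowing to the two-argument PyCFunction the table expects. A minimal module showing the same table entry:

// The double cast silences the function-pointer type mismatch between
// PyCFunctionWithKeywords (3 args) and the PyCFunction slot (2 args).
#include <Python.h>

static PyObject* demo_method(PyObject* self, PyObject* args,
                             PyObject* kwargs) {
  Py_RETURN_NONE;
}

static PyMethodDef demo_methods[] = {
    {"demo", (PyCFunction)(void (*)(void))demo_method,
     METH_VARARGS | METH_KEYWORDS, nullptr},
    {nullptr, nullptr, 0, nullptr}};

static struct PyModuleDef demo_module = {PyModuleDef_HEAD_INIT, "demo",
                                         nullptr, -1, demo_methods};

PyMODINIT_FUNC PyInit_demo(void) { return PyModule_Create(&demo_module); }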
*/ #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/dense_tensor.h" #include "pybind11/detail/internals.h" +#include "pybind11/pytypes.h" #pragma GCC diagnostic ignored "-Wwrite-strings" #pragma GCC diagnostic ignored "-Wmissing-field-initializers" @@ -323,10 +322,11 @@ PyObject* pylayer_method_apply(PyObject* cls, PyObject* args, egr::EagerUtils::autograd_meta(dirty_tensor); PADDLE_ENFORCE_EQ(!dirty_tensor_autograd_meta->StopGradient() && egr::egr_utils_api::IsLeafTensor(*dirty_tensor), - false, paddle::platform::errors::InvalidArgument( - "Leaf Var (%s) that doesn't stop gradient " - "can't use inplace strategy.", - dirty_tensor->name())); + false, + paddle::platform::errors::InvalidArgument( + "Leaf Var (%s) that doesn't stop gradient " + "can't use inplace strategy.", + dirty_tensor->name())); dirty_tensor->bump_inplace_version(); VLOG(3) << "Tensor(" << dirty_tensor->name() << ") uses Inplace Strategy."; @@ -466,16 +466,19 @@ PyMethodDef pylayer_methods[] = { METH_O, NULL}, {NULL, NULL, 0, NULL}}; -struct PyGetSetDef pylayer_properties[]{ - {"container", (getter)tensor_properties_get_container, - (setter)tensor_properties_set_container, nullptr, nullptr}, - {"non_differentiable", (getter)tensor_properties_get_non_differentiable, - (setter)tensor_properties_set_non_differentiable, nullptr, nullptr}, - {"dirty_tensors", (getter)tensor_properties_get_dirty_tensors, - (setter)tensor_properties_set_dirty_tensors, nullptr, nullptr}, - {"materialize_grads", nullptr, - (setter)tensor_properties_set_materialize_grads, nullptr, nullptr}, - {nullptr, nullptr, nullptr, nullptr, nullptr}}; +struct PyGetSetDef pylayer_properties[] { + {"container", (getter)tensor_properties_get_container, + (setter)tensor_properties_set_container, nullptr, nullptr}, + {"non_differentiable", (getter)tensor_properties_get_non_differentiable, + (setter)tensor_properties_set_non_differentiable, nullptr, nullptr}, + {"dirty_tensors", (getter)tensor_properties_get_dirty_tensors, + (setter)tensor_properties_set_dirty_tensors, nullptr, nullptr}, + {"materialize_grads", nullptr, + (setter)tensor_properties_set_materialize_grads, nullptr, nullptr}, + { + nullptr, nullptr, nullptr, nullptr, nullptr + } +}; void BindEagerPyLayer(PyObject* module) { auto heap_type = reinterpret_cast( diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index efa0fe2cb58..9bcac35037d 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -14,6 +14,9 @@ limitations under the License. */ #include #include +// clang-format will try to move eager_utils.h in front of other headers +// according to google c++ style, and that cause compiling problems. +// clang-format off #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/framework/convert_utils.h" @@ -31,6 +34,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/dense_tensor.h" +// clang-format on namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 7f94f6c90e5..beab99877bd 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -16,12 +16,12 @@ typedef SSIZE_T ssize_t; #endif #include + #include "paddle/phi/common/backend.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" - #include "pybind11/pybind11.h" #include "pybind11/stl.h" namespace paddle { @@ -112,8 +112,9 @@ struct TupleTensorResult { PyObject* args, ssize_t arg_idx) { TupleTensorResult::Run(out, result, value_idx, args, arg_idx); if (N - 1 == value_idx) { - PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get(out), - value_idx, args, arg_idx)); + PyTuple_SET_ITEM( + result, N - 1, + ToPyObject(std::get(out), value_idx, args, arg_idx)); } else { PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get(out))); } diff --git a/paddle/fluid/pybind/exception.cc b/paddle/fluid/pybind/exception.cc index 4f25a6f1a5c..934a9ef97fb 100644 --- a/paddle/fluid/pybind/exception.cc +++ b/paddle/fluid/pybind/exception.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/fluid/pybind/exception.h" + #include "paddle/phi/api/ext/exception.h" namespace paddle { namespace pybind { diff --git a/paddle/fluid/pybind/fleet_py.cc b/paddle/fluid/pybind/fleet_py.cc index 4ffb513671c..25f2c910028 100644 --- a/paddle/fluid/pybind/fleet_py.cc +++ b/paddle/fluid/pybind/fleet_py.cc @@ -18,8 +18,6 @@ limitations under the License. */ #undef _XOPEN_SOURCE #endif -#include "paddle/fluid/pybind/fleet_py.h" - #include #include #include @@ -35,17 +33,18 @@ limitations under the License. 
*/ #include "paddle/fluid/distributed/ps/service/ps_service/graph_py_service.h" #include "paddle/fluid/distributed/ps/wrapper/fleet.h" #include "paddle/fluid/framework/fleet/heter_ps/graph_gpu_wrapper.h" +#include "paddle/fluid/pybind/fleet_py.h" namespace py = pybind11; using paddle::distributed::CommContext; using paddle::distributed::Communicator; +using paddle::distributed::FeatureNode; using paddle::distributed::FleetWrapper; -using paddle::distributed::HeterClient; -using paddle::distributed::GraphPyService; using paddle::distributed::GraphNode; -using paddle::distributed::GraphPyServer; using paddle::distributed::GraphPyClient; -using paddle::distributed::FeatureNode; +using paddle::distributed::GraphPyServer; +using paddle::distributed::GraphPyService; +using paddle::distributed::HeterClient; namespace paddle { namespace pybind { @@ -246,13 +245,13 @@ void BindGraphPyClient(py::module* m) { .def("bind_local_server", &GraphPyClient::bind_local_server); } -using paddle::distributed::TreeIndex; -using paddle::distributed::IndexWrapper; using paddle::distributed::IndexNode; +using paddle::distributed::IndexWrapper; +using paddle::distributed::TreeIndex; #ifdef PADDLE_WITH_HETERPS using paddle::framework::GraphGpuWrapper; -using paddle::framework::NeighborSampleResult; using paddle::framework::NeighborSampleQuery; +using paddle::framework::NeighborSampleResult; using paddle::framework::NodeQueryResult; #endif diff --git a/paddle/fluid/pybind/fleet_wrapper_py.cc b/paddle/fluid/pybind/fleet_wrapper_py.cc index af1c3da727d..0e1d4cd76ad 100644 --- a/paddle/fluid/pybind/fleet_wrapper_py.cc +++ b/paddle/fluid/pybind/fleet_wrapper_py.cc @@ -46,10 +46,10 @@ void BindFleetWrapper(py::module* m) { .def("push_dense", &framework::FleetWrapper::PushDenseVarsSync) .def("pull_dense", &framework::FleetWrapper::PullDenseVarsSync) .def("init_server", &framework::FleetWrapper::InitServer) - .def("run_server", (uint64_t (framework::FleetWrapper::*)(void)) & + .def("run_server", (uint64_t(framework::FleetWrapper::*)(void)) & framework::FleetWrapper::RunServer) - .def("run_server", (uint64_t (framework::FleetWrapper::*)( // NOLINT - const std::string&, uint32_t)) & // NOLINT + .def("run_server", (uint64_t(framework::FleetWrapper::*)( // NOLINT + const std::string&, uint32_t)) & // NOLINT framework::FleetWrapper::RunServer) .def("init_worker", &framework::FleetWrapper::InitWorker) .def("init_model", &framework::FleetWrapper::PushDenseParamSync) diff --git a/paddle/fluid/pybind/generator_py.cc b/paddle/fluid/pybind/generator_py.cc index 6bb85da8c46..e456526f844 100644 --- a/paddle/fluid/pybind/generator_py.cc +++ b/paddle/fluid/pybind/generator_py.cc @@ -8,9 +8,10 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/phi/core/generator.h" #include +#include "paddle/phi/core/generator.h" + #ifdef _POSIX_C_SOURCE #undef _POSIX_C_SOURCE #endif diff --git a/paddle/fluid/pybind/gloo_context_py.cc b/paddle/fluid/pybind/gloo_context_py.cc index 2314ceac76e..b4ee1bcd02b 100644 --- a/paddle/fluid/pybind/gloo_context_py.cc +++ b/paddle/fluid/pybind/gloo_context_py.cc @@ -43,13 +43,14 @@ void BindGlooContext(py::module *m) { py::class_ gloo_parallel_strategy( *m, "GlooParallelStrategy", ""); gloo_parallel_strategy.def(py::init()) - .def_property("rank_num", - [](const platform::GlooParallelStrategy &self) { - return self.rank_num; - }, - [](platform::GlooParallelStrategy &self, int nranks) { - self.rank_num = nranks; - }) + .def_property( + "rank_num", + [](const platform::GlooParallelStrategy &self) { + return self.rank_num; + }, + [](platform::GlooParallelStrategy &self, int nranks) { + self.rank_num = nranks; + }) .def_property( "rank", [](const platform::GlooParallelStrategy &self) { return self.rank; }, @@ -62,20 +63,22 @@ void BindGlooContext(py::module *m) { [](platform::GlooParallelStrategy &self, const std::string &iface) { self.iface = iface; }) - .def_property("init_seconds", - [](const platform::GlooParallelStrategy &self) { - return self.init_seconds; - }, - [](platform::GlooParallelStrategy &self, int init_seconds) { - self.init_seconds = init_seconds; - }) - .def_property("run_seconds", - [](const platform::GlooParallelStrategy &self) { - return self.run_seconds; - }, - [](platform::GlooParallelStrategy &self, int run_seconds) { - self.run_seconds = run_seconds; - }) + .def_property( + "init_seconds", + [](const platform::GlooParallelStrategy &self) { + return self.init_seconds; + }, + [](platform::GlooParallelStrategy &self, int init_seconds) { + self.init_seconds = init_seconds; + }) + .def_property( + "run_seconds", + [](const platform::GlooParallelStrategy &self) { + return self.run_seconds; + }, + [](platform::GlooParallelStrategy &self, int run_seconds) { + self.run_seconds = run_seconds; + }) .def_property( "ip_address", [](const platform::GlooParallelStrategy &self) { @@ -83,13 +86,14 @@ void BindGlooContext(py::module *m) { }, [](platform::GlooParallelStrategy &self, const std::string &ip_address) { self.ip_address = ip_address; }) - .def_property("ip_port", - [](const platform::GlooParallelStrategy &self) { - return self.ip_port; - }, - [](platform::GlooParallelStrategy &self, int ip_port) { - self.ip_port = ip_port; - }); + .def_property( + "ip_port", + [](const platform::GlooParallelStrategy &self) { + return self.ip_port; + }, + [](platform::GlooParallelStrategy &self, int ip_port) { + self.ip_port = ip_port; + }); py::class_ gloo_ctx(*m, "GlooParallelContext"); gloo_ctx.def(py::init()) diff --git a/paddle/fluid/pybind/gloo_context_py.h b/paddle/fluid/pybind/gloo_context_py.h index 89bd183097b..51f736ed060 100644 --- a/paddle/fluid/pybind/gloo_context_py.h +++ b/paddle/fluid/pybind/gloo_context_py.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "pybind11/pybind11.h" #include "pybind11/stl.h" diff --git a/paddle/fluid/pybind/imperative.cc b/paddle/fluid/pybind/imperative.cc index 954bac00ddb..3de6c64617d 100644 --- a/paddle/fluid/pybind/imperative.cc +++ b/paddle/fluid/pybind/imperative.cc @@ -159,10 +159,9 @@ static const platform::Place PyObjectToPlace(const py::object &place_obj) { // only initialize varbase, but not its tensor. 
static void InitVarBaseOnly(imperative::VarBase *self, const std::string &name, bool persistable = false, int stop_gradient = -1) { - auto name_ = name == "" - ? imperative::GetCurrentTracer()->GenerateUniqueName( - "generated_tensor") - : name; + auto name_ = name == "" ? imperative::GetCurrentTracer()->GenerateUniqueName( + "generated_tensor") + : name; VLOG(5) << "Init Tensor as: / name: " << name_ << " / persistable: " << persistable @@ -274,10 +273,9 @@ static void InitVarBaseFromTensorWithArgDefault(imperative::VarBase *self, const std::string &name) { VLOG(4) << "Init VarBase"; auto place = imperative::GetCurrentTracer()->ExpectedPlace(); - auto name_ = name == "" - ? imperative::GetCurrentTracer()->GenerateUniqueName( - "generated_tensor") - : name; + auto name_ = name == "" ? imperative::GetCurrentTracer()->GenerateUniqueName( + "generated_tensor") + : name; new (self) imperative::VarBase(name_); self->SetPersistable(false); self->SetType(framework::proto::VarType::LOD_TENSOR); @@ -299,10 +297,9 @@ static void InitVarBaseFromTensorWithArg(imperative::VarBase *self, const P &place, const std::string &name) { VLOG(4) << "Init VarBase"; - auto name_ = name == "" - ? imperative::GetCurrentTracer()->GenerateUniqueName( - "generated_tensor") - : name; + auto name_ = name == "" ? imperative::GetCurrentTracer()->GenerateUniqueName( + "generated_tensor") + : name; new (self) imperative::VarBase(name_); self->SetPersistable(false); self->SetType(framework::proto::VarType::LOD_TENSOR); @@ -556,38 +553,39 @@ void BindImperative(py::module *m_ptr) { }, py::return_value_policy::take_ownership); - m.def("_array_to_share_memory_tensor", - [](py::object &obj) { - // 1. cast to python array - auto array = obj.cast(); - PADDLE_ENFORCE_NE( - string::Sprintf("%s", array.dtype()).compare("object"), 0, - platform::errors::InvalidArgument( - "Faild to convert input data to a regular ndarray.\n * " - "Usually this means the input data contains nested " - "lists with different lengths.\n * Check the reader " - "function passed to 'set_(sample/sample_list/batch)" - "_generator' to locate the data causes this issue.")); - // 2. construcct LoDTensor - framework::LoDTensor t; - SetTensorFromPyArray(&t, array, - platform::CPUPlace(), true); - // 3. allocate shared memory - void *data_ptr = t.data(); - size_t data_size = t.numel() * framework::DataTypeSize(t.dtype()); - auto shared_writer_holder = - memory::allocation::AllocateMemoryMapWriterAllocation(data_size); - // 4. maintain mmap fd set & backup ipc_name - const std::string &ipc_name = shared_writer_holder->ipc_name(); - memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name); - // 5. copy data & reset holder - memory::Copy(platform::CPUPlace(), shared_writer_holder->ptr(), - platform::CPUPlace(), data_ptr, data_size); - t.ResetHolder(shared_writer_holder); - - return t; - }, - py::return_value_policy::take_ownership); + m.def( + "_array_to_share_memory_tensor", + [](py::object &obj) { + // 1. cast to python array + auto array = obj.cast(); + PADDLE_ENFORCE_NE( + string::Sprintf("%s", array.dtype()).compare("object"), 0, + platform::errors::InvalidArgument( + "Faild to convert input data to a regular ndarray.\n * " + "Usually this means the input data contains nested " + "lists with different lengths.\n * Check the reader " + "function passed to 'set_(sample/sample_list/batch)" + "_generator' to locate the data causes this issue.")); + // 2. 
construct LoDTensor + framework::LoDTensor t; + SetTensorFromPyArray(&t, array, + platform::CPUPlace(), true); + // 3. allocate shared memory + void *data_ptr = t.data(); + size_t data_size = t.numel() * framework::DataTypeSize(t.dtype()); + auto shared_writer_holder = + memory::allocation::AllocateMemoryMapWriterAllocation(data_size); + // 4. maintain mmap fd set & backup ipc_name + const std::string &ipc_name = shared_writer_holder->ipc_name(); + memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name); + // 5. copy data & reset holder + memory::Copy(platform::CPUPlace(), shared_writer_holder->ptr(), + platform::CPUPlace(), data_ptr, data_size); + t.ResetHolder(shared_writer_holder); + + return t; + }, + py::return_value_policy::take_ownership); m.def("_remove_tensor_list_mmap_fds", [](py::list &tensor_list) { for (size_t i = 0; i < tensor_list.size(); ++i) { @@ -1089,31 +1087,32 @@ void BindImperative(py::module *m_ptr) { self.Name())); return var->CurrentInplaceVersion(); }) - .def("_bump_inplace_version", - [](std::shared_ptr &self) { - // NOTE(liym27): _bump_inplace_version is only used for inplace - // operation - self->BumpInplaceVersion(); - }, - R"DOC( + .def( + "_bump_inplace_version", + [](std::shared_ptr &self) { + // NOTE(liym27): _bump_inplace_version is only used for inplace + // operation + self->BumpInplaceVersion(); + }, + R"DOC( **Notes**: **This API is ONLY available in Dygraph mode.** **This is a very low level API. Users should not use it directly. ** Bump the version whenever the Tensor is modified through an inplace operation. )DOC") .def("numpy", + .def( + "numpy", - [](imperative::VarBase &self) -> py::array { - const auto &tensor = - self.MutableVar()->Get(); - PADDLE_ENFORCE_EQ( - tensor.IsInitialized(), true, - platform::errors::InvalidArgument( - "Tensor of %s is Empty, please check if it has no data.", - self.Name())); - return TensorToPyArray(tensor, true); - }, - R"DOC( + [](imperative::VarBase &self) -> py::array { + const auto &tensor = self.MutableVar()->Get(); + PADDLE_ENFORCE_EQ( + tensor.IsInitialized(), true, + platform::errors::InvalidArgument( + "Tensor of %s is Empty, please check if it has no data.", + self.Name())); + return TensorToPyArray(tensor, true); + }, + R"DOC( Returns a numpy array that shows the value of the current Tensor.
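The five numbered steps in _array_to_share_memory_tensor follow the usual POSIX shared-memory recipe. A rough standalone sketch of steps 3 to 5 (names are hypothetical and error handling is elided; Paddle's AllocateMemoryMapWriterAllocation wraps comparable calls plus its own fd bookkeeping):

#include <cstring>

#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

// Copy `size` bytes from `src` into a named segment that another process can
// map via `ipc_name` (which must begin with '/'); returns the local mapping.
void *ShareBuffer(const char *ipc_name, const void *src, size_t size) {
  int fd = shm_open(ipc_name, O_CREAT | O_RDWR, 0600);   // step 3: allocate
  ftruncate(fd, static_cast<off_t>(size));
  void *dst = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  close(fd);                    // step 4 analogue: track/release the fd
  std::memcpy(dst, src, size);  // step 5: copy data into the shared holder
  return dst;
}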
Returns: @@ -1133,68 +1132,69 @@ void BindImperative(py::module *m_ptr) { x = linear(data) print(x.numpy()) )DOC") - .def("detach", - [](const imperative::VarBase - &self) -> std::shared_ptr { - PADDLE_ENFORCE_EQ( - self.Var().IsInitialized(), true, - platform::errors::InvalidArgument( - "Tensor %s has not been initialized!", self.Name())); + .def( + "detach", + [](const imperative::VarBase &self) + -> std::shared_ptr { + PADDLE_ENFORCE_EQ( + self.Var().IsInitialized(), true, + platform::errors::InvalidArgument( + "Tensor %s has not been initialized!", self.Name())); - PADDLE_ENFORCE_EQ( - self.Var().IsType() || - self.Var().IsType(), - true, - platform::errors::InvalidArgument( - "Type of Tensor[%s] must be LoDTensor or SelectedRows!", - self.Name())); + PADDLE_ENFORCE_EQ( + self.Var().IsType() || + self.Var().IsType(), + true, + platform::errors::InvalidArgument( + "Type of Tensor[%s] must be LoDTensor or SelectedRows!", + self.Name())); - auto detach_var = std::make_shared( - true, "detach_" + self.Name()); + auto detach_var = std::make_shared( + true, "detach_" + self.Name()); - detach_var->SetPersistable(self.Persistable()); - detach_var->SetType(self.Type()); - detach_var->SetDataType(self.DataType()); + detach_var->SetPersistable(self.Persistable()); + detach_var->SetType(self.Type()); + detach_var->SetDataType(self.DataType()); - if (self.Var().IsType()) { - const auto &origin_tensor = - self.Var().Get(); - PADDLE_ENFORCE_EQ( - origin_tensor.IsInitialized(), true, - platform::errors::InvalidArgument( - "Tensor %s has not been initialized!", self.Name())); - - auto *detach_tensor = - detach_var->MutableVar()->GetMutable(); - detach_tensor->ShareDataWith(origin_tensor); - // NOTE(liym27): Call ShareInplaceVersionCounterWith to share the - // same TensorInplaceVersion, which is used to check whether - // inplace - // operations are correct. - detach_tensor->ShareInplaceVersionCounterWith(origin_tensor); - } else { - const auto &origin_selected_rows = - self.Var().Get(); - PADDLE_ENFORCE_EQ( - origin_selected_rows.value().IsInitialized(), true, - platform::errors::InvalidArgument( - "Tensor %s has not been initialized!", self.Name())); - - auto *detach_selected_rows = - detach_var->MutableVar()->GetMutable(); - detach_selected_rows->set_height(origin_selected_rows.height()); - detach_selected_rows->set_rows(origin_selected_rows.rows()); - detach_selected_rows->mutable_value()->ShareDataWith( - origin_selected_rows.value()); - detach_selected_rows->mutable_value() - ->ShareInplaceVersionCounterWith( - origin_selected_rows.value()); - } - VLOG(3) << "The detached Tensor(" << detach_var->Name() - << ") share data with " << self.Name(); - return detach_var; - }, - py::return_value_policy::take_ownership, R"DOC( + if (self.Var().IsType()) { + const auto &origin_tensor = + self.Var().Get(); + PADDLE_ENFORCE_EQ( + origin_tensor.IsInitialized(), true, + platform::errors::InvalidArgument( + "Tensor %s has not been initialized!", self.Name())); + + auto *detach_tensor = + detach_var->MutableVar()->GetMutable(); + detach_tensor->ShareDataWith(origin_tensor); + // NOTE(liym27): Call ShareInplaceVersionCounterWith to share the + // same TensorInplaceVersion, which is used to check whether + // inplace + // operations are correct. 
+ detach_tensor->ShareInplaceVersionCounterWith(origin_tensor); + } else { + const auto &origin_selected_rows = + self.Var().Get(); + PADDLE_ENFORCE_EQ( + origin_selected_rows.value().IsInitialized(), true, + platform::errors::InvalidArgument( + "Tensor %s has not been initialized!", self.Name())); + + auto *detach_selected_rows = + detach_var->MutableVar()->GetMutable(); + detach_selected_rows->set_height(origin_selected_rows.height()); + detach_selected_rows->set_rows(origin_selected_rows.rows()); + detach_selected_rows->mutable_value()->ShareDataWith( + origin_selected_rows.value()); + detach_selected_rows->mutable_value() + ->ShareInplaceVersionCounterWith( + origin_selected_rows.value()); + } + VLOG(3) << "The detached Tensor(" << detach_var->Name() + << ") shares data with " << self.Name(); + return detach_var; + }, + py::return_value_policy::take_ownership, R"DOC( Returns a new Tensor, detached from the current graph. It will share data with the origin Tensor and will never make a Tensor copy. @@ -1256,23 +1256,23 @@ void BindImperative(py::module *m_ptr) { .def("_gradient_set_empty", &imperative::VarBase::_GradientSetEmpty, py::arg("set_is_empty") = true) .def("_is_gradient_set_empty", &imperative::VarBase::_IsGradientSetEmpty) - .def("clone", - [](std::shared_ptr &self) { - const auto &tensor = self->Var().Get(); - PADDLE_ENFORCE_EQ( - tensor.IsInitialized(), true, - platform::errors::InvalidArgument( - "%s has not been initialized", self->Name())); - auto tracer = imperative::GetCurrentTracer(); - auto new_var = std::make_shared( - true, tracer->GenerateUniqueName(self->Name() + "_clone")); - framework::AttributeMap attrs; - imperative::NameVarBaseMap ins = {{"X", {self}}}; - imperative::NameVarBaseMap outs = {{"Out", {new_var}}}; - tracer->TraceOp("assign", ins, outs, attrs); - return new_var; - }, - py::return_value_policy::copy, R"DOC( + .def( + "clone", + [](std::shared_ptr &self) { + const auto &tensor = self->Var().Get(); + PADDLE_ENFORCE_EQ(tensor.IsInitialized(), true, + platform::errors::InvalidArgument( + "%s has not been initialized", self->Name())); + auto tracer = imperative::GetCurrentTracer(); + auto new_var = std::make_shared( + true, tracer->GenerateUniqueName(self->Name() + "_clone")); + framework::AttributeMap attrs; + imperative::NameVarBaseMap ins = {{"X", {self}}}; + imperative::NameVarBaseMap outs = {{"Out", {new_var}}}; + tracer->TraceOp("assign", ins, outs, attrs); + return new_var; + }, + py::return_value_policy::copy, R"DOC( Returns a new Tensor, which is a clone of the origin Tensor, and it remains in the current graph. It will always have a Tensor copy. @@ -1305,11 +1305,12 @@ void BindImperative(py::module *m_ptr) { print(x.grad) # None )DOC") .def("_grad_name", &imperative::VarBase::GradVarName) - .def("_grad_value", - [](imperative::VarBase &self) { - return self.MutableGradVar()->Get(); - }, - py::return_value_policy::reference) + .def( + "_grad_value", + [](imperative::VarBase &self) { + return self.MutableGradVar()->Get(); + }, + py::return_value_policy::reference) .def("_set_grad_type", [](imperative::VarBase &self, framework::proto::VarType::Type type) { self.MutableGradVarBase()->SetType(type); @@ -1337,26 +1338,27 @@ void BindImperative(py::module *m_ptr) { } } }) - .def("_grad_ivar", - [](const imperative::VarBase &self) { - auto &grad_var = self.GradVarBase(); - - if (grad_var && grad_var->Var().IsInitialized()) { - auto *tensor = - grad_var->MutableVar()->IsType() - ?
grad_var->MutableVar() - ->GetMutable() - : grad_var->MutableVar() - ->GetMutable() - ->mutable_value(); - - if (tensor->IsInitialized()) { - return grad_var; - } - } - return std::shared_ptr(nullptr); - }, - py::return_value_policy::copy) + .def( + "_grad_ivar", + [](const imperative::VarBase &self) { + auto &grad_var = self.GradVarBase(); + + if (grad_var && grad_var->Var().IsInitialized()) { + auto *tensor = + grad_var->MutableVar()->IsType() + ? grad_var->MutableVar() + ->GetMutable() + : grad_var->MutableVar() + ->GetMutable() + ->mutable_value(); + + if (tensor->IsInitialized()) { + return grad_var; + } + } + return std::shared_ptr(nullptr); + }, + py::return_value_policy::copy) .def("_set_grad_ivar", [](imperative::VarBase &self, imperative::VarBase &grad) { self.SetGradVarBase(grad); @@ -1365,13 +1367,14 @@ void BindImperative(py::module *m_ptr) { [](imperative::VarBase &self) { return self.Var().IsType(); }) - .def("_allreduce", - [](imperative::VarBase &self, - const imperative::ParallelStrategy &strategy) { - if (strategy.nranks_ > 1) { + .def( + "_allreduce", + [](imperative::VarBase &self, + const imperative::ParallelStrategy &strategy) { + if (strategy.nranks_ > 1) { #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) #if NCCL_VERSION_CODE >= 2212 - imperative::AllReduce(self.Var(), self.MutableVar(), strategy); + imperative::AllReduce(self.Var(), self.MutableVar(), strategy); #else if (!self.Var().IsType()) { imperative::AllReduce(self.Var(), self.MutableVar(), strategy); @@ -1388,9 +1391,9 @@ void BindImperative(py::module *m_ptr) { "Imperative allreduce is not supported when paddle is " "not compiled with NCCL.")); #endif // PADDLE_WITH_NCCL or PADDLE_WITH_RCCL - } - }, - py::call_guard()) + } + }, + py::call_guard()) .def("_register_grad_hook", [](imperative::VarBase &self, const py::handle &hook) { PADDLE_ENFORCE_EQ( @@ -1425,22 +1428,23 @@ void BindImperative(py::module *m_ptr) { std::make_shared>(py_func)); } }) - .def("_register_backward_hook", - [](imperative::VarBase &self, const py::handle &hook) { - PADDLE_ENFORCE_EQ( - self.IsLeaf(), true, - platform::errors::InvalidArgument( - "Only can register backward hook for leaf Tensor.")); - PADDLE_ENFORCE_EQ( - !self.OverridedStopGradient() && self.HasGradVar(), true, - platform::errors::InvalidArgument( - "Cannot register backward hook on a Tensor that stop " - "gradient or without gradient.")); - auto py_func = PyObjectCast>(hook.ptr()); - self.GradVarBase()->AddVoidHook( - std::make_shared>(py_func)); - }, - R"DOC( + .def( + "_register_backward_hook", + [](imperative::VarBase &self, const py::handle &hook) { + PADDLE_ENFORCE_EQ( + self.IsLeaf(), true, + platform::errors::InvalidArgument( + "Only can register backward hook for leaf Tensor.")); + PADDLE_ENFORCE_EQ( + !self.OverridedStopGradient() && self.HasGradVar(), true, + platform::errors::InvalidArgument( + "Cannot register backward hook on a Tensor that stop " + "gradient or without gradient.")); + auto py_func = PyObjectCast>(hook.ptr()); + self.GradVarBase()->AddVoidHook( + std::make_shared>(py_func)); + }, + R"DOC( Registers a backward hook for current Tensor. This hook will be called every time the gradient of current Tensor has been fully calculated. 
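The NOTE(liym27) comment inside the detach binding above is worth unpacking: the detached variable must alias the source's inplace version counter, not copy it, so that an in-place write through either handle is seen by the correctness check on the other. A conceptual sketch with hypothetical types:

#include <memory>

struct VersionCounter {
  int version = 0;
};

struct TensorLike {
  std::shared_ptr<VersionCounter> counter = std::make_shared<VersionCounter>();
  void BumpInplaceVersion() { ++counter->version; }
  void ShareInplaceVersionCounterWith(const TensorLike &other) {
    counter = other.counter;  // alias the counter object, never copy it
  }
};

int main() {
  TensorLike src, detached;
  detached.ShareInplaceVersionCounterWith(src);
  src.BumpInplaceVersion();          // an in-place write through the source...
  return detached.counter->version;  // ...is visible via the detached view: 1
}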
@@ -1461,17 +1465,18 @@ void BindImperative(py::module *m_ptr) { Returns: None )DOC") - .def("cpu", - [](const std::shared_ptr &self) { - if (platform::is_cpu_place(self->Place())) { - return self; - } else { - auto new_var = self->NewVarBase(platform::CPUPlace(), true); - new_var->SetOverridedStopGradient(self->OverridedStopGradient()); - return new_var; - } - }, - R"DOC( + .def( + "cpu", + [](const std::shared_ptr &self) { + if (platform::is_cpu_place(self->Place())) { + return self; + } else { + auto new_var = self->NewVarBase(platform::CPUPlace(), true); + new_var->SetOverridedStopGradient(self->OverridedStopGradient()); + return new_var; + } + }, + R"DOC( Returns a copy of this Tensor in CPU memory. If this Tensor is already in CPU memory, then no copy is performed and the original Tensor is returned. @@ -1487,24 +1492,25 @@ void BindImperative(py::module *m_ptr) { print(y.place) # CPUPlace )DOC") - .def("pin_memory", - [](const std::shared_ptr &self) { + .def( + "pin_memory", + [](const std::shared_ptr &self) { #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP) - PADDLE_THROW(platform::errors::PermissionDenied( - "Cannot copy this Tensor to pinned memory in CPU version " - "Paddle, " - "Please recompile or reinstall Paddle with CUDA support.")); + PADDLE_THROW(platform::errors::PermissionDenied( + "Cannot copy this Tensor to pinned memory in CPU version " + "Paddle, " + "Please recompile or reinstall Paddle with CUDA support.")); #endif - if (platform::is_cuda_pinned_place(self->Place())) { - return self; - } else { - auto new_var = - self->NewVarBase(platform::CUDAPinnedPlace(), true); - new_var->SetOverridedStopGradient(self->OverridedStopGradient()); - return new_var; - } - }, - R"DOC( + if (platform::is_cuda_pinned_place(self->Place())) { + return self; + } else { + auto new_var = + self->NewVarBase(platform::CUDAPinnedPlace(), true); + new_var->SetOverridedStopGradient(self->OverridedStopGradient()); + return new_var; + } + }, + R"DOC( Returns a copy of this Tensor in pin memory. If this Tensor is already in pin memory, then no copy is performed and the original Tensor is returned. @@ -1520,13 +1526,14 @@ void BindImperative(py::module *m_ptr) { print(y.place) # CUDAPinnedPlace )DOC") - .def("cuda", - [](const std::shared_ptr &self, - py::handle &handle, bool blocking) { + .def( + "cuda", + [](const std::shared_ptr &self, + py::handle &handle, bool blocking) { #if !defined(PADDLE_WITH_CUDA) && !defined(PADDLE_WITH_HIP) - PADDLE_THROW(platform::errors::PermissionDenied( - "Cannot copy this Tensor to GPU in CPU version Paddle, " - "Please recompile or reinstall Paddle with CUDA support.")); + PADDLE_THROW(platform::errors::PermissionDenied( + "Cannot copy this Tensor to GPU in CPU version Paddle, " + "Please recompile or reinstall Paddle with CUDA support.")); #else int device_count = platform::GetGPUDeviceCount(); int device_id = 0; @@ -1563,8 +1570,8 @@ void BindImperative(py::module *m_ptr) { return new_var; } #endif - }, - py::arg("device_id") = py::none(), py::arg("blocking") = true, R"DOC( + }, + py::arg("device_id") = py::none(), py::arg("blocking") = true, R"DOC( Returns a copy of this Tensor in GPU memory. 
If this Tensor is already in GPU memory and device_id is default, @@ -1592,49 +1599,51 @@ void BindImperative(py::module *m_ptr) { y = x.cuda(1) print(y.place) # CUDAPlace(1) )DOC") - .def("_share_memory", - [](const std::shared_ptr &self) { + .def( + "_share_memory", + [](const std::shared_ptr &self) { #ifndef _WIN32 - PADDLE_ENFORCE_EQ( - platform::is_cpu_place(self->Place()), true, - platform::errors::InvalidArgument( - "Sharing memory only support CPU Tensor currently")); - // 1. get LoDTensor - auto *t = self->MutableVar()->GetMutable(); - // 2. allocate shared memory - void *data_ptr = t->data(); - size_t data_size = - t->numel() * framework::SizeOfType( - framework::TransToProtoVarType(t->dtype())); - auto shared_writer_holder = - memory::allocation::AllocateMemoryMapWriterAllocation( - data_size); - // 3. maintain mmap fd set & backup ipc_name - const std::string &ipc_name = shared_writer_holder->ipc_name(); - memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name); - // 4. copy data & reset holder - memory::Copy(platform::CPUPlace(), shared_writer_holder->ptr(), - platform::CPUPlace(), data_ptr, data_size); - t->ResetHolder(shared_writer_holder); - return *t; + PADDLE_ENFORCE_EQ( + platform::is_cpu_place(self->Place()), true, + platform::errors::InvalidArgument( + "Sharing memory only support CPU Tensor currently")); + // 1. get LoDTensor + auto *t = self->MutableVar()->GetMutable(); + // 2. allocate shared memory + void *data_ptr = t->data(); + size_t data_size = + t->numel() * framework::SizeOfType( + framework::TransToProtoVarType(t->dtype())); + auto shared_writer_holder = + memory::allocation::AllocateMemoryMapWriterAllocation( + data_size); + // 3. maintain mmap fd set & backup ipc_name + const std::string &ipc_name = shared_writer_holder->ipc_name(); + memory::allocation::MemoryMapFdSet::Instance().Insert(ipc_name); + // 4. copy data & reset holder + memory::Copy(platform::CPUPlace(), shared_writer_holder->ptr(), + platform::CPUPlace(), data_ptr, data_size); + t->ResetHolder(shared_writer_holder); + return *t; #else PADDLE_THROW(platform::errors::PermissionDenied( "Sharing memory in Windows OS is not supported currently")); #endif - }, - py::return_value_policy::reference) + }, + py::return_value_policy::reference) #if defined(PADDLE_WITH_CUDA) - .def("_uva", - [](const std::shared_ptr &self, int device_id) { - PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->Place()), true, - platform::errors::InvalidArgument( - "Unified virtual addressing only support " - "CPU Tensor currently.")); - auto *self_tensor = - self->MutableVar()->GetMutable(); - tensor_uva(self_tensor, device_id); - }, - py::arg("device_id") = 0, py::return_value_policy::reference, R"DOC( + .def( + "_uva", + [](const std::shared_ptr &self, int device_id) { + PADDLE_ENFORCE_EQ(platform::is_cpu_place(self->Place()), true, + platform::errors::InvalidArgument( + "Unified virtual addressing only support " + "CPU Tensor currently.")); + auto *self_tensor = + self->MutableVar()->GetMutable(); + tensor_uva(self_tensor, device_id); + }, + py::arg("device_id") = 0, py::return_value_policy::reference, R"DOC( Returns self tensor with the UVA(unified virtual addressing). 
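For orientation, the _uva binding above maps an existing CPU tensor into the device address space. With the CUDA runtime, this kind of unified-virtual-addressing registration typically looks like the following (an illustration only, not Paddle's tensor_uva helper; error handling elided):

#include <cstddef>

#include <cuda_runtime.h>

// Pin the pages behind `host_ptr` and obtain a device-visible alias for them.
void *RegisterUva(void *host_ptr, size_t bytes) {
  cudaHostRegister(host_ptr, bytes, cudaHostRegisterMapped);  // pin + map
  void *device_ptr = nullptr;
  cudaHostGetDevicePointer(&device_ptr, host_ptr, 0);  // device-side alias
  return device_ptr;
}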
Args: @@ -1651,86 +1660,94 @@ void BindImperative(py::module *m_ptr) { )DOC") #endif .def("copy_", &imperative::VarBase::CopyFrom) - .def("_copy_to", - [](const std::shared_ptr &self, - const platform::CPUPlace &place, bool blocking) { - auto new_var = self->NewVarBase(place, blocking); - // Note(zhiqiu): Since NewVarBase may use GpuCopyAsync to - // copy data from the tensor of self to the tensor of new varbase, - // we need to ensure that the varbase self is not destructed until - // the GpuCopyAsync is completed. Otherwise, the memory may be - // freed - // when varbase self is destructed. - // To do that, we increase the reference count of self by 1 and - // add a cuda event to wait the GpuCopyAsync's completion. - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("_copy_to", - [](const std::shared_ptr &self, - const platform::CUDAPinnedPlace &place, bool blocking) { - auto new_var = self->NewVarBase(place, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("_copy_to", - [](const std::shared_ptr &self, - const platform::XPUPlace &place, bool blocking) { - auto new_var = self->NewVarBase(place, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("_copy_to", - [](const std::shared_ptr &self, - const platform::CUDAPlace &place, bool blocking) { - auto new_var = self->NewVarBase(place, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("_copy_to", - [](const std::shared_ptr &self, - const platform::NPUPlace &place, bool blocking) { - auto new_var = self->NewVarBase(place, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("_copy_to", - [](const std::shared_ptr &self, - const platform::MLUPlace &place, bool blocking) { - auto new_var = self->NewVarBase(place, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("_copy_to", - [](const std::shared_ptr &self, - const platform::Place &place, bool blocking) { - auto new_var = self->NewVarBase(place, blocking); - if (!blocking) { - IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); - } - return new_var; - }, - py::return_value_policy::copy) - .def("value", [](imperative::VarBase &self) { return self.MutableVar(); }, - py::return_value_policy::reference) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::CPUPlace &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + // Note(zhiqiu): Since NewVarBase may use GpuCopyAsync to + // copy data from the tensor of self to the tensor of new varbase, + // we need to ensure that the varbase self is not destructed until + // the GpuCopyAsync is completed. Otherwise, the memory may be + // freed + // when varbase self is destructed. + // To do that, we increase the reference count of self by 1 and + // add a cuda event to wait the GpuCopyAsync's completion. 
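The Note(zhiqiu) comment kept intact above describes a keep-alive idiom: when the copy is asynchronous, the source variable's reference count is raised until the copy event completes, so its memory cannot be freed mid-transfer. Stripped to its essentials (all names hypothetical):

#include <functional>
#include <memory>

struct VarLike { /* tensor payload elided */ };

// Stand-in for an asynchronous device copy; fires `on_done` when the copy
// event completes (invoked inline here only for brevity).
void AsyncCopy(const VarLike &, std::function<void()> on_done) { on_done(); }

std::shared_ptr<VarLike> CopyToPlace(const std::shared_ptr<VarLike> &self,
                                     bool blocking) {
  auto dst = std::make_shared<VarLike>();
  if (!blocking) {
    auto keep_alive = self;  // +1 reference, exactly the trick the note names
    AsyncCopy(*self, [keep_alive] {
      // `keep_alive` dies with this callback, releasing the extra reference.
    });
  }
  return dst;
}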
+ if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::CUDAPinnedPlace &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::XPUPlace &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::CUDAPlace &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::NPUPlace &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::MLUPlace &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def( + "_copy_to", + [](const std::shared_ptr &self, + const platform::Place &place, bool blocking) { + auto new_var = self->NewVarBase(place, blocking); + if (!blocking) { + IncreaseVarbaseReferenceCountUntilCopyComplete(self, place); + } + return new_var; + }, + py::return_value_policy::copy) + .def( + "value", [](imperative::VarBase &self) { return self.MutableVar(); }, + py::return_value_policy::reference) .def("_clear", [](const std::shared_ptr &self) { auto *t = self->MutableVar()->GetMutable(); @@ -1842,39 +1859,28 @@ void BindImperative(py::module *m_ptr) { &imperative::VarBase::SetOverridedStopGradient) .def_property("persistable", &imperative::VarBase::Persistable, &imperative::VarBase::SetPersistable) - .def_property_readonly("shape", - [](imperative::VarBase &self) { - if (self.Var().IsType()) { - return phi::vectorize( - self.Var() - .Get() - .dims()); - } else if (self.Var() - .IsType()) { - return phi::vectorize( - self.Var() - .Get() - .value() - .dims()); - } else if (self.Var() - .IsType()) { - return std::vector{static_cast( - self.Var() - .Get() - .size())}; - } else if (self.Var() - .IsType()) { - return std::vector{static_cast( - self.Var() - .Get() - .size())}; - } else { - VLOG(2) << "It is meaningless to get shape of " - "variable type " - << GetTypeName(self); - return std::vector(); - } - }) + .def_property_readonly( + "shape", + [](imperative::VarBase &self) { + if (self.Var().IsType()) { + return phi::vectorize( + self.Var().Get().dims()); + } else if (self.Var().IsType()) { + return phi::vectorize( + self.Var().Get().value().dims()); + } else if (self.Var().IsType()) { + return std::vector{static_cast( + self.Var().Get().size())}; + } else if (self.Var().IsType()) { + return std::vector{ + static_cast(self.Var().Get().size())}; + } else { + VLOG(2) << "It is meaningless to get shape of " + "variable type " + << GetTypeName(self); 
+ return std::vector(); + } + }) .def_property_readonly("is_leaf", &imperative::VarBase::IsLeaf, R"DOC( Whether a Tensor is leaf Tensor. @@ -2157,13 +2163,14 @@ void BindImperative(py::module *m_ptr) { [](imperative::ParallelStrategy &self, int nranks) { self.nranks_ = nranks; }) - .def_property("local_rank", - [](const imperative::ParallelStrategy &self) { - return self.local_rank_; - }, - [](imperative::ParallelStrategy &self, int local_rank) { - self.local_rank_ = local_rank; - }) + .def_property( + "local_rank", + [](const imperative::ParallelStrategy &self) { + return self.local_rank_; + }, + [](imperative::ParallelStrategy &self, int local_rank) { + self.local_rank_ = local_rank; + }) .def_property( "trainer_endpoints", [](const imperative::ParallelStrategy &self) { @@ -2172,12 +2179,14 @@ void BindImperative(py::module *m_ptr) { [](imperative::ParallelStrategy &self, std::vector eps) { self.trainer_endpoints_ = eps; }) - .def_property("current_endpoint", - [](const imperative::ParallelStrategy &self) { - return self.current_endpoint_; - }, - [](imperative::ParallelStrategy &self, - const std::string &ep) { self.current_endpoint_ = ep; }) + .def_property( + "current_endpoint", + [](const imperative::ParallelStrategy &self) { + return self.current_endpoint_; + }, + [](imperative::ParallelStrategy &self, const std::string &ep) { + self.current_endpoint_ = ep; + }) .def_property( "nrings", [](const imperative::ParallelStrategy &self) { return self.nrings_; }, @@ -2359,43 +2368,44 @@ void BindImperative(py::module *m_ptr) { }); #if defined(PADDLE_WITH_CUDA) - m.def("to_uva_tensor", - [](const py::object &obj, int device_id) { - const auto &tracer = imperative::GetCurrentTracer(); - auto new_tensor = std::shared_ptr( - new imperative::VarBase(tracer->GenerateUniqueName())); - auto array = obj.cast(); - if (py::isinstance>(array)) { - SetUVATensorFromPyArray(new_tensor, array, device_id); - } else if (py::isinstance>(array)) { - SetUVATensorFromPyArray(new_tensor, array, device_id); - } else if (py::isinstance>(array)) { - SetUVATensorFromPyArray(new_tensor, array, device_id); - } else if (py::isinstance>(array)) { - SetUVATensorFromPyArray(new_tensor, array, device_id); - } else if (py::isinstance>(array)) { - SetUVATensorFromPyArray(new_tensor, array, device_id); - } else if (py::isinstance>(array)) { - SetUVATensorFromPyArray(new_tensor, array, device_id); - } else if (py::isinstance>( - array)) { - SetUVATensorFromPyArray( - new_tensor, array, device_id); - } else if (py::isinstance>(array)) { - SetUVATensorFromPyArray(new_tensor, array, device_id); - } else { - // obj may be any type, obj.cast() may be failed, - // then the array.dtype will be string of unknown meaning. - PADDLE_THROW(platform::errors::InvalidArgument( - "Input object type error or incompatible array data type. 
" - "tensor.set() supports array with bool, float16, float32, " - "float64, int8, int16, int32, int64," - "please check your input or input array data type.")); - } - return new_tensor; - }, - py::arg("obj"), py::arg("device_id") = 0, - py::return_value_policy::reference, R"DOC( + m.def( + "to_uva_tensor", + [](const py::object &obj, int device_id) { + const auto &tracer = imperative::GetCurrentTracer(); + auto new_tensor = std::shared_ptr( + new imperative::VarBase(tracer->GenerateUniqueName())); + auto array = obj.cast(); + if (py::isinstance>(array)) { + SetUVATensorFromPyArray(new_tensor, array, device_id); + } else if (py::isinstance>(array)) { + SetUVATensorFromPyArray(new_tensor, array, device_id); + } else if (py::isinstance>(array)) { + SetUVATensorFromPyArray(new_tensor, array, device_id); + } else if (py::isinstance>(array)) { + SetUVATensorFromPyArray(new_tensor, array, device_id); + } else if (py::isinstance>(array)) { + SetUVATensorFromPyArray(new_tensor, array, device_id); + } else if (py::isinstance>(array)) { + SetUVATensorFromPyArray(new_tensor, array, device_id); + } else if (py::isinstance>( + array)) { + SetUVATensorFromPyArray(new_tensor, array, + device_id); + } else if (py::isinstance>(array)) { + SetUVATensorFromPyArray(new_tensor, array, device_id); + } else { + // obj may be any type, obj.cast() may be failed, + // then the array.dtype will be string of unknown meaning. + PADDLE_THROW(platform::errors::InvalidArgument( + "Input object type error or incompatible array data type. " + "tensor.set() supports array with bool, float16, float32, " + "float64, int8, int16, int32, int64," + "please check your input or input array data type.")); + } + return new_tensor; + }, + py::arg("obj"), py::arg("device_id") = 0, + py::return_value_policy::reference, R"DOC( Returns tensor with the UVA(unified virtual addressing) created from numpy array. Args: diff --git a/paddle/fluid/pybind/imperative.h b/paddle/fluid/pybind/imperative.h index 0e3e98512d6..91b92944215 100644 --- a/paddle/fluid/pybind/imperative.h +++ b/paddle/fluid/pybind/imperative.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include "pybind11/pybind11.h" #include "pybind11/stl.h" diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc index d4c19364d48..d6ffbf01001 100644 --- a/paddle/fluid/pybind/inference_api.cc +++ b/paddle/fluid/pybind/inference_api.cc @@ -13,8 +13,10 @@ // limitations under the License. 
#include "paddle/fluid/pybind/inference_api.h" + #include #include + #include #include #include @@ -26,6 +28,7 @@ #include #include #include + #include "paddle/fluid/inference/api/analysis_predictor.h" #include "paddle/fluid/inference/api/helper.h" #include "paddle/fluid/inference/api/paddle_infer_contrib.h" @@ -75,8 +78,8 @@ using paddle::AnalysisPredictor; using paddle::NativeConfig; using paddle::NativePaddlePredictor; using paddle::PaddleBuf; -using paddle::PaddleDType; using paddle::PaddleDataLayout; +using paddle::PaddleDType; using paddle::PaddlePassBuilder; using paddle::PaddlePlace; using paddle::PaddlePredictor; @@ -379,13 +382,13 @@ void BindInferenceApi(py::module *m) { &paddle::CreatePaddlePredictor, py::arg("config")); m->def("create_paddle_predictor", &paddle::CreatePaddlePredictor, py::arg("config")); - m->def("create_predictor", [](const paddle_infer::Config &config) - -> std::unique_ptr { - auto pred = - std::unique_ptr( - new paddle_infer::Predictor(config)); - return pred; - }); + m->def("create_predictor", + [](const paddle_infer::Config &config) + -> std::unique_ptr { + auto pred = std::unique_ptr( + new paddle_infer::Predictor(config)); + return pred; + }); m->def("copy_tensor", &CopyPaddleInferTensor); m->def("paddle_dtype_size", &paddle::PaddleDtypeSize); m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes); @@ -578,11 +581,11 @@ void BindAnalysisConfig(py::module *m) { .def(py::init()) .def(py::init()) .def("summary", &AnalysisConfig::Summary) - .def("set_model", (void (AnalysisConfig::*)(const std::string &)) & - AnalysisConfig::SetModel) - .def("set_model", (void (AnalysisConfig::*)(const std::string &, - const std::string &)) & + .def("set_model", (void(AnalysisConfig::*)(const std::string &)) & AnalysisConfig::SetModel) + .def("set_model", + (void(AnalysisConfig::*)(const std::string &, const std::string &)) & + AnalysisConfig::SetModel) .def("set_prog_file", &AnalysisConfig::SetProgFile) .def("set_params_file", &AnalysisConfig::SetParamsFile) .def("model_dir", &AnalysisConfig::model_dir) @@ -716,11 +719,12 @@ void BindAnalysisConfig(py::module *m) { [](AnalysisConfig &self, const std::string &pass) { self.pass_builder()->DeletePass(pass); }) - .def("pass_builder", - [](AnalysisConfig &self) { - return dynamic_cast(self.pass_builder()); - }, - py::return_value_policy::reference) + .def( + "pass_builder", + [](AnalysisConfig &self) { + return dynamic_cast(self.pass_builder()); + }, + py::return_value_policy::reference) .def("nnadapter", &AnalysisConfig::NNAdapter) .def("set_dist_config", &AnalysisConfig::SetDistConfig) .def("dist_config", &AnalysisConfig::dist_config); diff --git a/paddle/fluid/pybind/io.cc b/paddle/fluid/pybind/io.cc index a7222abf45c..c8806962421 100644 --- a/paddle/fluid/pybind/io.cc +++ b/paddle/fluid/pybind/io.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/pybind/io.h" + #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/selected_rows_utils.h" #include "paddle/fluid/platform/enforce.h" diff --git a/paddle/fluid/pybind/io.h b/paddle/fluid/pybind/io.h index 942c93deccf..7f10306e919 100644 --- a/paddle/fluid/pybind/io.h +++ b/paddle/fluid/pybind/io.h @@ -20,6 +20,7 @@ typedef SSIZE_T ssize_t; #endif #include + #include "paddle/fluid/pybind/pybind_boost_headers.h" namespace paddle { diff --git a/paddle/fluid/pybind/ir.cc b/paddle/fluid/pybind/ir.cc index ecbacd37d56..ef005ee8b10 100644 --- a/paddle/fluid/pybind/ir.cc +++ b/paddle/fluid/pybind/ir.cc @@ -13,12 +13,14 @@ // limitations under the License. #include "paddle/fluid/pybind/ir.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/ir/graph.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/framework/ir/graph_pattern_detector.h" @@ -31,18 +33,18 @@ #include "pybind11/stl.h" namespace py = pybind11; -using paddle::framework::ir::Graph; -using paddle::framework::ir::Node; -using paddle::framework::ir::NodeComp; -using paddle::framework::ir::GraphSafeRemoveNodes; -using paddle::framework::ir::HasCircle; -using paddle::framework::ir::GraphNum; -using paddle::framework::ir::TopologySortOperations; -using paddle::framework::ir::BuildOperationAdjList; using paddle::framework::OpDesc; using paddle::framework::ProgramDesc; using paddle::framework::Scope; using paddle::framework::VarDesc; +using paddle::framework::ir::BuildOperationAdjList; +using paddle::framework::ir::Graph; +using paddle::framework::ir::GraphNum; +using paddle::framework::ir::GraphSafeRemoveNodes; +using paddle::framework::ir::HasCircle; +using paddle::framework::ir::Node; +using paddle::framework::ir::NodeComp; +using paddle::framework::ir::TopologySortOperations; using pybind11::return_value_policy; namespace paddle { @@ -104,16 +106,18 @@ void BindGraph(py::module *m) { }) .def("erase", &Graph::Erase) .def("nodes", &Graph::Nodes, return_value_policy::reference) - .def("create_var_node", - [](Graph &self, VarDesc &var_desc) { - return self.CreateVarNode(&var_desc); - }, - return_value_policy::reference) - .def("create_op_node", - [](Graph &self, OpDesc &op_desc) { - return self.CreateOpNode(&op_desc); - }, - return_value_policy::reference) + .def( + "create_var_node", + [](Graph &self, VarDesc &var_desc) { + return self.CreateVarNode(&var_desc); + }, + return_value_policy::reference) + .def( + "create_op_node", + [](Graph &self, OpDesc &op_desc) { + return self.CreateOpNode(&op_desc); + }, + return_value_policy::reference) .def("create_control_dep_var", &Graph::CreateControlDepVar, return_value_policy::reference) .def("create_empty_node", &Graph::CreateEmptyNode, diff --git a/paddle/fluid/pybind/ir.h b/paddle/fluid/pybind/ir.h index 2cc1459bbe0..ad2d6aa11bf 100644 --- a/paddle/fluid/pybind/ir.h +++ b/paddle/fluid/pybind/ir.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/ir/graph.h" namespace paddle { diff --git a/paddle/fluid/pybind/op_function_common.cc b/paddle/fluid/pybind/op_function_common.cc index 8b9b98eba12..a3c6fa14765 100644 --- a/paddle/fluid/pybind/op_function_common.cc +++ b/paddle/fluid/pybind/op_function_common.cc @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/fluid/pybind/op_function_common.h" + #include #include #include @@ -28,7 +30,6 @@ #include "paddle/fluid/imperative/tracer.h" #include "paddle/fluid/imperative/type_defs.h" #include "paddle/fluid/pybind/imperative.h" -#include "paddle/fluid/pybind/op_function_common.h" namespace py = pybind11; namespace paddle { diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc index 66bf8c95179..329b3b83337 100644 --- a/paddle/fluid/pybind/protobuf.cc +++ b/paddle/fluid/pybind/protobuf.cc @@ -76,11 +76,12 @@ void BindProgramDesc(pybind11::module *m) { platform::errors::InvalidArgument( "Failed to parse ProgramDesc from binary string.")); }) - .def("_set_version", - [](pd::ProgramDesc &self, int64_t version) { - return self.SetVersion(version); - }, - pybind11::arg("version") = pd::kCurProgramVersion) + .def( + "_set_version", + [](pd::ProgramDesc &self, int64_t version) { + return self.SetVersion(version); + }, + pybind11::arg("version") = pd::kCurProgramVersion) .def("_version", [](pd::ProgramDesc &self) -> int64_t { return self.Version(); }) .def("get_op_deps", [](const framework::ProgramDesc &program) { @@ -113,18 +114,20 @@ void BindBlockDesc(pybind11::module *m) { .def("_insert_op", &pd::BlockDesc::InsertOp, pybind11::return_value_policy::reference) .def("_remove_op", &pd::BlockDesc::RemoveOp) - .def("var", - [](pd::BlockDesc &self, pybind11::bytes byte_name) { - std::string name = byte_name; - return self.Var(name); - }, - pybind11::return_value_policy::reference) - .def("has_var", - [](pd::BlockDesc &self, pybind11::bytes byte_name) { - std::string name = byte_name; - return self.HasVar(name); - }, - pybind11::return_value_policy::reference) + .def( + "var", + [](pd::BlockDesc &self, pybind11::bytes byte_name) { + std::string name = byte_name; + return self.Var(name); + }, + pybind11::return_value_policy::reference) + .def( + "has_var", + [](pd::BlockDesc &self, pybind11::bytes byte_name) { + std::string name = byte_name; + return self.HasVar(name); + }, + pybind11::return_value_policy::reference) .def("_rename_var", [](pd::BlockDesc &self, const pybind11::bytes &byte_name, const pybind11::bytes &byte_name_new) { @@ -137,24 +140,27 @@ void BindBlockDesc(pybind11::module *m) { std::string name = byte_name; return self.HasVarRecursive(name); }) - .def("find_var", - [](pd::BlockDesc &self, pybind11::bytes byte_name) { - std::string name = byte_name; - return self.FindVar(name); - }, - pybind11::return_value_policy::reference) - .def("find_var_recursive", - [](pd::BlockDesc &self, pybind11::bytes byte_name) { - std::string name = byte_name; - return self.FindVarRecursive(name); - }, - pybind11::return_value_policy::reference) - .def("_remove_var", - [](pd::BlockDesc &self, pybind11::bytes byte_name) { - std::string name = byte_name; - return self.RemoveVar(name); - }, - pybind11::return_value_policy::reference) + .def( + "find_var", + [](pd::BlockDesc &self, pybind11::bytes byte_name) { + std::string name = byte_name; + return self.FindVar(name); + }, + pybind11::return_value_policy::reference) + .def( + "find_var_recursive", + [](pd::BlockDesc &self, pybind11::bytes byte_name) { + std::string name = byte_name; + return self.FindVarRecursive(name); + }, + pybind11::return_value_policy::reference) + .def( + "_remove_var", + [](pd::BlockDesc &self, pybind11::bytes byte_name) { + std::string name = byte_name; + return self.RemoveVar(name); + }, + pybind11::return_value_policy::reference) .def("all_vars", &pd::BlockDesc::AllVars, 
pybind11::return_value_policy::reference) .def("op_size", &pd::BlockDesc::OpSize) @@ -258,8 +264,9 @@ void BindOpDesc(pybind11::module *m) { pybind11::class_ op_desc(*m, "OpDesc", ""); op_desc - .def("__init__", [](pd::OpDesc &self) { new (&self) pd::OpDesc(); }, - pybind11::return_value_policy::reference) + .def( + "__init__", [](pd::OpDesc &self) { new (&self) pd::OpDesc(); }, + pybind11::return_value_policy::reference) .def("copy_from", &pd::OpDesc::CopyFrom) .def("type", &pd::OpDesc::Type) .def("set_type", &pd::OpDesc::SetType) @@ -304,8 +311,9 @@ void BindOpDesc(pybind11::module *m) { .def("infer_var_type", &pd::OpDesc::InferVarType) .def("set_is_target", &pd::OpDesc::SetIsTarget) .def("serialize_to_string", SerializeMessage) - .def("block", [](pd::OpDesc &self) { return self.Block(); }, - pybind11::return_value_policy::reference) + .def( + "block", [](pd::OpDesc &self) { return self.Block(); }, + pybind11::return_value_policy::reference) .def("id", &pd::OpDesc::Id) .def("original_id", &pd::OpDesc::OriginalId) .def("set_original_id", &pd::OpDesc::SetOriginalId) diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d1c2b28dc80..cba7d036235 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -693,56 +693,56 @@ PYBIND11_MODULE(core_noavx, m) { m.def("_get_use_default_grad_op_desc_maker_ops", [] { return OpInfoMap::Instance().GetUseDefaultGradOpDescMakerOps(); }); - m.def("_get_all_register_op_kernels", - [](const std::string &lib) { - std::unordered_map> - all_kernels_info; - if (lib == "fluid" || lib == "all") { - auto &all_kernels = - paddle::framework::OperatorWithKernel::AllOpKernels(); - - for (auto &kernel_pair : all_kernels) { - auto op_type = kernel_pair.first; - std::vector kernel_types; - for (auto &info_pair : kernel_pair.second) { - paddle::framework::OpKernelType kernel_type = info_pair.first; - kernel_types.emplace_back( - paddle::framework::KernelTypeToString(kernel_type)); - } - all_kernels_info.emplace(op_type, kernel_types); + m.def( + "_get_all_register_op_kernels", + [](const std::string &lib) { + std::unordered_map> + all_kernels_info; + if (lib == "fluid" || lib == "all") { + auto &all_kernels = + paddle::framework::OperatorWithKernel::AllOpKernels(); + + for (auto &kernel_pair : all_kernels) { + auto op_type = kernel_pair.first; + std::vector kernel_types; + for (auto &info_pair : kernel_pair.second) { + paddle::framework::OpKernelType kernel_type = info_pair.first; + kernel_types.emplace_back( + paddle::framework::KernelTypeToString(kernel_type)); } + all_kernels_info.emplace(op_type, kernel_types); } - if (lib == "phi" || lib == "all") { - auto phi_kernels = phi::KernelFactory::Instance().kernels(); - for (auto &kernel_pair : phi_kernels) { - auto op_type = phi::TransToFluidOpName(kernel_pair.first); - std::vector kernel_types; - for (auto &info_pair : kernel_pair.second) { - framework::OpKernelType kernel_type = - framework::TransPhiKernelKeyToOpKernelType(info_pair.first); - auto kernel_type_str = - framework::KernelTypeToString(kernel_type); - if (all_kernels_info.count(op_type)) { - if (std::find(all_kernels_info[op_type].begin(), - all_kernels_info[op_type].end(), - kernel_type_str) == - all_kernels_info[op_type].end()) { - all_kernels_info[op_type].emplace_back(kernel_type_str); - } - } else { - kernel_types.emplace_back(kernel_type_str); + } + if (lib == "phi" || lib == "all") { + auto phi_kernels = phi::KernelFactory::Instance().kernels(); + for (auto &kernel_pair : phi_kernels) { + auto op_type = 
phi::TransToFluidOpName(kernel_pair.first); + std::vector kernel_types; + for (auto &info_pair : kernel_pair.second) { + framework::OpKernelType kernel_type = + framework::TransPhiKernelKeyToOpKernelType(info_pair.first); + auto kernel_type_str = framework::KernelTypeToString(kernel_type); + if (all_kernels_info.count(op_type)) { + if (std::find(all_kernels_info[op_type].begin(), + all_kernels_info[op_type].end(), + kernel_type_str) == + all_kernels_info[op_type].end()) { + all_kernels_info[op_type].emplace_back(kernel_type_str); } - } - if (!kernel_types.empty()) { - all_kernels_info.emplace(op_type, kernel_types); + } else { + kernel_types.emplace_back(kernel_type_str); } } + if (!kernel_types.empty()) { + all_kernels_info.emplace(op_type, kernel_types); + } } + } - return all_kernels_info; - }, - py::arg("lib") = "all", - R"DOC( + return all_kernels_info; + }, + py::arg("lib") = "all", + R"DOC( Return the registered kernels in paddle. Args: @@ -1011,9 +1011,10 @@ PYBIND11_MODULE(core_noavx, m) { t.set(np.ndarray([5, 30]), fluid.CPUPlace()) )DOC") - .def("shape", - [](framework::Tensor &self) { return vectorize(self.dims()); }, - R"DOC( + .def( + "shape", + [](framework::Tensor &self) { return vectorize(self.dims()); }, + R"DOC( Return the shape of Tensor. Returns: @@ -1101,20 +1102,21 @@ PYBIND11_MODULE(core_noavx, m) { // avoid misuse. // The discussion is here: // https://github.com/PaddlePaddle/Paddle/issues/10855 - .def("set_lod", - [](framework::Tensor &self, - const std::vector> &lod) { - // the input lod is offset-based level-of-detail info - LoD new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - PADDLE_ENFORCE_EQ( - CheckLoD(new_lod, vectorize(self.dims()).front()), true, - platform::errors::InvalidArgument( - "The provided LoD is invalid, the LoD is %s", new_lod)); - self.set_lod(new_lod); - }, - py::arg("lod"), R"DOC( + .def( + "set_lod", + [](framework::Tensor &self, + const std::vector> &lod) { + // the input lod is offset-based level-of-detail info + LoD new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + PADDLE_ENFORCE_EQ( + CheckLoD(new_lod, vectorize(self.dims()).front()), true, + platform::errors::InvalidArgument( + "The provided LoD is invalid, the LoD is %s", new_lod)); + self.set_lod(new_lod); + }, + py::arg("lod"), R"DOC( Set LoD of the Tensor. 
Args: @@ -1134,28 +1136,29 @@ PYBIND11_MODULE(core_noavx, m) { t.set_lod([[0, 2, 5]]) print(t.lod()) # [[0, 2, 5]] )DOC") - .def("set_recursive_sequence_lengths", - [](framework::Tensor &self, const std::vector> - &recursive_sequence_lengths) { - // the input recursive_sequence_lengths is length-based - // level-of-detail info - LoD new_lod; - new_lod.reserve(recursive_sequence_lengths.size()); - std::copy(recursive_sequence_lengths.begin(), - recursive_sequence_lengths.end(), - std::back_inserter(new_lod)); - LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); - PADDLE_ENFORCE_EQ( - CheckLoD(new_offset_lod, vectorize(self.dims()).front()), true, - platform::errors::InvalidArgument( - "The provided recursive_sequence_lengths info is " - "invalid, " - "the LoD converted by recursive_sequence_lengths is " - "%s", - new_lod)); - self.set_lod(new_offset_lod); - }, - py::arg("recursive_sequence_lengths"), R"DOC( + .def( + "set_recursive_sequence_lengths", + [](framework::Tensor &self, const std::vector> + &recursive_sequence_lengths) { + // the input recursive_sequence_lengths is length-based + // level-of-detail info + LoD new_lod; + new_lod.reserve(recursive_sequence_lengths.size()); + std::copy(recursive_sequence_lengths.begin(), + recursive_sequence_lengths.end(), + std::back_inserter(new_lod)); + LoD new_offset_lod = ConvertToOffsetBasedLoD(new_lod); + PADDLE_ENFORCE_EQ( + CheckLoD(new_offset_lod, vectorize(self.dims()).front()), true, + platform::errors::InvalidArgument( + "The provided recursive_sequence_lengths info is " + "invalid, " + "the LoD converted by recursive_sequence_lengths is " + "%s", + new_lod)); + self.set_lod(new_offset_lod); + }, + py::arg("recursive_sequence_lengths"), R"DOC( Set LoD of the Tensor according to recursive sequence lengths. For example, if recursive_sequence_lengths=[[2, 3]], which means @@ -1180,16 +1183,17 @@ PYBIND11_MODULE(core_noavx, m) { print(t.recursive_sequence_lengths()) # [[2, 3]] print(t.lod()) # [[0, 2, 5]] )DOC") - .def("lod", - [](framework::Tensor &self) -> std::vector> { - // output the offset-based lod info - LoD lod = self.lod(); - std::vector> new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - return new_lod; - }, - R"DOC( + .def( + "lod", + [](framework::Tensor &self) -> std::vector> { + // output the offset-based lod info + LoD lod = self.lod(); + std::vector> new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + return new_lod; + }, + R"DOC( Return the LoD of the Tensor. Returns: @@ -1207,16 +1211,17 @@ PYBIND11_MODULE(core_noavx, m) { print(t.lod()) # [[0, 2, 5]] )DOC") // Set above comments of set_lod. - .def("recursive_sequence_lengths", - [](framework::Tensor &self) -> std::vector> { - // output the length-based lod info - LoD lod = phi::ConvertToLengthBasedLoD(self.lod()); - std::vector> new_lod; - new_lod.reserve(lod.size()); - std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); - return new_lod; - }, - R"DOC( + .def( + "recursive_sequence_lengths", + [](framework::Tensor &self) -> std::vector> { + // output the length-based lod info + LoD lod = phi::ConvertToLengthBasedLoD(self.lod()); + std::vector> new_lod; + new_lod.reserve(lod.size()); + std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod)); + return new_lod; + }, + R"DOC( Return the recursive sequence lengths corresponding to of the LodD of the Tensor. 
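The two bindings above round-trip between Paddle's LoD encodings: set_recursive_sequence_lengths accepts per-sequence lengths and converts them to offsets via ConvertToOffsetBasedLoD before storing, while recursive_sequence_lengths converts the stored offsets back to lengths. A minimal sketch of the length-to-offset direction, assuming an illustrative helper name rather than Paddle's actual implementation:

// Length-based -> offset-based LoD: each offset level is the running
// prefix sum of the corresponding length level, starting at 0.
#include <cstddef>
#include <vector>

std::vector<std::vector<size_t>> LengthsToOffsets(
    const std::vector<std::vector<size_t>>& lengths) {
  std::vector<std::vector<size_t>> offsets;
  offsets.reserve(lengths.size());
  for (const auto& level : lengths) {
    std::vector<size_t> offset_level(1, 0);  // every level starts at offset 0
    for (size_t len : level) {
      // next offset = previous offset + this sequence's length
      offset_level.push_back(offset_level.back() + len);
    }
    offsets.push_back(std::move(offset_level));
  }
  return offsets;
}

// LengthsToOffsets({{2, 3}}) == {{0, 2, 5}}, matching the docstring example.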
@@ -1234,13 +1239,14 @@ PYBIND11_MODULE(core_noavx, m) { t.set_recursive_sequence_lengths([[2, 3]]) print(t.recursive_sequence_lengths()) # [[2, 3]] )DOC") - .def("has_valid_recursive_sequence_lengths", - [](framework::Tensor &self) -> bool { - // Check that the lod info is valid and match the outermost - // dimension of the Tensor data - return CheckLoD(self.lod(), vectorize(self.dims()).front()); - }, - R"DOC( + .def( + "has_valid_recursive_sequence_lengths", + [](framework::Tensor &self) -> bool { + // Check that the lod info is valid and match the outermost + // dimension of the Tensor data + return CheckLoD(self.lod(), vectorize(self.dims()).front()); + }, + R"DOC( Check whether the LoD of the Tensor is valid. Returns: @@ -1624,9 +1630,10 @@ PYBIND11_MODULE(core_noavx, m) { const int64_t &height) { new (&instance) phi::SelectedRows(rows, height); }) - .def("get_tensor", - [](phi::SelectedRows &self) { return self.mutable_value(); }, - py::return_value_policy::reference) + .def( + "get_tensor", + [](phi::SelectedRows &self) { return self.mutable_value(); }, + py::return_value_policy::reference) .def("numel", [](phi::SelectedRows &self) -> int64_t { return self.value().numel(); @@ -1668,11 +1675,12 @@ All parameter, weight, gradient are variables in Paddle. }) .def("get_float", [](const Variable &var) -> float { return var.Get(); }) - .def("get_tensor", - [](Variable &self) -> LoDTensor * { - return self.GetMutable(); - }, - py::return_value_policy::reference) + .def( + "get_tensor", + [](Variable &self) -> LoDTensor * { + return self.GetMutable(); + }, + py::return_value_policy::reference) .def("get_bytes", [](Variable &self) { return py::bytes(*self.GetMutable()); @@ -1683,53 +1691,60 @@ All parameter, weight, gradient are variables in Paddle. 
}) .def("set_vocab", [](Variable &self, Vocab vocab) { *self.GetMutable() = vocab; }) - .def("get_string_tensor", - [](Variable &self) { return self.GetMutable(); }, - py::return_value_policy::reference) - .def("get_map_tensor", - [](Variable &self) { return self.GetMutable(); }, - py::return_value_policy::reference) - .def("get_lod_rank_table", - [](Variable &self) { return self.GetMutable(); }, - py::return_value_policy::reference) - .def("get_selected_rows", - [](Variable &self) -> phi::SelectedRows * { - return self.GetMutable(); - }, - py::return_value_policy::reference) - .def("get_lod_tensor_array", - [](Variable &self) { return self.GetMutable(); }, - py::return_value_policy::reference) - .def("get_fetch_list", - [](Variable &self) { return self.GetMutable(); }, - py::return_value_policy::reference) + .def( + "get_string_tensor", + [](Variable &self) { return self.GetMutable(); }, + py::return_value_policy::reference) + .def( + "get_map_tensor", + [](Variable &self) { return self.GetMutable(); }, + py::return_value_policy::reference) + .def( + "get_lod_rank_table", + [](Variable &self) { return self.GetMutable(); }, + py::return_value_policy::reference) + .def( + "get_selected_rows", + [](Variable &self) -> phi::SelectedRows * { + return self.GetMutable(); + }, + py::return_value_policy::reference) + .def( + "get_lod_tensor_array", + [](Variable &self) { return self.GetMutable(); }, + py::return_value_policy::reference) + .def( + "get_fetch_list", + [](Variable &self) { return self.GetMutable(); }, + py::return_value_policy::reference) #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) - .def("get_communicator", - [](Variable &self) -> platform::Communicator * { - return self.GetMutable(); - }, - py::return_value_policy::reference) + .def( + "get_communicator", + [](Variable &self) -> platform::Communicator * { + return self.GetMutable(); + }, + py::return_value_policy::reference) #endif - .def("get_reader", - [](Variable &self) -> framework::ReaderHolder * { - PADDLE_ENFORCE_EQ( - self.IsType(), true, - platform::errors::InvalidArgument( - "The variable is not type of ReaderHolder.")); - return self.GetMutable(); - }, - py::return_value_policy::reference) - .def("get_scope", - [](Variable &self) -> Scope * { - auto scope_vec = - self.GetMutable>(); - PADDLE_ENFORCE_GT( - scope_vec->size(), 0, - platform::errors::InvalidArgument( - "The size of scope_vec should be greater than 0")); - return scope_vec->front(); - }, - py::return_value_policy::reference) + .def( + "get_reader", + [](Variable &self) -> framework::ReaderHolder * { + PADDLE_ENFORCE_EQ(self.IsType(), true, + platform::errors::InvalidArgument( + "The variable is not type of ReaderHolder.")); + return self.GetMutable(); + }, + py::return_value_policy::reference) + .def( + "get_scope", + [](Variable &self) -> Scope * { + auto scope_vec = self.GetMutable>(); + PADDLE_ENFORCE_GT( + scope_vec->size(), 0, + platform::errors::InvalidArgument( + "The size of scope_vec should be greater than 0")); + return scope_vec->front(); + }, + py::return_value_policy::reference) .def("set_scope", [](Variable &self, Scope &scope) { auto scope_vec = self.GetMutable>(); scope_vec->emplace_back(&scope); @@ -1762,12 +1777,13 @@ All parameter, weight, gradient are variables in Paddle. 
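Every Variable getter in the hunk above follows the same pattern: a lambda hands back a raw pointer into state the Variable owns, and py::return_value_policy::reference tells pybind11 that Python only borrows it and must never delete it. The reformatting itself reflects clang-format's rule that once a lambda appears among the arguments, the Python-visible name moves to its own line after .def(. A hedged, self-contained sketch of the pattern; Holder and Inner are illustrative types, not Paddle's:

#include <pybind11/pybind11.h>

namespace py = pybind11;

struct Inner {
  int value = 0;
};

struct Holder {
  Inner inner;  // owned by Holder for its whole lifetime
};

PYBIND11_MODULE(example, m) {
  py::class_<Inner>(m, "Inner").def_readwrite("value", &Inner::value);
  py::class_<Holder>(m, "Holder")
      .def(py::init<>())
      .def(
          "get_inner",
          [](Holder& self) -> Inner* { return &self.inner; },
          // Python borrows the pointer; Holder keeps ownership.
          py::return_value_policy::reference);
}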
_Scope .def("_remove_from_pool", [](Scope &self) { ScopePool::Instance().Remove(&self); }) - .def("var", - [](Scope &self, const std::string &name) -> Variable * { - return self.Var(name); - }, - py::arg("name"), - R"DOC( + .def( + "var", + [](Scope &self, const std::string &name) -> Variable * { + return self.Var(name); + }, + py::arg("name"), + R"DOC( Find or create variable named :code:`name` in the current scope. If the variable named :code:`name` does not exist in the @@ -1780,7 +1796,7 @@ All parameter, weight, gradient are variables in Paddle. Returns: out (core.Variable): the found or created variable. )DOC", - py::return_value_policy::reference) + py::return_value_policy::reference) .def("find_var", &Scope::FindVar, py::arg("name"), R"DOC( Find variable named :code:`name` in the current scope or @@ -1806,33 +1822,35 @@ All parameter, weight, gradient are variables in Paddle. None )DOC", py::return_value_policy::reference) - .def("new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, - R"DOC( + .def( + "new_scope", [](Scope &self) -> Scope * { return &self.NewScope(); }, + R"DOC( Create a new sub-scope of the current scope. Returns: out (core._Scope): the created sub-scope. )DOC", - py::return_value_policy::reference) + py::return_value_policy::reference) .def("drop_kids", &Scope::DropKids, R"DOC( Delete all sub-scopes of the current scope. )DOC") .def("_kids", &Scope::kids); - m.def("Scope", - []() -> Scope * { - auto *s = new Scope(); - ScopePool::Instance().Insert(std::unique_ptr(s)); - return s; - }, - R"DOC( + m.def( + "Scope", + []() -> Scope * { + auto *s = new Scope(); + ScopePool::Instance().Insert(std::unique_ptr(s)); + return s; + }, + R"DOC( Create a new scope. Returns: out (core._Scope): the created scope. )DOC", - py::return_value_policy::reference); + py::return_value_policy::reference); //! @note: Be careful! PyBind will return std::string as an unicode, not //! Python str. If you want a str object, you should cast them in Python. @@ -1919,11 +1937,12 @@ All parameter, weight, gradient are variables in Paddle. return std::make_tuple(ProgramDesc(pruned_desc), pruned_origin_block_id_map); }); - m.def("prune_backward", - [](const framework::ProgramDesc &program) { - return PruneBackward(program); - }, - R"DOC( + m.def( + "prune_backward", + [](const framework::ProgramDesc &program) { + return PruneBackward(program); + }, + R"DOC( Prune the backward part of a program, mostly called in program.clone(for_test=True). @@ -2790,8 +2809,8 @@ All parameter, weight, gradient are variables in Paddle. .def("outputs", [](const OperatorBase &op) -> std::map> { - return op.Outputs(); - }) + return op.Outputs(); + }) .def("output_vars", [](const OperatorBase &op) { return op.OutputVars(true); }) .def("inputs", [](const OperatorBase &op) { return op.Inputs(); }) @@ -2806,11 +2825,12 @@ All parameter, weight, gradient are variables in Paddle. py::class_>( m, "TrainerBase") - .def("get_worker_scope", - [](TrainerBase &self, int thread_id) -> Scope * { - return self.GetWorkerScope(thread_id); - }, - py::return_value_policy::reference) + .def( + "get_worker_scope", + [](TrainerBase &self, int thread_id) -> Scope * { + return self.GetWorkerScope(thread_id); + }, + py::return_value_policy::reference) .def("finalize", &TrainerBase::Finalize) .def("ResetDataset", &TrainerBase::ResetDataset); @@ -3010,21 +3030,23 @@ All parameter, weight, gradient are variables in Paddle. 
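The run_cmd and shell_execute_cmd bindings just below pair each parameter with py::arg(...) and a default value, so Python callers get proper keyword arguments. A hedged sketch of the mechanism, with a placeholder body standing in for the real shell helper:

#include <pybind11/pybind11.h>

#include <string>

namespace py = pybind11;

// Placeholder body; the real binding shells out to run the command.
std::string RunCmd(const std::string& cmd, int time_out, int sleep_inter) {
  (void)time_out;
  (void)sleep_inter;
  return cmd;
}

PYBIND11_MODULE(example, m) {
  // Callable from Python as run_cmd("ls") or run_cmd("ls", time_out=5).
  m.def("run_cmd", &RunCmd, py::arg("cmd"), py::arg("time_out") = -1,
        py::arg("sleep_inter") = -1);
}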
m.def("device_memory_stat_current_value", memory::DeviceMemoryStatCurrentValue); m.def("device_memory_stat_peak_value", memory::DeviceMemoryStatPeakValue); - m.def("run_cmd", - [](const std::string &cmd, int time_out = -1, - int sleep_inter = -1) -> const std::string { - return paddle::framework::shell_get_command_output(cmd, time_out, - sleep_inter); - }, - py::arg("cmd"), py::arg("time_out") = -1, py::arg("sleep_inter") = -1); - m.def("shell_execute_cmd", - [](const std::string &cmd, int time_out = 0, int sleep_inter = 0, - bool redirect_stderr = false) -> std::vector { - return paddle::framework::shell_execute_cmd( - cmd, time_out, sleep_inter, redirect_stderr); - }, - py::arg("cmd"), py::arg("time_out") = 0, py::arg("sleep_inter") = 0, - py::arg("redirect_stderr") = false); + m.def( + "run_cmd", + [](const std::string &cmd, int time_out = -1, + int sleep_inter = -1) -> const std::string { + return paddle::framework::shell_get_command_output(cmd, time_out, + sleep_inter); + }, + py::arg("cmd"), py::arg("time_out") = -1, py::arg("sleep_inter") = -1); + m.def( + "shell_execute_cmd", + [](const std::string &cmd, int time_out = 0, int sleep_inter = 0, + bool redirect_stderr = false) -> std::vector { + return paddle::framework::shell_execute_cmd(cmd, time_out, sleep_inter, + redirect_stderr); + }, + py::arg("cmd"), py::arg("time_out") = 0, py::arg("sleep_inter") = 0, + py::arg("redirect_stderr") = false); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) m.def("is_float16_supported", [](const platform::CUDAPlace &place) -> bool { @@ -3092,9 +3114,10 @@ All parameter, weight, gradient are variables in Paddle. pylodtensorarray .def("__init__", [](LoDTensorArray &instance) { new (&instance) LoDTensorArray(); }) - .def("__getitem__", - [](LoDTensorArray &self, size_t i) { return &self.at(i); }, - py::return_value_policy::reference) + .def( + "__getitem__", + [](LoDTensorArray &self, size_t i) { return &self.at(i); }, + py::return_value_policy::reference) .def("__len__", [](LoDTensorArray &self) { return self.size(); }) .def("__setitem__", [](LoDTensorArray &self, size_t i, const LoDTensor &t) { @@ -3105,13 +3128,14 @@ All parameter, weight, gradient are variables in Paddle. self[i].ShareDataWith(t); self[i].set_lod(t.lod()); }) - .def("append", - [](LoDTensorArray &self, const LoDTensor &t) { - self.emplace_back(); - self.back().ShareDataWith(t); - self.back().set_lod(t.lod()); - }, - py::arg("tensor"), R"DOC( + .def( + "append", + [](LoDTensorArray &self, const LoDTensor &t) { + self.emplace_back(); + self.back().ShareDataWith(t); + self.back().set_lod(t.lod()); + }, + py::arg("tensor"), R"DOC( Append a LoDensor to LoDTensorArray. Args: @@ -3131,89 +3155,94 @@ All parameter, weight, gradient are variables in Paddle. t.set(np.ndarray([5, 30]), fluid.CPUPlace()) arr.append(t) )DOC") - .def("_move_to_list", - [](LoDTensorArray &self) -> py::list { - py::list res(self.size()); - for (size_t i = 0; i < self.size(); ++i) { - res[i] = py::cast(std::move(self[i])); - } - self.clear(); - return res; - }, - py::return_value_policy::take_ownership); + .def( + "_move_to_list", + [](LoDTensorArray &self) -> py::list { + py::list res(self.size()); + for (size_t i = 0; i < self.size(); ++i) { + res[i] = py::cast(std::move(self[i])); + } + self.clear(); + return res; + }, + py::return_value_policy::take_ownership); py::class_(m, "FetchList", R"DOC( FetchList is a vector of boost::variant. 
)DOC") - .def("_move_to_list", - [](FetchList &self) -> py::list { - py::list res(self.size()); - for (size_t i = 0; i < self.size(); ++i) { - if (data_is_lod_tensor(self[i])) { - auto &data = BOOST_GET(LoDTensor, self[i]); - res[i] = py::cast(std::move(data)); - } else { - auto &data = BOOST_GET(LoDTensorArray, self[i]); - py::list tmp(data.size()); - for (size_t j = 0; j < data.size(); ++j) { - tmp[j] = py::cast(std::move(data[j])); - } - res[i] = std::move(tmp); - } - } - self.clear(); - return res; - }, - py::return_value_policy::take_ownership) + .def( + "_move_to_list", + [](FetchList &self) -> py::list { + py::list res(self.size()); + for (size_t i = 0; i < self.size(); ++i) { + if (data_is_lod_tensor(self[i])) { + auto &data = BOOST_GET(LoDTensor, self[i]); + res[i] = py::cast(std::move(data)); + } else { + auto &data = BOOST_GET(LoDTensorArray, self[i]); + py::list tmp(data.size()); + for (size_t j = 0; j < data.size(); ++j) { + tmp[j] = py::cast(std::move(data[j])); + } + res[i] = std::move(tmp); + } + } + self.clear(); + return res; + }, + py::return_value_policy::take_ownership) - .def("append", - [](FetchList &self, const LoDTensor &t) { - self.emplace_back(); - auto &lod_tensor = BOOST_GET(LoDTensor, self.back()); - lod_tensor.ShareDataWith(t); - lod_tensor.set_lod(t.lod()); - }, - py::arg("var")) - - .def("append", - [](FetchList &self, const LoDTensorArray &t) { - self.emplace_back(); - auto &lod_tensor_array = BOOST_GET(LoDTensorArray, self.back()); - for (size_t i = 0; i < t.size(); ++i) { - lod_tensor_array[i].ShareDataWith(t[i]); - lod_tensor_array[i].set_lod(t[i].lod()); - } - }, - py::arg("var")); + .def( + "append", + [](FetchList &self, const LoDTensor &t) { + self.emplace_back(); + auto &lod_tensor = BOOST_GET(LoDTensor, self.back()); + lod_tensor.ShareDataWith(t); + lod_tensor.set_lod(t.lod()); + }, + py::arg("var")) + + .def( + "append", + [](FetchList &self, const LoDTensorArray &t) { + self.emplace_back(); + auto &lod_tensor_array = BOOST_GET(LoDTensorArray, self.back()); + for (size_t i = 0; i < t.size(); ++i) { + lod_tensor_array[i].ShareDataWith(t[i]); + lod_tensor_array[i].set_lod(t[i].lod()); + } + }, + py::arg("var")); py::class_(m, "FetchUnmergedList", R"DOC( FetchUnmergedList is 2-D array of FetchType(boost::variant(LoDTensor, LoDTensorArray)). 
)DOC") - .def("_move_to_list", - [](FetchUnmergedList &self) -> py::list { - py::list res(self.size()); - for (size_t i = 0; i < self.size(); ++i) { - py::list tmp(self[i].size()); - for (size_t j = 0; j < self[i].size(); ++j) { - if (data_is_lod_tensor(self[i][j])) { - auto &var = BOOST_GET(LoDTensor, self[i][j]); - tmp[j] = py::cast(std::move(var)); - } else { - auto &var = BOOST_GET(LoDTensorArray, self[i][j]); - py::list tmp_array(var.size()); - for (size_t k = 0; k < var.size(); ++k) { - tmp_array[k] = std::move(var[k]); - } - tmp[j] = std::move(tmp_array); - } - } - res[i] = std::move(tmp); - self[i].clear(); - } - self.clear(); - return res; - }, - py::return_value_policy::take_ownership); + .def( + "_move_to_list", + [](FetchUnmergedList &self) -> py::list { + py::list res(self.size()); + for (size_t i = 0; i < self.size(); ++i) { + py::list tmp(self[i].size()); + for (size_t j = 0; j < self[i].size(); ++j) { + if (data_is_lod_tensor(self[i][j])) { + auto &var = BOOST_GET(LoDTensor, self[i][j]); + tmp[j] = py::cast(std::move(var)); + } else { + auto &var = BOOST_GET(LoDTensorArray, self[i][j]); + py::list tmp_array(var.size()); + for (size_t k = 0; k < var.size(); ++k) { + tmp_array[k] = std::move(var[k]); + } + tmp[j] = std::move(tmp_array); + } + } + res[i] = std::move(tmp); + self[i].clear(); + } + self.clear(); + return res; + }, + py::return_value_policy::take_ownership); m.def("op_support_gpu", OpSupportGPU); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -3227,11 +3256,12 @@ All parameter, weight, gradient are variables in Paddle. } platform::EmptyCache(); }); - m.def("get_device_properties", - [](int id) -> const gpuDeviceProp & { - return platform::GetDeviceProperties(id); - }, - py::return_value_policy::copy); + m.def( + "get_device_properties", + [](int id) -> const gpuDeviceProp & { + return platform::GetDeviceProperties(id); + }, + py::return_value_policy::copy); py::class_(m, "_gpuDeviceProperties") .def_property_readonly( @@ -3409,15 +3439,16 @@ All parameter, weight, gradient are variables in Paddle. profiler->Prepare(); }) .def("start", &paddle::platform::Profiler::Start) - .def("stop", - [](paddle::platform::Profiler *profiler) { - platform::DisableHostEventRecorder(); - auto result = profiler->Stop(); - framework::StaticGraphExecutorPerfStatistics( - result->GetNodeTrees()); - return result; - }, - py::return_value_policy::automatic_reference); + .def( + "stop", + [](paddle::platform::Profiler *profiler) { + platform::DisableHostEventRecorder(); + auto result = profiler->Stop(); + framework::StaticGraphExecutorPerfStatistics( + result->GetNodeTrees()); + return result; + }, + py::return_value_policy::automatic_reference); py::class_(m, "ProfilerOptions") .def(py::init<>()) @@ -3666,11 +3697,12 @@ All parameter, weight, gradient are variables in Paddle. }, R"DOC(This config that the this is distributed training with parameter server )DOC") - .def_property("_dry_run", - [](const ExecutionStrategy &self) { return self.dry_run_; }, - [](ExecutionStrategy &self, bool dry_run) { - self.dry_run_ = dry_run; - }); + .def_property( + "_dry_run", + [](const ExecutionStrategy &self) { return self.dry_run_; }, + [](ExecutionStrategy &self, bool dry_run) { + self.dry_run_ = dry_run; + }); exec_strategy.def_property( "use_experimental_executor", @@ -3918,11 +3950,12 @@ All parameter, weight, gradient are variables in Paddle. 
const std::vector &trainers_endpoints) { self.trainers_endpoints_ = trainers_endpoints; }) - .def_property("trainer_id", - [](const BuildStrategy &self) { return self.trainer_id_; }, - [](BuildStrategy &self, int trainer_id) { - self.trainer_id_ = trainer_id; - }) + .def_property( + "trainer_id", + [](const BuildStrategy &self) { return self.trainer_id_; }, + [](BuildStrategy &self, int trainer_id) { + self.trainer_id_ = trainer_id; + }) .def_property( "nccl_comm_num", [](const BuildStrategy &self) { return self.nccl_comm_num_; }, @@ -3935,20 +3968,22 @@ All parameter, weight, gradient are variables in Paddle. [](BuildStrategy &self, int bkcl_comm_num) { self.bkcl_comm_num_ = bkcl_comm_num; }) - .def_property("use_hierarchical_allreduce", - [](const BuildStrategy &self) { - return self.use_hierarchical_allreduce_; - }, - [](BuildStrategy &self, bool use) { - self.use_hierarchical_allreduce_ = use; - }) - .def_property("hierarchical_allreduce_inter_nranks", - [](const BuildStrategy &self) { - return self.hierarchical_allreduce_inter_nranks_; - }, - [](BuildStrategy &self, int nranks) { - self.hierarchical_allreduce_inter_nranks_ = nranks; - }) + .def_property( + "use_hierarchical_allreduce", + [](const BuildStrategy &self) { + return self.use_hierarchical_allreduce_; + }, + [](BuildStrategy &self, bool use) { + self.use_hierarchical_allreduce_ = use; + }) + .def_property( + "hierarchical_allreduce_inter_nranks", + [](const BuildStrategy &self) { + return self.hierarchical_allreduce_inter_nranks_; + }, + [](BuildStrategy &self, int nranks) { + self.hierarchical_allreduce_inter_nranks_ = nranks; + }) .def_property( "fuse_elewise_add_act_ops", @@ -4107,19 +4142,20 @@ All parameter, weight, gradient are variables in Paddle. build_strategy = static.BuildStrategy() build_strategy.fuse_relu_depthwise_conv = True )DOC") - .def_property("fuse_broadcast_ops", - [](const BuildStrategy &self) { - return self.fuse_broadcast_ops_ == true || - self.fuse_broadcast_ops_ == paddle::none; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finlaized, " - "cannot be configured again.")); - self.fuse_broadcast_ops_ = b; - }, - R"DOC((bool, optional): fuse_broadcast_op indicates whether + .def_property( + "fuse_broadcast_ops", + [](const BuildStrategy &self) { + return self.fuse_broadcast_ops_ == true || + self.fuse_broadcast_ops_ == paddle::none; + }, + [](BuildStrategy &self, bool b) { + PADDLE_ENFORCE_NE(self.IsFinalized(), true, + platform::errors::PreconditionNotMet( + "BuildStrategy has been finlaized, " + "cannot be configured again.")); + self.fuse_broadcast_ops_ = b; + }, + R"DOC((bool, optional): fuse_broadcast_op indicates whether to fuse the broadcast ops. Note that, in Reduce mode, fusing broadcast ops may make the program faster. Because fusing broadcast OP equals delaying the execution of all @@ -4137,18 +4173,19 @@ All parameter, weight, gradient are variables in Paddle. 
build_strategy = static.BuildStrategy() build_strategy.fuse_broadcast_ops = True )DOC") - .def_property("fuse_all_optimizer_ops", - [](const BuildStrategy &self) { - return self.fuse_all_optimizer_ops_ == true || - self.fuse_all_optimizer_ops_ == paddle::none; - }, - [](BuildStrategy &self, bool b) { - PADDLE_ENFORCE_NE(self.IsFinalized(), true, - platform::errors::PreconditionNotMet( - "BuildStrategy has been finlaized, " - "cannot be configured again.")); - self.fuse_all_optimizer_ops_ = b; - }) + .def_property( + "fuse_all_optimizer_ops", + [](const BuildStrategy &self) { + return self.fuse_all_optimizer_ops_ == true || + self.fuse_all_optimizer_ops_ == paddle::none; + }, + [](BuildStrategy &self, bool b) { + PADDLE_ENFORCE_NE(self.IsFinalized(), true, + platform::errors::PreconditionNotMet( + "BuildStrategy has been finlaized, " + "cannot be configured again.")); + self.fuse_all_optimizer_ops_ = b; + }) .def_property( "sync_batch_norm", [](const BuildStrategy &self) { return self.sync_batch_norm_; }, @@ -4231,9 +4268,10 @@ All parameter, weight, gradient are variables in Paddle. self.is_distribution_ = b; #endif }) - .def_property("async_mode", - [](const BuildStrategy &self) { return self.async_mode_; }, - [](BuildStrategy &self, bool b) { self.async_mode_ = b; }) + .def_property( + "async_mode", + [](const BuildStrategy &self) { return self.async_mode_; }, + [](BuildStrategy &self, bool b) { self.async_mode_ = b; }) .def_property( "enable_inplace", [](const BuildStrategy &self) { return self.enable_inplace_; }, @@ -4249,13 +4287,14 @@ All parameter, weight, gradient are variables in Paddle. self.fuse_all_reduce_ops_ == paddle::none; }, [](BuildStrategy &self, bool b) { self.fuse_all_reduce_ops_ = b; }) - .def_property("enable_backward_optimizer_op_deps", - [](const BuildStrategy &self) { - return self.enable_backward_optimizer_op_deps_; - }, - [](BuildStrategy &self, bool b) { - self.enable_backward_optimizer_op_deps_ = b; - }) + .def_property( + "enable_backward_optimizer_op_deps", + [](const BuildStrategy &self) { + return self.enable_backward_optimizer_op_deps_; + }, + [](BuildStrategy &self, bool b) { + self.enable_backward_optimizer_op_deps_ = b; + }) .def_property( "cache_runtime_context", [](const BuildStrategy &self) { return self.cache_runtime_context_; }, @@ -4275,24 +4314,26 @@ All parameter, weight, gradient are variables in Paddle. [](BuildStrategy &self, bool fix_op_run_order) { self.fix_op_run_order_ = fix_op_run_order; }) - .def_property("allow_cuda_graph_capture", - [](const BuildStrategy &self) { - return self.allow_cuda_graph_capture_; - }, - [](BuildStrategy &self, bool allow_cuda_graph_capture) { - self.allow_cuda_graph_capture_ = allow_cuda_graph_capture; - }) + .def_property( + "allow_cuda_graph_capture", + [](const BuildStrategy &self) { + return self.allow_cuda_graph_capture_; + }, + [](BuildStrategy &self, bool allow_cuda_graph_capture) { + self.allow_cuda_graph_capture_ = allow_cuda_graph_capture; + }) .def("_copy", [](const BuildStrategy &self) { auto new_bs = self; new_bs.ClearFinalized(); return new_bs; }) - .def("_finalize_strategy_and_create_passes", - [](BuildStrategy &self) -> std::shared_ptr { - return self.CreatePassesFromStrategy(true); - }, - R"DOC(Allow user to customized passes. Normally model-specific + .def( + "_finalize_strategy_and_create_passes", + [](BuildStrategy &self) -> std::shared_ptr { + return self.CreatePassesFromStrategy(true); + }, + R"DOC(Allow user to customized passes. 
Normally model-specific optimization passes should be defined in this way. BuildStrategy cannot be updated after being finalized.)DOC"); @@ -4310,11 +4351,12 @@ All parameter, weight, gradient are variables in Paddle. // We still cannot get local_scope from this vector, since the element // of vec will be freed by Python GC. We can only return Scope* // one by one and mark them as reference. - .def("local_scopes", - [](ParallelExecutor &self) -> std::vector * { - return &self.GetLocalScopes(); - }, - py::return_value_policy::reference) + .def( + "local_scopes", + [](ParallelExecutor &self) -> std::vector * { + return &self.GetLocalScopes(); + }, + py::return_value_policy::reference) .def("drop_local_exe_scopes", &ParallelExecutor::DropLocalExeScopes) .def("_need_create_local_exe_scopes", &ParallelExecutor::NeedCreateLocalExeScope) @@ -4346,12 +4388,13 @@ All parameter, weight, gradient are variables in Paddle. std::unique_ptr>( m, "IpuBackend") // manage IpuBackend in C++ - .def("get_instance", - []() { - return std::unique_ptr( - platform::ipu::IpuBackend::GetInstance()); - }, - py::return_value_policy::reference) + .def( + "get_instance", + []() { + return std::unique_ptr( + platform::ipu::IpuBackend::GetInstance()); + }, + py::return_value_policy::reference) .def("weights_to_host", &platform::ipu::IpuBackend::WeightsToHost) .def("detach", &platform::ipu::IpuBackend::Detach) .def("reset", &platform::ipu::IpuBackend::Reset) diff --git a/paddle/fluid/pybind/reader_py.cc b/paddle/fluid/pybind/reader_py.cc index e0aab0dd06e..3e779ba41c0 100644 --- a/paddle/fluid/pybind/reader_py.cc +++ b/paddle/fluid/pybind/reader_py.cc @@ -13,12 +13,14 @@ // limitations under the License. #include "paddle/fluid/pybind/reader_py.h" + #include #include #include #include #include #include + #include "Python.h" #include "boost/optional.hpp" #include "gflags/gflags.h" @@ -337,32 +339,33 @@ void BindMultiDeviceReader(py::module *module, const char *reader_name) { py::call_guard()) .def("read_next_list", &ReaderType::ReadNextList, py::call_guard()) - .def("read_next_var_list", - [](ReaderType &self) { - auto result_list = self.ReadNextList(); - auto &tensor_list = result_list[0]; - std::vector> var_list; - var_list.reserve(tensor_list.size()); - auto func = [](framework::LoDTensor &lod_tensor) { - std::string act_name = - imperative::GetCurrentTracer()->GenerateUniqueName( - "generated_var"); - auto new_var = std::make_shared(act_name); - new_var->SetPersistable(false); - new_var->SetType(framework::proto::VarType::LOD_TENSOR); - new_var->SetDataType( - framework::TransToProtoVarType(lod_tensor.dtype())); - auto *tensor = - new_var->MutableVar()->GetMutable(); - *tensor = std::move(lod_tensor); - return new_var; - }; - for (auto &tensor : tensor_list) { - var_list.emplace_back(func(tensor)); - } - return var_list; - }, - py::call_guard()) + .def( + "read_next_var_list", + [](ReaderType &self) { + auto result_list = self.ReadNextList(); + auto &tensor_list = result_list[0]; + std::vector> var_list; + var_list.reserve(tensor_list.size()); + auto func = [](framework::LoDTensor &lod_tensor) { + std::string act_name = + imperative::GetCurrentTracer()->GenerateUniqueName( + "generated_var"); + auto new_var = std::make_shared(act_name); + new_var->SetPersistable(false); + new_var->SetType(framework::proto::VarType::LOD_TENSOR); + new_var->SetDataType( + framework::TransToProtoVarType(lod_tensor.dtype())); + auto *tensor = + new_var->MutableVar()->GetMutable(); + *tensor = std::move(lod_tensor); + return new_var; + }; + 
for (auto &tensor : tensor_list) { + var_list.emplace_back(func(tensor)); + } + return var_list; + }, + py::call_guard()) .def("reset", &ReaderType::Reset, py::call_guard()) .def("shutdown", &ReaderType::Shutdown, @@ -372,34 +375,35 @@ void BindMultiDeviceReader(py::module *module, const char *reader_name) { void BindReader(py::module *module) { auto &m = *module; - m.def("diff_tensor_shape", [](const framework::LoDTensor &tensor, - const framework::VarDesc &var_desc, - size_t num_places) -> py::object { - auto diff = DiffTensorShapeWithVarDesc(tensor, var_desc, num_places); - if (diff) { - return py::cast(std::move(diff.get())); - } else { - return py::cast(nullptr); - } - }); - - m.def("init_lod_tensor_blocking_queue", - [](framework::Variable &var, size_t capacity, - bool is_ordered) -> py::object { - VLOG(1) << "init_lod_tensor_blocking_queue"; - if (is_ordered) { - auto *holder = var.GetMutable< - reader::OrderedMultiDeviceLoDTensorBlockingQueueHolder>(); - holder->InitOnce(capacity, FLAGS_reader_queue_speed_test_mode); - return py::cast(holder->GetQueue()); - } else { - auto *holder = - var.GetMutable(); - holder->InitOnce(capacity, FLAGS_reader_queue_speed_test_mode); - return py::cast(holder->GetQueue()); - } - }, - py::return_value_policy::copy); + m.def( + "diff_tensor_shape", + [](const framework::LoDTensor &tensor, const framework::VarDesc &var_desc, + size_t num_places) -> py::object { + auto diff = DiffTensorShapeWithVarDesc(tensor, var_desc, num_places); + if (diff) { + return py::cast(std::move(diff.get())); + } else { + return py::cast(nullptr); + } + }); + + m.def( + "init_lod_tensor_blocking_queue", + [](framework::Variable &var, size_t capacity, + bool is_ordered) -> py::object { + VLOG(1) << "init_lod_tensor_blocking_queue"; + if (is_ordered) { + auto *holder = var.GetMutable< + reader::OrderedMultiDeviceLoDTensorBlockingQueueHolder>(); + holder->InitOnce(capacity, FLAGS_reader_queue_speed_test_mode); + return py::cast(holder->GetQueue()); + } else { + auto *holder = var.GetMutable(); + holder->InitOnce(capacity, FLAGS_reader_queue_speed_test_mode); + return py::cast(holder->GetQueue()); + } + }, + py::return_value_policy::copy); py::class_(m, "Reader", "") .def("start", &framework::ReaderHolder::Start) @@ -408,12 +412,13 @@ void BindReader(py::module *module) { py::class_>( m, "LoDTensorBlockingQueue", "") - .def("push", - [](reader::LoDTensorBlockingQueue &self, - const std::vector &lod_tensor_vec) { - return self.Push(lod_tensor_vec); - }, - py::call_guard()) + .def( + "push", + [](reader::LoDTensorBlockingQueue &self, + const std::vector &lod_tensor_vec) { + return self.Push(lod_tensor_vec); + }, + py::call_guard()) .def("size", &reader::LoDTensorBlockingQueue::Size) .def("capacity", &reader::LoDTensorBlockingQueue::Cap) .def("close", &reader::LoDTensorBlockingQueue::Close) @@ -424,12 +429,13 @@ void BindReader(py::module *module) { py::class_>( m, "OrderedMultiDeviceLoDTensorBlockingQueue", "") - .def("push", - [](reader::OrderedMultiDeviceLoDTensorBlockingQueue &self, - const std::vector &lod_tensor_vec) { - return self.Push(lod_tensor_vec); - }, - py::call_guard()) + .def( + "push", + [](reader::OrderedMultiDeviceLoDTensorBlockingQueue &self, + const std::vector &lod_tensor_vec) { + return self.Push(lod_tensor_vec); + }, + py::call_guard()) .def("size", &reader::OrderedMultiDeviceLoDTensorBlockingQueue::Size) .def("capacity", &reader::OrderedMultiDeviceLoDTensorBlockingQueue::Cap) .def("close", &reader::OrderedMultiDeviceLoDTensorBlockingQueue::Close) @@ -444,19 
+450,20 @@ void BindReader(py::module *module) { BindMultiDeviceReader( module, "OrderedMultiDeviceFeedReader"); - m.def("create_py_reader", - [](const std::shared_ptr &queue, - const std::vector &names, - const std::vector> &shapes, - const std::vector &dtypes, - const std::vector &need_check_feed, - const std::vector &dst_places, - bool use_double_buffer, bool drop_last, bool pin_memory) { - return new MultiDeviceFeedReader( - queue, names, shapes, dtypes, need_check_feed, dst_places, - use_double_buffer, drop_last, pin_memory); - }, - py::return_value_policy::take_ownership); + m.def( + "create_py_reader", + [](const std::shared_ptr &queue, + const std::vector &names, + const std::vector> &shapes, + const std::vector &dtypes, + const std::vector &need_check_feed, + const std::vector &dst_places, bool use_double_buffer, + bool drop_last, bool pin_memory) { + return new MultiDeviceFeedReader( + queue, names, shapes, dtypes, need_check_feed, dst_places, + use_double_buffer, drop_last, pin_memory); + }, + py::return_value_policy::take_ownership); m.def( "create_py_reader", diff --git a/paddle/fluid/pybind/slice_utils.h b/paddle/fluid/pybind/slice_utils.h index add332abd30..109f3e5705b 100644 --- a/paddle/fluid/pybind/slice_utils.h +++ b/paddle/fluid/pybind/slice_utils.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/scope_guard.h" #include "paddle/fluid/operators/utils.h" diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 63b36bd9173..ed7ce64032b 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -15,12 +15,14 @@ limitations under the License. */ #pragma once #include + #include #include #include #include #include #include + #include "paddle/fluid/framework/data_type.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/memory/memcpy.h" diff --git a/paddle/fluid/pybind/uva_utils.h b/paddle/fluid/pybind/uva_utils.h index 94f55769b73..3ea3d7ee1a7 100644 --- a/paddle/fluid/pybind/uva_utils.h +++ b/paddle/fluid/pybind/uva_utils.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/fluid/operators/utils.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/compat/convert_utils.h" diff --git a/paddle/fluid/string/pretty_log.h b/paddle/fluid/string/pretty_log.h index 45fe89e8b5b..d161b2a912f 100644 --- a/paddle/fluid/string/pretty_log.h +++ b/paddle/fluid/string/pretty_log.h @@ -17,6 +17,6 @@ #include #include #include -#include "gflags/gflags.h" +#include "gflags/gflags.h" #include "paddle/utils/string/pretty_log.h" diff --git a/paddle/infrt/api/infrt_api.cc b/paddle/infrt/api/infrt_api.cc index f0bf46567a5..2f4bbd5df35 100644 --- a/paddle/infrt/api/infrt_api.cc +++ b/paddle/infrt/api/infrt_api.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
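The queue push bindings above run under py::call_guard<py::gil_scoped_release>, which drops the Python GIL for the duration of the call so a Push that blocks on a full queue cannot stall other Python threads. A hedged sketch of the idiom; SimpleQueue is illustrative, not Paddle's queue type:

#include <pybind11/pybind11.h>

#include <deque>
#include <mutex>

namespace py = pybind11;

class SimpleQueue {
 public:
  // Stand-in for a call that may block; the GIL must not be held here.
  void Push(int v) {
    std::lock_guard<std::mutex> lock(mu_);
    data_.push_back(v);
  }

 private:
  std::mutex mu_;
  std::deque<int> data_;
};

PYBIND11_MODULE(example, m) {
  py::class_<SimpleQueue>(m, "SimpleQueue")
      .def(py::init<>())
      .def("push", &SimpleQueue::Push,
           // Release the GIL while Push runs, re-acquire on return.
           py::call_guard<py::gil_scoped_release>());
}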
+// clang-format off #include "paddle/infrt/api/infrt_api.h" #include @@ -61,6 +62,7 @@ #include "paddle/infrt/dialect/tensorrt/trt_op_teller_pass.h" #include "paddle/infrt/dialect/tensorrt/trt_type_convert_pass.h" #endif +// clang-format on using namespace infrt::host_context; // NOLINT using namespace infrt::tensor; // NOLINT diff --git a/paddle/infrt/backends/host/phi_context.h b/paddle/infrt/backends/host/phi_context.h index 2af1fab1008..880d1f03d87 100644 --- a/paddle/infrt/backends/host/phi_context.h +++ b/paddle/infrt/backends/host/phi_context.h @@ -35,12 +35,12 @@ class CpuPhiContext : public ::phi::CPUContext { class GpuPhiContext : public ::phi::GPUContext { public: using Base = ::phi::GPUContext; - using ::phi::GPUContext::SetStream; - using ::phi::GPUContext::SetEigenDevice; using ::phi::GPUContext::SetBlasHandle; using ::phi::GPUContext::SetDnnHandle; + using ::phi::GPUContext::SetEigenDevice; using ::phi::GPUContext::SetSolverHandle; using ::phi::GPUContext::SetSparseHandle; + using ::phi::GPUContext::SetStream; }; } // namespace backends diff --git a/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.cu b/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.cu index 5a53777c8e3..f3e2fe35074 100644 --- a/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.cu +++ b/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.cu @@ -199,8 +199,8 @@ bool PoolPlugin::isOutputBroadcastAcrossBatch(int32_t outputIndex, return false; } -bool PoolPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const - noexcept { +bool PoolPlugin::canBroadcastInputAcrossBatch( + int32_t inputIndex) const noexcept { return false; } diff --git a/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.h b/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.h index 0da1d158453..34189f95438 100644 --- a/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.h +++ b/paddle/infrt/backends/tensorrt/plugin/pool_op_plugin.h @@ -114,10 +114,10 @@ class PoolPlugin : public nvinfer1::IPluginV2IOExt { char const* getPluginNamespace() const noexcept override; // IPluginV2Ext methods - nvinfer1::DataType getOutputDataType(int32_t index, - nvinfer1::DataType const* inputTypes, - int32_t nbInputs) const - noexcept override; + nvinfer1::DataType getOutputDataType( + int32_t index, + nvinfer1::DataType const* inputTypes, + int32_t nbInputs) const noexcept override; bool isOutputBroadcastAcrossBatch(int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept override; diff --git a/paddle/infrt/backends/tensorrt/test_trt_engine.cc b/paddle/infrt/backends/tensorrt/test_trt_engine.cc index 89dd3b0dc7a..7e081362f9c 100644 --- a/paddle/infrt/backends/tensorrt/test_trt_engine.cc +++ b/paddle/infrt/backends/tensorrt/test_trt_engine.cc @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include - #include #include #include #include #include +#include + #include "paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h" #include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" #include "paddle/fluid/memory/allocation/allocator_facade.h" diff --git a/paddle/infrt/backends/tensorrt/trt_engine.cc b/paddle/infrt/backends/tensorrt/trt_engine.cc index a2d49546189..a539078e4af 100644 --- a/paddle/infrt/backends/tensorrt/trt_engine.cc +++ b/paddle/infrt/backends/tensorrt/trt_engine.cc @@ -18,6 +18,7 @@ #include #include #include + #include "paddle/phi/backends/dynload/tensorrt.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/core/ddim.h" diff --git a/paddle/infrt/backends/tensorrt/trt_engine.h b/paddle/infrt/backends/tensorrt/trt_engine.h index 41d11a71117..44f36a84cb5 100644 --- a/paddle/infrt/backends/tensorrt/trt_engine.h +++ b/paddle/infrt/backends/tensorrt/trt_engine.h @@ -17,6 +17,7 @@ #include #include + #include "paddle/infrt/backends/tensorrt/trt_options.h" #include "paddle/infrt/backends/tensorrt/trt_utils.h" #include "paddle/phi/backends/dynload/tensorrt.h" diff --git a/paddle/infrt/backends/tensorrt/trt_options.h b/paddle/infrt/backends/tensorrt/trt_options.h index d5190f5e622..b4e36da2058 100644 --- a/paddle/infrt/backends/tensorrt/trt_options.h +++ b/paddle/infrt/backends/tensorrt/trt_options.h @@ -15,12 +15,12 @@ #pragma once +#include + #include #include #include -#include - namespace infrt { namespace backends { namespace tensorrt { diff --git a/paddle/infrt/common/global.h b/paddle/infrt/common/global.h index e6586cb3a3c..2d7735d5252 100644 --- a/paddle/infrt/common/global.h +++ b/paddle/infrt/common/global.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/infrt/tensor/dense_host_tensor.h" namespace infrt { diff --git a/paddle/infrt/common/memory.h b/paddle/infrt/common/memory.h index 678529b8b78..643b2147761 100644 --- a/paddle/infrt/common/memory.h +++ b/paddle/infrt/common/memory.h @@ -15,9 +15,9 @@ #pragma once #include -#include #include +#include #include "paddle/infrt/common/macros.h" #include "paddle/infrt/common/target.h" diff --git a/paddle/infrt/dialect/dense_tensor.h b/paddle/infrt/dialect/dense_tensor.h index 7fbd1e8a4ef..8dec818a80a 100644 --- a/paddle/infrt/dialect/dense_tensor.h +++ b/paddle/infrt/dialect/dense_tensor.h @@ -13,6 +13,8 @@ // limitations under the License. #pragma once + +// clang-format off #include #include #include @@ -25,3 +27,4 @@ #define GET_OP_CLASSES #include "paddle/infrt/dialect/dense_tensor.hpp.inc" +// clang-format on diff --git a/paddle/infrt/dialect/diagnostic_utils.cc b/paddle/infrt/dialect/diagnostic_utils.cc index 4151001067e..8785ce69b8e 100644 --- a/paddle/infrt/dialect/diagnostic_utils.cc +++ b/paddle/infrt/dialect/diagnostic_utils.cc @@ -15,6 +15,7 @@ #include "paddle/infrt/dialect/diagnostic_utils.h" #include + #include namespace infrt { diff --git a/paddle/infrt/dialect/infrt/ir/infrt_dialect.cc b/paddle/infrt/dialect/infrt/ir/infrt_dialect.cc index c4f20cb4d35..0e3a10270cd 100644 --- a/paddle/infrt/dialect/infrt/ir/infrt_dialect.cc +++ b/paddle/infrt/dialect/infrt/ir/infrt_dialect.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
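Several infrt files in this patch, infrt_api.cc above and infrt_dialect.cc just below, fence order-sensitive spans with // clang-format off and // clang-format on so the formatter's include sorting cannot reorder headers whose inclusion order matters (typically generated .inc headers that depend on what was included before them). A hedged, compilable illustration with standard headers standing in for the order-sensitive ones:

#include <cstdio>

// clang-format off
#include <cstdint>  // deliberately kept ahead of <cassert>; the guard stops
#include <cassert>  // SortIncludes from rearranging anything in this span
// clang-format on

int main() {
  assert(sizeof(std::int32_t) == 4);
  std::printf("guarded includes left untouched\n");
  return 0;
}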
+// clang-format off #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" #include @@ -60,6 +61,7 @@ void InfrtDialect::initialize() { #include "paddle/infrt/dialect/infrt/ir/test_kernels.cpp.inc" >(); } +// clang-format on /// Parse a type registered to this dialect. mlir::Type InfrtDialect::parseType(::mlir::DialectAsmParser &parser) const { diff --git a/paddle/infrt/dialect/infrt/ir/infrt_dialect.h b/paddle/infrt/dialect/infrt/ir/infrt_dialect.h index e2e9b9348eb..5a7c45b3205 100644 --- a/paddle/infrt/dialect/infrt/ir/infrt_dialect.h +++ b/paddle/infrt/dialect/infrt/ir/infrt_dialect.h @@ -23,8 +23,8 @@ #include #include #include -#include "paddle/infrt/dialect/infrt/common/types.h" +#include "paddle/infrt/dialect/infrt/common/types.h" #include "paddle/infrt/dialect/infrt/ir/infrt_opsDialect.h.inc" #define GET_TYPEDEF_CLASSES #include "paddle/infrt/dialect/infrt/ir/infrt_opsTypes.h.inc" diff --git a/paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.cc b/paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.cc index 63be5ca9095..309e0f8b940 100644 --- a/paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.cc +++ b/paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.cc @@ -15,6 +15,7 @@ #include "paddle/infrt/dialect/infrt/pass/infrt_op_fuse_pass.h" #include + #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" #include "paddle/infrt/dialect/pd/ir/pd_ops.h" namespace { diff --git a/paddle/infrt/dialect/init_dialects.cc b/paddle/infrt/dialect/init_dialects.cc index 8da34bd404b..c204f9ea626 100644 --- a/paddle/infrt/dialect/init_dialects.cc +++ b/paddle/infrt/dialect/init_dialects.cc @@ -19,12 +19,10 @@ #include "paddle/infrt/dialect/dense_tensor.h" #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h" #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" - #include "paddle/infrt/dialect/pd/ir/pd_ops.h" #include "paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h" #include "paddle/infrt/dialect/phi/ir/phi_base.h" #include "paddle/infrt/dialect/phi/ir/phi_kernels.h" - #include "paddle/infrt/dialect/tensor_shape.h" #include "paddle/infrt/dialect/tensorrt/trt_ops.h" diff --git a/paddle/infrt/dialect/mlir_loader.cc b/paddle/infrt/dialect/mlir_loader.cc index 19b8cba12df..ab533a25c41 100644 --- a/paddle/infrt/dialect/mlir_loader.cc +++ b/paddle/infrt/dialect/mlir_loader.cc @@ -20,10 +20,10 @@ #include #include #include -#include #include #include +#include #include #include diff --git a/paddle/infrt/dialect/mlir_loader.h b/paddle/infrt/dialect/mlir_loader.h index 5e50ad9e5a2..b4faba8068e 100644 --- a/paddle/infrt/dialect/mlir_loader.h +++ b/paddle/infrt/dialect/mlir_loader.h @@ -16,9 +16,9 @@ #include #include -#include #include +#include namespace infrt { namespace dialect { diff --git a/paddle/infrt/dialect/opt.cc b/paddle/infrt/dialect/opt.cc index 2006530958f..e57666ffca0 100644 --- a/paddle/infrt/dialect/opt.cc +++ b/paddle/infrt/dialect/opt.cc @@ -14,6 +14,7 @@ #include #include + #include "paddle/infrt/dialect/init_dialects.h" int main(int argc, char **argv) { diff --git a/paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.cc b/paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.cc index 8bdf957db27..c9247abe695 100644 --- a/paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.cc +++ b/paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.cc @@ -14,6 +14,7 @@ #include "paddle/infrt/dialect/pd/pass/pd_op_fuse_pass.h" // NOLINT #include + #include "paddle/infrt/dialect/pd/ir/pd_ops.h" namespace { diff --git a/paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h b/paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h index 
9a92558daab..f7358db5bf3 100644 --- a/paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h +++ b/paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h @@ -14,6 +14,7 @@ #pragma once +// clang-format off #include #include #include @@ -37,3 +38,4 @@ // NOLINT #define GET_OP_CLASSES #include "paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h.inc" +// clang-format on diff --git a/paddle/infrt/dialect/phi/ir/phi_base.cc b/paddle/infrt/dialect/phi/ir/phi_base.cc index 1bd6068d3fb..39a23529ac3 100644 --- a/paddle/infrt/dialect/phi/ir/phi_base.cc +++ b/paddle/infrt/dialect/phi/ir/phi_base.cc @@ -21,6 +21,7 @@ #include #include #include + #include "paddle/infrt/common/global.h" #include "paddle/infrt/dialect/phi/ir/infrt_phi_base.cpp.inc" #include "paddle/infrt/dialect/phi/ir/infrt_phi_baseDialect.cpp.inc" diff --git a/paddle/infrt/dialect/phi/ir/phi_base.h b/paddle/infrt/dialect/phi/ir/phi_base.h index 64cd08cc05e..2cbdef5af90 100644 --- a/paddle/infrt/dialect/phi/ir/phi_base.h +++ b/paddle/infrt/dialect/phi/ir/phi_base.h @@ -18,8 +18,8 @@ #include #include -#include "paddle/infrt/dialect/infrt/common/types.h" +#include "paddle/infrt/dialect/infrt/common/types.h" #include "paddle/infrt/dialect/phi/ir/infrt_phi_baseDialect.h.inc" #define GET_OP_CLASSES diff --git a/paddle/infrt/dialect/phi/ir/phi_kernels.cc b/paddle/infrt/dialect/phi/ir/phi_kernels.cc index c7a837b83fc..69c3f963391 100644 --- a/paddle/infrt/dialect/phi/ir/phi_kernels.cc +++ b/paddle/infrt/dialect/phi/ir/phi_kernels.cc @@ -13,12 +13,12 @@ // limitations under the License. #include "paddle/infrt/dialect/phi/ir/phi_kernels.h" + #include #include "paddle/infrt/dialect/phi/ir/phi_gpu_kernelsDialect.cpp.inc" #define GET_OP_CLASSES #include "paddle/infrt/dialect/phi/ir/phi_cpu_kernels.cpp.inc" // NOLINT - #include "paddle/infrt/dialect/phi/ir/phi_cpu_kernelsDialect.cpp.inc" #define GET_OP_CLASSES #include "paddle/infrt/dialect/phi/ir/phi_gpu_kernels.cpp.inc" // NOLINT diff --git a/paddle/infrt/dialect/phi/ir/phi_kernels.h b/paddle/infrt/dialect/phi/ir/phi_kernels.h index 4f8b41852cc..9321ebb148f 100644 --- a/paddle/infrt/dialect/phi/ir/phi_kernels.h +++ b/paddle/infrt/dialect/phi/ir/phi_kernels.h @@ -32,11 +32,9 @@ #include "paddle/infrt/dialect/dense_tensor.h" #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" #include "paddle/infrt/dialect/phi/ir/phi_base.h" - #include "paddle/infrt/dialect/phi/ir/phi_cpu_kernelsDialect.h.inc" #define GET_OP_CLASSES #include "paddle/infrt/dialect/phi/ir/phi_cpu_kernels.h.inc" - #include "paddle/infrt/dialect/phi/ir/phi_gpu_kernelsDialect.h.inc" #define GET_OP_CLASSES #include "paddle/infrt/dialect/phi/ir/phi_gpu_kernels.h.inc" diff --git a/paddle/infrt/dialect/phi/pass/kernel_op_desc.cc b/paddle/infrt/dialect/phi/pass/kernel_op_desc.cc index 9425a290142..ff870a06752 100644 --- a/paddle/infrt/dialect/phi/pass/kernel_op_desc.cc +++ b/paddle/infrt/dialect/phi/pass/kernel_op_desc.cc @@ -13,7 +13,9 @@ // limitations under the License. 
#include "paddle/infrt/dialect/phi/pass/kernel_op_desc.h" + #include + #include "paddle/infrt/dialect/phi/data_type.h" #include "paddle/phi/core/type_defs.h" #include "paddle/phi/kernels/declarations.h" diff --git a/paddle/infrt/dialect/phi/pass/kernel_op_desc.h b/paddle/infrt/dialect/phi/pass/kernel_op_desc.h index cdc8f7cbff5..4385d3c9417 100644 --- a/paddle/infrt/dialect/phi/pass/kernel_op_desc.h +++ b/paddle/infrt/dialect/phi/pass/kernel_op_desc.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/infrt/dialect/infrt/common/types.h" namespace infrt { diff --git a/paddle/infrt/dialect/phi/pass/kernel_op_desc_test.cc b/paddle/infrt/dialect/phi/pass/kernel_op_desc_test.cc index bd5f0799a60..24af0ea4378 100644 --- a/paddle/infrt/dialect/phi/pass/kernel_op_desc_test.cc +++ b/paddle/infrt/dialect/phi/pass/kernel_op_desc_test.cc @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. +// clang-format off #include #include #include #include "paddle/infrt/dialect/phi/pass/kernel_op_desc.h" #include "paddle/phi/kernels/declarations.h" +// clang-format on namespace infrt { diff --git a/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc b/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc index 862c9ae4ee5..f4de56b42a6 100644 --- a/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc +++ b/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.cc @@ -20,6 +20,7 @@ #include #include #include + #include #include #include diff --git a/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h b/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h index a0e74426a40..9748e1679d3 100644 --- a/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h +++ b/paddle/infrt/dialect/phi/pass/phi_op_convert_pass.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/infrt/dialect/infrt/common/types.h" namespace infrt { diff --git a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h index 7cb2651ccf6..30bde83cd81 100644 --- a/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h +++ b/paddle/infrt/dialect/phi/pass/proto_arg_map_context.h @@ -15,7 +15,9 @@ limitations under the License. 
*/ #pragma once #include + #include + #include "paddle/infrt/dialect/pd/common/pd_ops_info.h" #include "paddle/phi/core/compat/arg_map_context.h" diff --git a/paddle/infrt/dialect/phi/phi_exec.cc b/paddle/infrt/dialect/phi/phi_exec.cc index a2808a00cb6..0aae8cc9337 100644 --- a/paddle/infrt/dialect/phi/phi_exec.cc +++ b/paddle/infrt/dialect/phi/phi_exec.cc @@ -41,7 +41,9 @@ bool parse_inputs(int argc, *params_file_name = argv[2]; return true; } - default: { return false; } + default: { + return false; + } } } diff --git a/paddle/infrt/dialect/print_ir.cc b/paddle/infrt/dialect/print_ir.cc index b118a5f7a9c..a240cebe736 100644 --- a/paddle/infrt/dialect/print_ir.cc +++ b/paddle/infrt/dialect/print_ir.cc @@ -28,6 +28,7 @@ #include #include #include + #include #include "paddle/infrt/common/global.h" @@ -74,8 +75,8 @@ void printOperation(mlir::Operation *op, int indent) { if (!op->getAttrs().empty()) { printIndent(indent) << op->getAttrs().size() << " attributes:\n"; for (mlir::NamedAttribute attr : op->getAttrs()) { - printIndent(indent + 1) << "- {" << attr.getName() << " : " - << attr.getValue() << "}\n"; + printIndent(indent + 1) + << "- {" << attr.getName() << " : " << attr.getValue() << "}\n"; } } diff --git a/paddle/infrt/dialect/tensor_shape.cc b/paddle/infrt/dialect/tensor_shape.cc index 92c03818264..9a825224f1d 100644 --- a/paddle/infrt/dialect/tensor_shape.cc +++ b/paddle/infrt/dialect/tensor_shape.cc @@ -66,5 +66,4 @@ void TensorShapeDialect::printType(mlir::Type type, #define GET_OP_CLASSES #include "paddle/infrt/dialect/tensor_shape.cpp.inc" // NOLINT - #include "paddle/infrt/dialect/tensor_shape_dialect.cpp.inc" diff --git a/paddle/infrt/dialect/tensorrt/convert.h b/paddle/infrt/dialect/tensorrt/convert.h index 2a242ca285b..2dcd86486f5 100644 --- a/paddle/infrt/dialect/tensorrt/convert.h +++ b/paddle/infrt/dialect/tensorrt/convert.h @@ -20,6 +20,7 @@ #include #include #include + #include "paddle/infrt/dialect/infrt/common/types.h" #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" #include "paddle/infrt/dialect/pd/ir/pd_ops.h" diff --git a/paddle/infrt/dialect/tensorrt/trt_exec.cc b/paddle/infrt/dialect/tensorrt/trt_exec.cc index dcb84ceb50e..899e71f1c99 100644 --- a/paddle/infrt/dialect/tensorrt/trt_exec.cc +++ b/paddle/infrt/dialect/tensorrt/trt_exec.cc @@ -11,10 +11,14 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+ +// clang-format off #include #include + #include #include + #include "paddle/infrt/common/global.h" #include "paddle/infrt/dialect/infrt/pass/infrt_weights_unfold_pass.h" #include "paddle/infrt/dialect/mlir_loader.h" @@ -44,6 +48,7 @@ #endif #include +// clang-format on int main(int argc, char** argv) { static llvm::cl::opt input_file( diff --git a/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc b/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc index bbe9a76e87b..7109fc772ec 100644 --- a/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc +++ b/paddle/infrt/dialect/tensorrt/trt_graph_fuse_pass.cc @@ -18,6 +18,7 @@ #include #include #include + #include #include #include diff --git a/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc b/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc index d5ce871edd1..d74fe3e5e9c 100644 --- a/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc +++ b/paddle/infrt/dialect/tensorrt/trt_graph_split_pass.cc @@ -15,6 +15,7 @@ #include "paddle/infrt/dialect/tensorrt/trt_graph_split_pass.h" #include + #include "paddle/infrt/dialect/pd/ir/pd_ops.h" namespace infrt { diff --git a/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc b/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc index d7b917385cf..35b869fb307 100644 --- a/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc +++ b/paddle/infrt/dialect/tensorrt/trt_op_teller_pass.cc @@ -16,6 +16,7 @@ #include #include + #include "paddle/infrt/dialect/dense_tensor.h" #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h" #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" diff --git a/paddle/infrt/dialect/tensorrt/trt_ops.cc b/paddle/infrt/dialect/tensorrt/trt_ops.cc index 415a78a6967..161fbbbcc65 100644 --- a/paddle/infrt/dialect/tensorrt/trt_ops.cc +++ b/paddle/infrt/dialect/tensorrt/trt_ops.cc @@ -11,6 +11,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
+ +// clang-format off #include "paddle/infrt/dialect/tensorrt/trt_ops.h" #include #include @@ -24,6 +26,7 @@ #include "paddle/infrt/dialect/dense_tensor.h" #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" #include "paddle/infrt/dialect/phi/ir/phi_base.h" +// clang-format on namespace infrt { namespace trt { diff --git a/paddle/infrt/dialect/tensorrt/trt_ops.h b/paddle/infrt/dialect/tensorrt/trt_ops.h index 76768037dbd..e851c26c43c 100644 --- a/paddle/infrt/dialect/tensorrt/trt_ops.h +++ b/paddle/infrt/dialect/tensorrt/trt_ops.h @@ -28,6 +28,7 @@ #include #include #include + #include "paddle/infrt/dialect/infrt/ir/basic_kernels.h" #include "paddle/infrt/dialect/infrt/ir/infrt_dialect.h" #include "paddle/infrt/dialect/pd/ir/pd_ops.h" diff --git a/paddle/infrt/dialect/tensorrt/trt_type_convert_pass.cc b/paddle/infrt/dialect/tensorrt/trt_type_convert_pass.cc index 35c81d02301..1cb7c4155b9 100644 --- a/paddle/infrt/dialect/tensorrt/trt_type_convert_pass.cc +++ b/paddle/infrt/dialect/tensorrt/trt_type_convert_pass.cc @@ -15,6 +15,7 @@ #include "paddle/infrt/dialect/tensorrt/trt_type_convert_pass.h" #include + #include #include "llvm/ADT/StringRef.h" diff --git a/paddle/infrt/host_context/core_runtime.cc b/paddle/infrt/host_context/core_runtime.cc index e3917bd07d2..3dbb0b41c9f 100644 --- a/paddle/infrt/host_context/core_runtime.cc +++ b/paddle/infrt/host_context/core_runtime.cc @@ -14,9 +14,8 @@ #include "paddle/infrt/host_context/core_runtime.h" -#include - #include +#include #include #include "paddle/infrt/host_context/kernel_registry.h" diff --git a/paddle/infrt/host_context/core_runtime.h b/paddle/infrt/host_context/core_runtime.h index acb6a66cac6..585369e249b 100644 --- a/paddle/infrt/host_context/core_runtime.h +++ b/paddle/infrt/host_context/core_runtime.h @@ -46,7 +46,7 @@ class CoreRuntime : public std::enable_shared_from_this { //! Get the results of the execution. 
llvm::SmallVector // - GetResults(llvm::ArrayRef arg_names); + GetResults(llvm::ArrayRef arg_names); std::shared_ptr getptr() { return std::shared_ptr(this); diff --git a/paddle/infrt/host_context/kernel_registry.cc b/paddle/infrt/host_context/kernel_registry.cc index 5693e973a3f..2518056ba9d 100644 --- a/paddle/infrt/host_context/kernel_registry.cc +++ b/paddle/infrt/host_context/kernel_registry.cc @@ -39,8 +39,8 @@ const std::vector &KernelRegistry::GetAttrNameList( void KernelRegistry::AddKernel(const std::string &key, KernelImplementation fn, const std::vector &attr_order) { - CHECK(!impl_->data.count(key)) << "kernel [" << key - << "] is registered twice"; + CHECK(!impl_->data.count(key)) + << "kernel [" << key << "] is registered twice"; impl_->data.emplace( key, std::make_pair([fn]() { return fn; }, std::move(attr_order))); } @@ -48,8 +48,8 @@ void KernelRegistry::AddKernel(const std::string &key, void KernelRegistry::AddKernel(const std::string &key, KernelLauncher fn, const std::vector &attr_order) { - CHECK(!impl_->data.count(key)) << "kernel [" << key - << "] is registered twice"; + CHECK(!impl_->data.count(key)) + << "kernel [" << key << "] is registered twice"; impl_->data.emplace(key, std::make_pair(std::move(fn), std::move(attr_order))); } diff --git a/paddle/infrt/host_context/mlir_exec.cc b/paddle/infrt/host_context/mlir_exec.cc index 6ad51a02bda..1ae7cdc742a 100644 --- a/paddle/infrt/host_context/mlir_exec.cc +++ b/paddle/infrt/host_context/mlir_exec.cc @@ -14,6 +14,7 @@ #include #include + #include #include diff --git a/paddle/infrt/host_context/mlir_program_executor.h b/paddle/infrt/host_context/mlir_program_executor.h index c2ccb90640b..7808c460457 100644 --- a/paddle/infrt/host_context/mlir_program_executor.h +++ b/paddle/infrt/host_context/mlir_program_executor.h @@ -19,10 +19,10 @@ #include #include #include -#include #include #include +#include #include "paddle/infrt/host_context/core_runtime.h" #include "paddle/infrt/host_context/kernel_registry.h" diff --git a/paddle/infrt/host_context/mlir_to_runtime_translate.cc b/paddle/infrt/host_context/mlir_to_runtime_translate.cc index 05bb28b7c56..9292e593a70 100644 --- a/paddle/infrt/host_context/mlir_to_runtime_translate.cc +++ b/paddle/infrt/host_context/mlir_to_runtime_translate.cc @@ -14,6 +14,7 @@ #include "paddle/infrt/host_context/mlir_to_runtime_translate.h" +#include #include #include #include @@ -23,7 +24,6 @@ #include #include -#include #include #include #include @@ -591,8 +591,8 @@ bool MlirToRuntimeTranslator::EmitCallOp(mlir::Operation* op, { // lookup the callee function auto it = table.find(callee_name.getValue().str()); - CHECK(it != table.end()) << "can't find function [" - << callee_name.getValue().str() << "]"; + CHECK(it != table.end()) + << "can't find function [" << callee_name.getValue().str() << "]"; auto* function = impl_->cur_op->CreateFunctionExecutable(it->second, &impl_->func_defs); impl_->cur_op->AppendAttribute(new Value(function)); diff --git a/paddle/infrt/host_context/op_executable.cc b/paddle/infrt/host_context/op_executable.cc index 4d588a9c2b5..b53dc0545c7 100644 --- a/paddle/infrt/host_context/op_executable.cc +++ b/paddle/infrt/host_context/op_executable.cc @@ -15,6 +15,7 @@ #include "paddle/infrt/host_context/op_executable.h" #include + #include #include diff --git a/paddle/infrt/host_context/op_executable.h b/paddle/infrt/host_context/op_executable.h index 550f6ab6349..b80b99fd414 100644 --- a/paddle/infrt/host_context/op_executable.h +++ 
b/paddle/infrt/host_context/op_executable.h @@ -16,6 +16,7 @@ #include #include #include + #include #include #include diff --git a/paddle/infrt/host_context/paddle_mlir.h b/paddle/infrt/host_context/paddle_mlir.h index 57bdc1b4857..629181cca3d 100644 --- a/paddle/infrt/host_context/paddle_mlir.h +++ b/paddle/infrt/host_context/paddle_mlir.h @@ -20,6 +20,7 @@ #include #include #include + #include #include #include diff --git a/paddle/infrt/host_context/paddle_mlir_converter.cc b/paddle/infrt/host_context/paddle_mlir_converter.cc index a2808a00cb6..0aae8cc9337 100644 --- a/paddle/infrt/host_context/paddle_mlir_converter.cc +++ b/paddle/infrt/host_context/paddle_mlir_converter.cc @@ -41,7 +41,9 @@ bool parse_inputs(int argc, *params_file_name = argv[2]; return true; } - default: { return false; } + default: { + return false; + } } } diff --git a/paddle/infrt/host_context/symbol_table.h b/paddle/infrt/host_context/symbol_table.h index 805215a78ce..8c79c78c690 100644 --- a/paddle/infrt/host_context/symbol_table.h +++ b/paddle/infrt/host_context/symbol_table.h @@ -14,9 +14,8 @@ #pragma once -#include - #include +#include #include "paddle/infrt/host_context/value.h" diff --git a/paddle/infrt/host_context/value.h b/paddle/infrt/host_context/value.h index 1834cb4c0db..af785c13349 100644 --- a/paddle/infrt/host_context/value.h +++ b/paddle/infrt/host_context/value.h @@ -159,15 +159,15 @@ class Value : public common::Object { template const T& get() const { - CHECK(data.template is()) << "typeid: " << data.index() - << " != " << ValueVariantType::IndexOf; + CHECK(data.template is()) + << "typeid: " << data.index() << " != " << ValueVariantType::IndexOf; return data.get(); } template T& get() { - CHECK(data.template is()) << "typeid: " << data.index() - << " != " << ValueVariantType::IndexOf; + CHECK(data.template is()) + << "typeid: " << data.index() << " != " << ValueVariantType::IndexOf; return data.get(); } diff --git a/paddle/infrt/kernel/phi/dense_tensor_kernels.cc b/paddle/infrt/kernel/phi/dense_tensor_kernels.cc index 95e25b243f3..8c49f47e7d8 100644 --- a/paddle/infrt/kernel/phi/dense_tensor_kernels.cc +++ b/paddle/infrt/kernel/phi/dense_tensor_kernels.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/infrt/kernel/phi/dense_tensor_kernels.h" + #include + #include "llvm/Support/ErrorHandling.h" #include "paddle/infrt/backends/host/phi_allocator.h" #include "paddle/infrt/common/string.h" diff --git a/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc b/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc index 2e40261f273..cb9640451f9 100644 --- a/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc +++ b/paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.cc @@ -13,6 +13,7 @@ // limitations under the License. 
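The kernel_registry.cc, EmitCallOp, and Value::get hunks above all show the other recurring rewrite: when a CHECK(...) << ... chain overflows the column limit, the formatter now breaks once, before the first <<, instead of splitting the message mid-chain. A sketch of that shape with a minimal CHECK stand-in (glog itself is not assumed, so the snippet compiles alone):

#include <cstdlib>
#include <iostream>

// Minimal stand-in for glog's CHECK, just for this illustration.
struct FailStream {
  template <class T>
  FailStream& operator<<(const T& v) {
    std::cerr << v;
    return *this;
  }
  ~FailStream() {
    std::cerr << "\n";
    std::abort();
  }
};

#define CHECK(cond) \
  if (!(cond)) FailStream{}

int main() {
  const bool duplicate = false;
  // The whole message chain moves to the next line, broken before `<<`:
  CHECK(!duplicate)
      << "kernel [" << "demo_kernel" << "] is registered twice";
  return 0;
}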
#include "paddle/infrt/kernel/phi/infershaped/infershaped_kernel_launcher.h" + #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/meta_tensor.h" diff --git a/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h b/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h index 277c4ad6b7a..531d77ba952 100644 --- a/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h +++ b/paddle/infrt/kernel/phi/infershaped/infershaped_utils.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/infrt/tensor/dense_host_tensor.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h b/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h index d8702784720..bac25e0f437 100644 --- a/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h +++ b/paddle/infrt/kernel/phi/infershaped/phi_kernel_launcher.h @@ -14,6 +14,7 @@ #pragma once #include + #include #include "paddle/infrt/backends/host/phi_context.h" diff --git a/paddle/infrt/kernel/tensorrt/trt_kernels.cc b/paddle/infrt/kernel/tensorrt/trt_kernels.cc index c0f5ebb4a76..0ea68f2e835 100644 --- a/paddle/infrt/kernel/tensorrt/trt_kernels.cc +++ b/paddle/infrt/kernel/tensorrt/trt_kernels.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +// clang-format off #include "paddle/infrt/kernel/tensorrt/trt_kernels.h" #include #include @@ -36,6 +37,7 @@ #include "paddle/infrt/host_context/symbol_table.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/dense_tensor.h" +// clang-format on namespace infrt { namespace kernel { diff --git a/paddle/infrt/kernel/tensorrt/trt_kernels.h b/paddle/infrt/kernel/tensorrt/trt_kernels.h index bf23bd45c13..bf41c124a29 100644 --- a/paddle/infrt/kernel/tensorrt/trt_kernels.h +++ b/paddle/infrt/kernel/tensorrt/trt_kernels.h @@ -19,7 +19,6 @@ #include #include "mlir/IR/Operation.h" - #include "paddle/infrt/backends/tensorrt/trt_engine.h" #include "paddle/phi/backends/gpu/gpu_context.h" diff --git a/paddle/infrt/kernel/test_kernels.cc b/paddle/infrt/kernel/test_kernels.cc index bcf475d1bc0..e00afa4b790 100644 --- a/paddle/infrt/kernel/test_kernels.cc +++ b/paddle/infrt/kernel/test_kernels.cc @@ -92,11 +92,11 @@ class BenchmarkStats { std::sort(run_times_walltime_.begin(), run_times_walltime_.end()); std::sort(run_times_cpu_.begin(), run_times_cpu_.end()); - auto percentile = []( - double p, const std::vector &run_times) { - assert(p >= 0.0 && p <= 1.0); - return run_times[run_times.size() * p]; - }; + auto percentile = + [](double p, const std::vector &run_times) { + assert(p >= 0.0 && p <= 1.0); + return run_times[run_times.size() * p]; + }; // BM: prefix is added to make grepping results from lit output easier. std::string prefix; diff --git a/paddle/infrt/paddle/scope.h b/paddle/infrt/paddle/scope.h index 4ebf846374c..1f81d0914df 100644 --- a/paddle/infrt/paddle/scope.h +++ b/paddle/infrt/paddle/scope.h @@ -13,10 +13,9 @@ // limitations under the License. #pragma once -#include - #include #include +#include #include #include "paddle/infrt/common/macros.h" diff --git a/paddle/infrt/support/type_traits.h b/paddle/infrt/support/type_traits.h index 341dabb7c1c..33a42fe37ea 100644 --- a/paddle/infrt/support/type_traits.h +++ b/paddle/infrt/support/type_traits.h @@ -115,7 +115,8 @@ struct nonesuch { template class Op, + template + class Op, class... 
Args> struct detector : std::false_type { using value_t = std::false_type; diff --git a/paddle/infrt/tests/models/test_abs.cc b/paddle/infrt/tests/models/test_abs.cc index 89bbe78ffe2..aa5a2c6945b 100644 --- a/paddle/infrt/tests/models/test_abs.cc +++ b/paddle/infrt/tests/models/test_abs.cc @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +// clang-format off #include #include #include @@ -49,6 +50,7 @@ #include "paddle/infrt/dialect/phi/ir/infrt_phi_tensor.h" #include "paddle/infrt/dialect/phi/ir/phi_base.h" #include "paddle/infrt/dialect/phi/ir/phi_kernels.h" +// clang-format on static llvm::cl::list cl_shared_libs( // NOLINT "shared_libs", diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h index a9475db8008..fa19714dde7 100644 --- a/paddle/phi/api/ext/op_meta_info.h +++ b/paddle/phi/api/ext/op_meta_info.h @@ -317,25 +317,24 @@ using InferShapeFunc = std::vector> (*)( const std::vector>>& vec_input_shapes, const std::vector& attrs); -#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPE(input_type) \ - template \ - struct InferShapeCallHelper { \ - template \ - static Return InferShape( \ - const std::vector>& input_shapes, \ - const std::vector>>& \ - vec_input_shapes, \ - const std::vector& attrs, \ - const PreviousArgs&... pargs) { \ - input_type arg = input_shapes[in_idx]; \ - return InferShapeCallHelper::template InferShape( \ - input_shapes, vec_input_shapes, attrs, pargs..., arg); \ - } \ +#define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPE(input_type) \ + template \ + struct InferShapeCallHelper { \ + template \ + static Return InferShape( \ + const std::vector>& input_shapes, \ + const std::vector>>& \ + vec_input_shapes, \ + const std::vector& attrs, \ + const PreviousArgs&... pargs) { \ + input_type arg = input_shapes[in_idx]; \ + return InferShapeCallHelper:: \ + template InferShape( \ + input_shapes, vec_input_shapes, attrs, pargs..., arg); \ + } \ } #define PD_SPECIALIZE_InferShapeCallHelper_FOR_SHAPES(input_type) \ @@ -397,10 +396,8 @@ struct InferShapeFuncImpl { const std::vector>& input_shapes, const std::vector>>& vec_input_shapes, const std::vector& attrs) { - return InferShapeCallHelper>::template InferShape<0, - 0, - 0>( - input_shapes, vec_input_shapes, attrs); + return InferShapeCallHelper>:: + template InferShape<0, 0, 0>(input_shapes, vec_input_shapes, attrs); } private: @@ -482,20 +479,19 @@ using InferDtypeFunc = std::vector (*)( } \ } -#define PD_SPECIALIZE_InferDtypeCallHelper_FOR_DTYPES(input_type) \ - template \ - struct InferDtypeCallHelper { \ - template \ - static Return InferDtype( \ - const std::vector& input_dtypes, \ - const std::vector>& vec_input_dtypes, \ - const PreviousArgs&... pargs) { \ - input_type arg = vec_input_dtypes[vec_in_idx]; \ - return InferDtypeCallHelper::template InferDtype( \ - input_dtypes, vec_input_dtypes, pargs..., arg); \ - } \ +#define PD_SPECIALIZE_InferDtypeCallHelper_FOR_DTYPES(input_type) \ + template \ + struct InferDtypeCallHelper { \ + template \ + static Return InferDtype( \ + const std::vector& input_dtypes, \ + const std::vector>& vec_input_dtypes, \ + const PreviousArgs&... 
pargs) { \ + input_type arg = vec_input_dtypes[vec_in_idx]; \ + return InferDtypeCallHelper:: \ + template InferDtype( \ + input_dtypes, vec_input_dtypes, pargs..., arg); \ + } \ } template diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index 3ef7763d57e..5ca7f2b51ed 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/phi/api/lib/api_custom_impl.h" +#include "glog/logging.h" #include "paddle/phi/api/lib/api_gen_utils.h" #include "paddle/phi/api/lib/data_transform.h" #include "paddle/phi/api/lib/kernel_dispatch.h" @@ -28,8 +29,6 @@ limitations under the License. */ #include "paddle/phi/infermeta/nullary.h" #include "paddle/phi/infermeta/unary.h" -#include "glog/logging.h" - namespace paddle { namespace experimental { diff --git a/paddle/phi/api/lib/backend_set.h b/paddle/phi/api/lib/backend_set.h index 2aa4f969221..93f8f05b74b 100644 --- a/paddle/phi/api/lib/backend_set.h +++ b/paddle/phi/api/lib/backend_set.h @@ -32,8 +32,9 @@ class BackendSet final { public: constexpr BackendSet() : bitset_(0) {} explicit constexpr BackendSet(Backend b) - : bitset_(b == Backend::UNDEFINED ? 0 : 1ULL << (static_cast(b) - - 1)) {} + : bitset_(b == Backend::UNDEFINED + ? 0 + : 1ULL << (static_cast(b) - 1)) {} inline uint64_t bitset() const { return bitset_; } diff --git a/paddle/phi/api/lib/data_transform.cc b/paddle/phi/api/lib/data_transform.cc index 12f7b8bba58..4803616812c 100644 --- a/paddle/phi/api/lib/data_transform.cc +++ b/paddle/phi/api/lib/data_transform.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +// clang-format off #include "paddle/phi/api/lib/data_transform.h" #include "paddle/phi/api/lib/kernel_dispatch.h" @@ -23,6 +24,7 @@ limitations under the License. */ #include "paddle/phi/kernels/transfer_layout_kernel.h" #include "paddle/fluid/framework/tensor_util.h" +// clang-format on namespace paddle { namespace experimental { diff --git a/paddle/phi/api/lib/sparse_api_custom_impl.cc b/paddle/phi/api/lib/sparse_api_custom_impl.cc index 71ba8eaae2d..0b93c96e7f8 100644 --- a/paddle/phi/api/lib/sparse_api_custom_impl.cc +++ b/paddle/phi/api/lib/sparse_api_custom_impl.cc @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/phi/api/lib/sparse_api_custom_impl.h" #include + #include "glog/logging.h" #include "paddle/phi/api/lib/kernel_dispatch.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index a340c0fed10..74364d5ab03 100644 --- a/paddle/phi/api/lib/tensor.cc +++ b/paddle/phi/api/lib/tensor.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +// clang-format off #include "paddle/phi/api/include/tensor.h" #include @@ -34,6 +35,7 @@ limitations under the License. 
*/ #include "paddle/phi/core/tensor_utils.h" #include "paddle/fluid/platform/stream/cuda_stream.h" +// clang-format off namespace paddle { namespace experimental { diff --git a/paddle/phi/api/lib/tensor_copy.cc b/paddle/phi/api/lib/tensor_copy.cc index 85de3601fd9..5f8c2ed71e9 100644 --- a/paddle/phi/api/lib/tensor_copy.cc +++ b/paddle/phi/api/lib/tensor_copy.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/api/lib/tensor_copy.h" + #include "paddle/phi/api/lib/api_gen_utils.h" #include "paddle/phi/api/lib/kernel_dispatch.h" #include "paddle/phi/core/compat/convert_utils.h" diff --git a/paddle/phi/api/lib/tensor_method.cc b/paddle/phi/api/lib/tensor_method.cc index 5285392b4a6..fbeeb3332ea 100644 --- a/paddle/phi/api/lib/tensor_method.cc +++ b/paddle/phi/api/lib/tensor_method.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +// clang-format off #include "paddle/phi/api/include/tensor.h" #include "paddle/phi/common/int_array.h" @@ -22,6 +23,7 @@ limitations under the License. */ #include "paddle/phi/api/lib/api_gen_utils.h" #include "paddle/phi/api/lib/kernel_dispatch.h" #include "paddle/phi/infermeta/unary.h" +// clang-format off namespace paddle { namespace experimental { diff --git a/paddle/phi/api/lib/utils/tensor_utils.h b/paddle/phi/api/lib/utils/tensor_utils.h index 36a0901bbe9..f930f5b11f6 100644 --- a/paddle/phi/api/lib/utils/tensor_utils.h +++ b/paddle/phi/api/lib/utils/tensor_utils.h @@ -18,7 +18,6 @@ limitations under the License. */ #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/variable.h" - #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/common/int_array.h" #include "paddle/phi/common/scalar.h" diff --git a/paddle/phi/backends/callback_manager.cc b/paddle/phi/backends/callback_manager.cc index 4a958ef73bf..295f70fc65c 100644 --- a/paddle/phi/backends/callback_manager.cc +++ b/paddle/phi/backends/callback_manager.cc @@ -13,11 +13,12 @@ // limitations under the License. #include "paddle/phi/backends/callback_manager.h" -#include "paddle/fluid/platform/device/device_wrapper.h" -#include "paddle/fluid/platform/enforce.h" #include +#include "paddle/fluid/platform/device/device_wrapper.h" +#include "paddle/fluid/platform/enforce.h" + namespace phi { CallbackManager::CallbackManager(stream::Stream *stream) diff --git a/paddle/phi/backends/custom/custom_context.h b/paddle/phi/backends/custom/custom_context.h index 37b0ee21219..57be8534fa9 100644 --- a/paddle/phi/backends/custom/custom_context.h +++ b/paddle/phi/backends/custom/custom_context.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/phi/common/place.h" #include "paddle/phi/core/device_context.h" diff --git a/paddle/phi/backends/custom/custom_device_test.cc b/paddle/phi/backends/custom/custom_device_test.cc index 53b88f9b4ac..51fa74b4dc5 100644 --- a/paddle/phi/backends/custom/custom_device_test.cc +++ b/paddle/phi/backends/custom/custom_device_test.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include + #include #include "paddle/fluid/framework/tensor.h" diff --git a/paddle/phi/backends/device_base.cc b/paddle/phi/backends/device_base.cc index b72c6efd51f..e57653702c5 100644 --- a/paddle/phi/backends/device_base.cc +++ b/paddle/phi/backends/device_base.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/backends/device_base.h" + #include "gflags/gflags.h" #include "glog/logging.h" #include "paddle/phi/core/enforce.h" @@ -214,8 +215,9 @@ size_t DeviceInterface::AllocSize(size_t dev_id, bool realloc) { size_t flag_mb = realloc ? FLAGS_reallocate_gpu_memory_in_mb : FLAGS_initial_gpu_memory_in_mb; size_t alloc_bytes = - (flag_mb > 0ul ? flag_mb << 20 : available_to_alloc * - FLAGS_fraction_of_gpu_memory_to_use); + (flag_mb > 0ul + ? flag_mb << 20 + : available_to_alloc * FLAGS_fraction_of_gpu_memory_to_use); PADDLE_ENFORCE_GE(available_to_alloc, alloc_bytes, phi::errors::ResourceExhausted( diff --git a/paddle/phi/backends/device_ext.h b/paddle/phi/backends/device_ext.h index 749d8d323b6..ff58f4f35fd 100644 --- a/paddle/phi/backends/device_ext.h +++ b/paddle/phi/backends/device_ext.h @@ -34,7 +34,9 @@ typedef enum { C_INTERNAL_ERROR // plugin error } C_Status; -typedef struct C_Device_st { int id; } * C_Device; +typedef struct C_Device_st { + int id; +} * C_Device; typedef struct C_Stream_st* C_Stream; diff --git a/paddle/phi/backends/device_manager.h b/paddle/phi/backends/device_manager.h index 18d51687ef1..56d99ba43bd 100644 --- a/paddle/phi/backends/device_manager.h +++ b/paddle/phi/backends/device_manager.h @@ -19,11 +19,10 @@ #include "paddle/phi/backends/device_base.h" #include "paddle/phi/backends/device_ext.h" +#include "paddle/phi/backends/dynload/port.h" #include "paddle/phi/backends/event.h" #include "paddle/phi/backends/stream.h" #include "paddle/phi/common/place.h" - -#include "paddle/phi/backends/dynload/port.h" #include "paddle/phi/core/utils/rw_lock.h" namespace phi { diff --git a/paddle/phi/backends/dynload/cublas.h b/paddle/phi/backends/dynload/cublas.h index ee0696fb4b2..308ae2accef 100644 --- a/paddle/phi/backends/dynload/cublas.h +++ b/paddle/phi/backends/dynload/cublas.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include // NOLINT #include diff --git a/paddle/phi/backends/dynload/cublasLt.h b/paddle/phi/backends/dynload/cublasLt.h index 4c7ac9c3f21..1e2a20ebdf4 100644 --- a/paddle/phi/backends/dynload/cublasLt.h +++ b/paddle/phi/backends/dynload/cublasLt.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include diff --git a/paddle/phi/backends/dynload/cuda_driver.h b/paddle/phi/backends/dynload/cuda_driver.h index f4ea70a81b9..f743a33a186 100644 --- a/paddle/phi/backends/dynload/cuda_driver.h +++ b/paddle/phi/backends/dynload/cuda_driver.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/cudnn.cc b/paddle/phi/backends/dynload/cudnn.cc index 02d626d5f98..8aa3b623273 100644 --- a/paddle/phi/backends/dynload/cudnn.cc +++ b/paddle/phi/backends/dynload/cudnn.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/phi/backends/dynload/cudnn.h" + #include "paddle/fluid/platform/enforce.h" namespace phi { diff --git a/paddle/phi/backends/dynload/cudnn.h b/paddle/phi/backends/dynload/cudnn.h index a3afb98e3e6..7b9004308e9 100644 --- a/paddle/phi/backends/dynload/cudnn.h +++ b/paddle/phi/backends/dynload/cudnn.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #ifdef PADDLE_WITH_CUDA #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/cufft.cc b/paddle/phi/backends/dynload/cufft.cc index 596a68c1ed6..5a7080032d2 100644 --- a/paddle/phi/backends/dynload/cufft.cc +++ b/paddle/phi/backends/dynload/cufft.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/backends/dynload/cufft.h" + #include "paddle/fluid/platform/enforce.h" namespace phi { diff --git a/paddle/phi/backends/dynload/cufft.h b/paddle/phi/backends/dynload/cufft.h index 4697e335477..a27d7c3ab1e 100644 --- a/paddle/phi/backends/dynload/cufft.h +++ b/paddle/phi/backends/dynload/cufft.h @@ -17,6 +17,7 @@ limitations under the License. */ #include #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/cupti.h b/paddle/phi/backends/dynload/cupti.h index a526fbfd926..22e21b78f4f 100644 --- a/paddle/phi/backends/dynload/cupti.h +++ b/paddle/phi/backends/dynload/cupti.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/curand.h b/paddle/phi/backends/dynload/curand.h index 875403b03bb..f3c4496dc4d 100644 --- a/paddle/phi/backends/dynload/curand.h +++ b/paddle/phi/backends/dynload/curand.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/cusolver.h b/paddle/phi/backends/dynload/cusolver.h index 40e5f183dc0..1354e310554 100644 --- a/paddle/phi/backends/dynload/cusolver.h +++ b/paddle/phi/backends/dynload/cusolver.h @@ -15,6 +15,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/cusparse.h b/paddle/phi/backends/dynload/cusparse.h index 8f7d54d55db..a7e305f98d4 100644 --- a/paddle/phi/backends/dynload/cusparse.h +++ b/paddle/phi/backends/dynload/cusparse.h @@ -15,6 +15,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/hiprand.h b/paddle/phi/backends/dynload/hiprand.h index ccaf02d9304..3e9502dd94d 100644 --- a/paddle/phi/backends/dynload/hiprand.h +++ b/paddle/phi/backends/dynload/hiprand.h @@ -16,9 +16,9 @@ limitations under the License. */ #include #include // NOLINT -#include "paddle/phi/backends/dynload/port.h" #include "paddle/phi/backends/dynload/dynamic_loader.h" +#include "paddle/phi/backends/dynload/port.h" namespace phi { namespace dynload { diff --git a/paddle/phi/backends/dynload/hiprtc.h b/paddle/phi/backends/dynload/hiprtc.h index 0404aad5593..75dd88f87bd 100644 --- a/paddle/phi/backends/dynload/hiprtc.h +++ b/paddle/phi/backends/dynload/hiprtc.h @@ -15,7 +15,9 @@ limitations under the License. 
*/ #pragma once #include + #include // NOLINT + #include "paddle/phi/backends/dynload/dynamic_loader.h" #include "paddle/phi/backends/dynload/port.h" diff --git a/paddle/phi/backends/dynload/lapack.cc b/paddle/phi/backends/dynload/lapack.cc index bb03beabd4f..9719da97751 100644 --- a/paddle/phi/backends/dynload/lapack.cc +++ b/paddle/phi/backends/dynload/lapack.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/backends/dynload/lapack.h" + #include namespace phi { diff --git a/paddle/phi/backends/dynload/lapack.h b/paddle/phi/backends/dynload/lapack.h index c81c66c6928..f0e1e9ad7a4 100644 --- a/paddle/phi/backends/dynload/lapack.h +++ b/paddle/phi/backends/dynload/lapack.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/fluid/platform/complex.h" #include "paddle/phi/backends/dynload/dynamic_loader.h" #include "paddle/phi/backends/dynload/port.h" diff --git a/paddle/phi/backends/dynload/miopen.cc b/paddle/phi/backends/dynload/miopen.cc index e7916873ccf..9c58da1d6ff 100644 --- a/paddle/phi/backends/dynload/miopen.cc +++ b/paddle/phi/backends/dynload/miopen.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/backends/dynload/miopen.h" + #include "paddle/fluid/platform/enforce.h" namespace phi { diff --git a/paddle/phi/backends/dynload/miopen.h b/paddle/phi/backends/dynload/miopen.h index eb14bfe8ec5..eeaf8028ec3 100644 --- a/paddle/phi/backends/dynload/miopen.h +++ b/paddle/phi/backends/dynload/miopen.h @@ -14,10 +14,11 @@ limitations under the License. */ #pragma once #include - #include #include + #include // NOLINT + #include "paddle/phi/backends/dynload/dynamic_loader.h" #include "paddle/phi/backends/dynload/port.h" diff --git a/paddle/phi/backends/dynload/mklml.h b/paddle/phi/backends/dynload/mklml.h index 5f5520a831e..0f0c31f8064 100644 --- a/paddle/phi/backends/dynload/mklml.h +++ b/paddle/phi/backends/dynload/mklml.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/mklrt.h b/paddle/phi/backends/dynload/mklrt.h index 8638d83d025..0267fb69a59 100644 --- a/paddle/phi/backends/dynload/mklrt.h +++ b/paddle/phi/backends/dynload/mklrt.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/nccl.h b/paddle/phi/backends/dynload/nccl.h index b04ef0f0651..6c73c562caa 100644 --- a/paddle/phi/backends/dynload/nccl.h +++ b/paddle/phi/backends/dynload/nccl.h @@ -14,6 +14,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/nvjpeg.h b/paddle/phi/backends/dynload/nvjpeg.h index 13bb8a5698f..6e71e6b582c 100644 --- a/paddle/phi/backends/dynload/nvjpeg.h +++ b/paddle/phi/backends/dynload/nvjpeg.h @@ -12,6 +12,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_CUDA #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/nvrtc.h b/paddle/phi/backends/dynload/nvrtc.h index 516ca7686d2..9244e9487b2 100644 --- a/paddle/phi/backends/dynload/nvrtc.h +++ b/paddle/phi/backends/dynload/nvrtc.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/nvtx.h b/paddle/phi/backends/dynload/nvtx.h index e9fd32668dc..a9a166b289e 100644 --- a/paddle/phi/backends/dynload/nvtx.h +++ b/paddle/phi/backends/dynload/nvtx.h @@ -15,6 +15,7 @@ limitations under the License. */ #ifndef _WIN32 #include #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/port.h b/paddle/phi/backends/dynload/port.h index 981e5f5af64..d380993c9b6 100644 --- a/paddle/phi/backends/dynload/port.h +++ b/paddle/phi/backends/dynload/port.h @@ -28,6 +28,7 @@ #include // dladdr #include #include + #include // std::accumulate #else #ifndef NOMINMAX @@ -40,6 +41,7 @@ #include #include #include + #include // std::accumulate in msvc #ifndef S_ISDIR // windows port for sys/stat.h #define S_ISDIR(mode) (((mode)&S_IFMT) == S_IFDIR) diff --git a/paddle/phi/backends/dynload/rccl.h b/paddle/phi/backends/dynload/rccl.h index 44726849628..2da35dc2df2 100644 --- a/paddle/phi/backends/dynload/rccl.h +++ b/paddle/phi/backends/dynload/rccl.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include // NOLINT + #include "paddle/phi/backends/dynload/dynamic_loader.h" #include "paddle/phi/backends/dynload/port.h" diff --git a/paddle/phi/backends/dynload/rocblas.h b/paddle/phi/backends/dynload/rocblas.h index 18061b192e4..a9804b3d82a 100644 --- a/paddle/phi/backends/dynload/rocblas.h +++ b/paddle/phi/backends/dynload/rocblas.h @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include // NOLINT #include diff --git a/paddle/phi/backends/dynload/rocm_driver.h b/paddle/phi/backends/dynload/rocm_driver.h index 59e35b787a5..4e456db44c9 100644 --- a/paddle/phi/backends/dynload/rocm_driver.h +++ b/paddle/phi/backends/dynload/rocm_driver.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include // NOLINT #include "paddle/phi/backends/dynload/dynamic_loader.h" diff --git a/paddle/phi/backends/dynload/tensorrt.cc b/paddle/phi/backends/dynload/tensorrt.cc index cc3b4e01460..45525701020 100644 --- a/paddle/phi/backends/dynload/tensorrt.cc +++ b/paddle/phi/backends/dynload/tensorrt.cc @@ -13,6 +13,7 @@ limitations under the License. */ #include "paddle/phi/backends/dynload/tensorrt.h" + #include namespace phi { diff --git a/paddle/phi/backends/event.cc b/paddle/phi/backends/event.cc index a474536f865..43077d280f3 100644 --- a/paddle/phi/backends/event.cc +++ b/paddle/phi/backends/event.cc @@ -13,6 +13,7 @@ // limitations under the License. 
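The long run of dynload hunks above is one rule applied everywhere: each include block is separated by a blank line (the lone `+` lines), keeping a file's own header, system headers, and other project headers in distinct groups; the stripped angle-bracket header names in this rendering are left as-is. The other pattern in this stretch, from device_base.cc's AllocSize and backend_set.h earlier, is an over-long conditional wrapped before ? and :, so the condition and both arms read as three aligned lines. A compilable sketch of that wrap, with invented names and a /2 fallback standing in for the memory-fraction flag:

#include <cstdint>
#include <cstdio>

int main() {
  const std::uint64_t flag_mb = 0;             // hypothetical flag
  const std::uint64_t available = 1ull << 30;  // hypothetical budget
  // Long conditional broken before `?` and `:`, mirroring AllocSize:
  const std::uint64_t alloc_bytes =
      (flag_mb > 0ull
           ? flag_mb << 20
           : available / 2);  // stand-in for the fraction-of-memory flag
  std::printf("%llu\n", static_cast<unsigned long long>(alloc_bytes));
  return 0;
}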
#include "paddle/phi/backends/event.h" + #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/phi/backends/device_guard.h" #include "paddle/phi/backends/stream.h" diff --git a/paddle/phi/backends/gpu/cuda/cuda_helper.h b/paddle/phi/backends/gpu/cuda/cuda_helper.h index 08670832c77..c62addfd257 100644 --- a/paddle/phi/backends/gpu/cuda/cuda_helper.h +++ b/paddle/phi/backends/gpu/cuda/cuda_helper.h @@ -60,7 +60,7 @@ namespace gpu { * } * } * -*/ + */ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ int64_t __index__ = blockIdx.x * blockDim.x + threadIdx.x; \ diff --git a/paddle/phi/backends/gpu/gpu_context.cc b/paddle/phi/backends/gpu/gpu_context.cc index e8c264b884f..f51f287ee4a 100644 --- a/paddle/phi/backends/gpu/gpu_context.cc +++ b/paddle/phi/backends/gpu/gpu_context.cc @@ -23,7 +23,6 @@ limitations under the License. */ #include #include "glog/logging.h" - #include "paddle/phi/api/ext/exception.h" #include "paddle/phi/backends/gpu/gpu_decls.h" #include "paddle/phi/backends/gpu/gpu_info.h" diff --git a/paddle/phi/backends/gpu/gpu_context.h b/paddle/phi/backends/gpu/gpu_context.h index db9f287041d..5246155131d 100644 --- a/paddle/phi/backends/gpu/gpu_context.h +++ b/paddle/phi/backends/gpu/gpu_context.h @@ -18,6 +18,7 @@ limitations under the License. */ #include #include #include + #include "paddle/phi/backends/gpu/forwards.h" #include "paddle/phi/backends/gpu/gpu_decls.h" #include "paddle/phi/backends/gpu/gpu_helper.h" diff --git a/paddle/phi/backends/gpu/gpu_info.h b/paddle/phi/backends/gpu/gpu_info.h index 443830acf47..323565c000a 100644 --- a/paddle/phi/backends/gpu/gpu_info.h +++ b/paddle/phi/backends/gpu/gpu_info.h @@ -14,6 +14,7 @@ limitations under the License. */ #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #include + #include #include #include diff --git a/paddle/phi/backends/gpu/gpu_launch_config.h b/paddle/phi/backends/gpu/gpu_launch_config.h index 888b44632ea..2dd1431ff58 100644 --- a/paddle/phi/backends/gpu/gpu_launch_config.h +++ b/paddle/phi/backends/gpu/gpu_launch_config.h @@ -25,9 +25,11 @@ #endif #include + #include #include #include + #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/enforce.h" @@ -95,9 +97,9 @@ struct GpuLaunchConfig { }; /* According to NVIDIA, if number of threads per block is 64/128/256/512, - * cuda performs better. And number of blocks should be greater (at least - * 2x~4x) than number of SMs. Hence, SM count is took into account within - * this function to determine the right number of threads per block. */ + * cuda performs better. And number of blocks should be greater (at least + * 2x~4x) than number of SMs. Hence, SM count is took into account within + * this function to determine the right number of threads per block. 
*/ inline GpuLaunchConfig GetGpuLaunchConfig1D(const phi::GPUContext& context, int64_t numel, int vec_size = 1) { diff --git a/paddle/phi/backends/gpu/gpu_resources.h b/paddle/phi/backends/gpu/gpu_resources.h index 07ccb621540..7bec5eebf58 100644 --- a/paddle/phi/backends/gpu/gpu_resources.h +++ b/paddle/phi/backends/gpu/gpu_resources.h @@ -14,6 +14,7 @@ #pragma once #include + #include "paddle/phi/backends/gpu/gpu_decls.h" #include "paddle/phi/common/place.h" diff --git a/paddle/phi/backends/gpu/rocm/rocm_helper.h b/paddle/phi/backends/gpu/rocm/rocm_helper.h index 2d75b6ea4cb..14e9ca660bd 100644 --- a/paddle/phi/backends/gpu/rocm/rocm_helper.h +++ b/paddle/phi/backends/gpu/rocm/rocm_helper.h @@ -60,7 +60,7 @@ namespace gpu { * } * } * -*/ + */ #define CUDA_KERNEL_LOOP_TYPE(i, num, index_type) \ int64_t __index__ = hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x; \ diff --git a/paddle/phi/backends/gpu/rocm/rocm_info.cc b/paddle/phi/backends/gpu/rocm/rocm_info.cc index 23e58d34b25..b89d5a3c162 100644 --- a/paddle/phi/backends/gpu/rocm/rocm_info.cc +++ b/paddle/phi/backends/gpu/rocm/rocm_info.cc @@ -13,6 +13,7 @@ // limitations under the License. #include + #include "paddle/phi/backends/gpu/gpu_info.h" // TODO(phi): remove fluid headers. diff --git a/paddle/phi/backends/stream.cc b/paddle/phi/backends/stream.cc index 30939f31fcc..f8b15bdbd9e 100644 --- a/paddle/phi/backends/stream.cc +++ b/paddle/phi/backends/stream.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/backends/stream.h" + #include "paddle/fluid/platform/device/device_wrapper.h" #include "paddle/phi/backends/device_guard.h" #include "paddle/phi/backends/event.h" diff --git a/paddle/phi/backends/xpu/enforce_xpu.h b/paddle/phi/backends/xpu/enforce_xpu.h index 29b048ead85..30095e3a007 100644 --- a/paddle/phi/backends/xpu/enforce_xpu.h +++ b/paddle/phi/backends/xpu/enforce_xpu.h @@ -14,11 +14,10 @@ limitations under the License. */ #pragma once +#include "paddle/fluid/platform/enforce.h" #include "paddle/phi/backends/xpu/xpu_header.h" #include "xpu/bkcl.h" -#include "paddle/fluid/platform/enforce.h" - namespace phi { namespace backends { namespace xpu { diff --git a/paddle/phi/backends/xpu/xpu_context.cc b/paddle/phi/backends/xpu/xpu_context.cc index 7cc9eb44bc4..dbff88c0a27 100644 --- a/paddle/phi/backends/xpu/xpu_context.cc +++ b/paddle/phi/backends/xpu/xpu_context.cc @@ -18,7 +18,6 @@ #include "paddle/phi/api/ext/exception.h" #include "paddle/phi/common/place.h" - #include "xpu/runtime.h" #include "xpu/runtime_ex.h" #include "xpu/xdnn.h" @@ -86,8 +85,8 @@ struct XPUContext::Impl { void Init() { owned_ = true; backends::xpu::XPUDeviceGuard guard(place_.GetDeviceId()); - LOG_FIRST_N(WARNING, 1) << "Please NOTE: xpu device: " - << static_cast(place_.device); + LOG_FIRST_N(WARNING, 1) + << "Please NOTE: xpu device: " << static_cast(place_.device); context_ = xpu::create_context(); xpu_version_ = backends::xpu::get_xpu_version(place_.device); SetL3Cache(); diff --git a/paddle/phi/backends/xpu/xpu_context.h b/paddle/phi/backends/xpu/xpu_context.h index b87489c567c..d39b3c9cc1f 100644 --- a/paddle/phi/backends/xpu/xpu_context.h +++ b/paddle/phi/backends/xpu/xpu_context.h @@ -15,12 +15,12 @@ limitations under the License. 
*/ #pragma once #include -#include "paddle/phi/backends/xpu/forwards.h" -#include "paddle/phi/common/place.h" -#include "paddle/phi/core/device_context.h" +#include "paddle/phi/backends/xpu/forwards.h" #include "paddle/phi/backends/xpu/xpu_header.h" #include "paddle/phi/backends/xpu/xpu_info.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/device_context.h" namespace xpu = baidu::xpu::api; diff --git a/paddle/phi/backends/xpu/xpu_header.h b/paddle/phi/backends/xpu/xpu_header.h index 5337f78c642..1fe6f6d0779 100644 --- a/paddle/phi/backends/xpu/xpu_header.h +++ b/paddle/phi/backends/xpu/xpu_header.h @@ -22,7 +22,6 @@ limitations under the License. */ #include "paddle/fluid/platform/enforce.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/float16.h" - #include "xpu/runtime.h" #include "xpu/runtime_ex.h" #include "xpu/xdnn.h" diff --git a/paddle/phi/backends/xpu/xpu_info.h b/paddle/phi/backends/xpu/xpu_info.h index b1056cdc4b1..9d5f073eaa8 100644 --- a/paddle/phi/backends/xpu/xpu_info.h +++ b/paddle/phi/backends/xpu/xpu_info.h @@ -12,6 +12,7 @@ limitations under the License. */ #include #include + #include "paddle/phi/common/place.h" namespace phi { diff --git a/paddle/phi/common/data_type.h b/paddle/phi/common/data_type.h index 1792cb93706..ef9b4250482 100644 --- a/paddle/phi/common/data_type.h +++ b/paddle/phi/common/data_type.h @@ -14,11 +14,10 @@ limitations under the License. */ #pragma once +#include "paddle/phi/api/ext/exception.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" - -#include "paddle/phi/api/ext/exception.h" #include "paddle/phi/common/pstring.h" namespace paddle { diff --git a/paddle/phi/common/int_array.cc b/paddle/phi/common/int_array.cc index daed2b6625a..81701ee010c 100644 --- a/paddle/phi/common/int_array.cc +++ b/paddle/phi/common/int_array.cc @@ -14,9 +14,8 @@ limitations under the License. */ #include "paddle/phi/common/int_array.h" -#include "paddle/phi/common/place.h" - #include "paddle/fluid/framework/tensor_util.h" +#include "paddle/phi/common/place.h" namespace paddle { namespace experimental { diff --git a/paddle/phi/common/place.cc b/paddle/phi/common/place.cc index 667d0a32b93..c15a17651b1 100644 --- a/paddle/phi/common/place.cc +++ b/paddle/phi/common/place.cc @@ -19,7 +19,6 @@ limitations under the License. */ #include #include "glog/logging.h" - #include "paddle/phi/api/ext/exception.h" namespace phi { diff --git a/paddle/phi/common/scalar.cc b/paddle/phi/common/scalar.cc index 41f1c954182..2954af086ac 100644 --- a/paddle/phi/common/scalar.cc +++ b/paddle/phi/common/scalar.cc @@ -14,11 +14,10 @@ limitations under the License. */ #include "paddle/phi/common/scalar.h" -#include "paddle/phi/common/place.h" -#include "paddle/phi/core/enforce.h" - #include "paddle/fluid/framework/tensor_util.h" #include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/enforce.h" namespace paddle { namespace experimental { diff --git a/paddle/phi/core/compat/op_utils.h b/paddle/phi/core/compat/op_utils.h index 8eb6524e79c..ae3b8924ece 100644 --- a/paddle/phi/core/compat/op_utils.h +++ b/paddle/phi/core/compat/op_utils.h @@ -18,7 +18,6 @@ limitations under the License. 
*/ #include #include "glog/logging.h" - #include "paddle/phi/core/compat/arg_map_context.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/phi/core/ddim.h b/paddle/phi/core/ddim.h index dd13081ddaf..794d7051aee 100644 --- a/paddle/phi/core/ddim.h +++ b/paddle/phi/core/ddim.h @@ -238,10 +238,10 @@ int arity(const DDim& ddim); std::ostream& operator<<(std::ostream&, const DDim&); /** -* \brief Flatten dim to 3d -* e.g., DDim d = mak_ddim({1, 2, 3, 4, 5, 6}) -* flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30} -*/ + * \brief Flatten dim to 3d + * e.g., DDim d = mak_ddim({1, 2, 3, 4, 5, 6}) + * flatten_to_3d(d, 2, 4); ===> {1*2, 3*4, 5*6} ===> {2, 12, 30} + */ DDim flatten_to_3d(const DDim& src, int num_row_dims, int num_col_dims); // Reshape a tensor to a matrix. The matrix's first dimension(column length) diff --git a/paddle/phi/core/dense_tensor.h b/paddle/phi/core/dense_tensor.h index 06d3e435bc1..09098705b11 100644 --- a/paddle/phi/core/dense_tensor.h +++ b/paddle/phi/core/dense_tensor.h @@ -20,7 +20,7 @@ limitations under the License. */ #include "paddle/phi/core/tensor_meta.h" /* @jim19930609: Move to MKLDNN_Tensor in the future - */ + */ #ifdef PADDLE_WITH_MKLDNN #include "dnnl.hpp" #endif diff --git a/paddle/phi/core/dense_tensor_impl.cc b/paddle/phi/core/dense_tensor_impl.cc index 8c97b6bf223..a59b910b7e0 100644 --- a/paddle/phi/core/dense_tensor_impl.cc +++ b/paddle/phi/core/dense_tensor_impl.cc @@ -12,15 +12,12 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/phi/core/dense_tensor.h" - +#include "paddle/fluid/memory/malloc.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/common/float16.h" - #include "paddle/phi/core/compat/convert_utils.h" - -#include "paddle/fluid/memory/malloc.h" +#include "paddle/phi/core/dense_tensor.h" #ifdef PADDLE_WITH_MKLDNN #include "paddle/fluid/platform/mkldnn_utils.h" diff --git a/paddle/phi/core/device_context.cc b/paddle/phi/core/device_context.cc index 0f5f22b5bd1..ce57f4f627b 100644 --- a/paddle/phi/core/device_context.cc +++ b/paddle/phi/core/device_context.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/core/device_context.h" + #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/selected_rows.h" diff --git a/paddle/phi/core/device_context.h b/paddle/phi/core/device_context.h index d7c2c777ca6..45e4fbf64dc 100644 --- a/paddle/phi/core/device_context.h +++ b/paddle/phi/core/device_context.h @@ -75,17 +75,17 @@ class PADDLE_API DeviceContext { void SetHostAllocator(const Allocator*); /** - * @brief Set the zero-size Allocator object. - * - * @param allocator - */ + * @brief Set the zero-size Allocator object. + * + * @param allocator + */ void SetZeroAllocator(const Allocator*); /** - * @brief Set the zero-size Allocator object. - * - * @param allocator - */ + * @brief Set the zero-size Allocator object. + * + * @param allocator + */ void SetPinnedAllocator(const Allocator*); /** @@ -135,10 +135,10 @@ class PADDLE_API DeviceContext { virtual void Wait() const {} /** - * @brief Set the generator for special op. - * - * @param Generator - */ + * @brief Set the generator for special op. + * + * @param Generator + */ void SetGenerator(Generator*); /** * @brief Get the generator object. 
@@ -148,10 +148,10 @@ class PADDLE_API DeviceContext { Generator* GetGenerator() const; /** - * @brief Set the host generator for special op. - * - * @param Generator - */ + * @brief Set the host generator for special op. + * + * @param Generator + */ void SetHostGenerator(Generator*); /** * @brief Get the host generator object. diff --git a/paddle/phi/core/enforce.cc b/paddle/phi/core/enforce.cc index ae6b0135b32..91e0316ff75 100644 --- a/paddle/phi/core/enforce.cc +++ b/paddle/phi/core/enforce.cc @@ -14,13 +14,12 @@ limitations under the License. */ #include "paddle/phi/core/enforce.h" +#include #include #include #include #include -#include - // is not suitable to be placed in the header file, // it will introduce a large number of unnecessary includes, and these type // declarations that depend on boost are also not suitable for the phi header diff --git a/paddle/phi/core/hostdevice.h b/paddle/phi/core/hostdevice.h index 0869df14323..decebbe66a5 100644 --- a/paddle/phi/core/hostdevice.h +++ b/paddle/phi/core/hostdevice.h @@ -20,6 +20,7 @@ #if defined(__xpu__) #include + #include "xpu/kernel/cluster_header.h" #include "xpu/kernel/debug.h" #include "xpu/kernel/math.h" diff --git a/paddle/phi/core/kernel_factory.cc b/paddle/phi/core/kernel_factory.cc index d479147f06b..d864544e10d 100644 --- a/paddle/phi/core/kernel_factory.cc +++ b/paddle/phi/core/kernel_factory.cc @@ -15,7 +15,6 @@ #include "paddle/phi/core/kernel_factory.h" #include "glog/logging.h" - #include "paddle/phi/core/enforce.h" namespace phi { diff --git a/paddle/phi/core/kernel_registry.h b/paddle/phi/core/kernel_registry.h index 41e1e2b53a9..65f655d5037 100644 --- a/paddle/phi/core/kernel_registry.h +++ b/paddle/phi/core/kernel_registry.h @@ -22,13 +22,12 @@ #include #include "paddle/phi/core/custom_kernel.h" +#include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_factory.h" #include "paddle/phi/core/kernel_utils.h" #include "paddle/phi/core/macros.h" #include "paddle/phi/core/type_defs.h" -#include "paddle/phi/core/enforce.h" - namespace phi { #define BACKEND(arg__) phi::Backend::arg__ @@ -58,16 +57,13 @@ struct KernelArgsParseFunctor { for (auto arg_type : args_type) { if (arg_type == std::type_index(typeid(const CPUContext&)) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) - || - arg_type == std::type_index(typeid(const GPUContext&))) { + || arg_type == std::type_index(typeid(const GPUContext&))) { #elif defined(PADDLE_WITH_XPU) - || - arg_type == std::type_index(typeid(const XPUContext&))) { + || arg_type == std::type_index(typeid(const XPUContext&))) { #elif defined(PADDLE_WITH_CUSTOM_DEVICE) - || - arg_type == std::type_index(typeid(const CustomContext&))) { + || arg_type == std::type_index(typeid(const CustomContext&))) { #else - ) { + ) { #endif // do nothing, skip context arg now } else if (arg_type == std::type_index(typeid(const DenseTensor&))) { @@ -420,93 +416,93 @@ struct KernelRegistrar { PD_CONCATENATE(_PD_KERNEL_INSTANTIATION_, N) \ (meta_kernel_fn, backend, context, __VA_ARGS__) -#define _PD_KERNEL_INSTANTIATION_1( \ - meta_kernel_fn, backend, context, cpp_dtype) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn -#define _PD_KERNEL_INSTANTIATION_2( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) 
\ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_1( \ +#define _PD_KERNEL_INSTANTIATION_1( \ + meta_kernel_fn, backend, context, cpp_dtype) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn +#define _PD_KERNEL_INSTANTIATION_2( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_1( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_3( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_2( \ +#define _PD_KERNEL_INSTANTIATION_3( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_2( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_4( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_3( \ +#define _PD_KERNEL_INSTANTIATION_4( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_3( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_5( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_4( \ +#define _PD_KERNEL_INSTANTIATION_5( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_4( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_6( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_5( \ +#define _PD_KERNEL_INSTANTIATION_6( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_5( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_7( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_6( \ +#define _PD_KERNEL_INSTANTIATION_7( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_6( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_8( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_7( \ +#define _PD_KERNEL_INSTANTIATION_8( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_7( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_9( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_8( \ +#define _PD_KERNEL_INSTANTIATION_9( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) 
\ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_8( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_10( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_9( \ +#define _PD_KERNEL_INSTANTIATION_10( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_9( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_11( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_10( \ +#define _PD_KERNEL_INSTANTIATION_11( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_10( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_12( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_11( \ +#define _PD_KERNEL_INSTANTIATION_12( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_11( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_13( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_12( \ +#define _PD_KERNEL_INSTANTIATION_13( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_12( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_14( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_13( \ +#define _PD_KERNEL_INSTANTIATION_14( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) \ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_13( \ meta_kernel_fn, backend, context, __VA_ARGS__)) -#define _PD_KERNEL_INSTANTIATION_15( \ - meta_kernel_fn, backend, context, cpp_dtype, ...) \ - template decltype( \ - meta_kernel_fn) meta_kernel_fn; \ - PD_EXPAND(_PD_KERNEL_INSTANTIATION_14( \ +#define _PD_KERNEL_INSTANTIATION_15( \ + meta_kernel_fn, backend, context, cpp_dtype, ...) 
\ + template decltype(meta_kernel_fn) \ + meta_kernel_fn; \ + PD_EXPAND(_PD_KERNEL_INSTANTIATION_14( \ meta_kernel_fn, backend, context, __VA_ARGS__)) #define PD_KERNEL_REGISTRAR_INIT(reg_type, \ @@ -569,8 +565,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -592,8 +588,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -623,8 +619,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -654,8 +650,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -685,8 +681,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -716,8 +712,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -747,8 +743,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -778,8 +774,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -809,8 +805,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ -840,8 +836,8 @@ struct KernelRegistrar { #backend, \ DATALAYOUT(layout), \ ::paddle::experimental::CppTypeToDataType::Type(), \ - ::phi::KernelArgsParseFunctor)>::Parse, \ + ::phi::KernelArgsParseFunctor< \ + decltype(&meta_kernel_fn)>::Parse, \ args_def_fn, \ PHI_KERNEL(meta_kernel_fn), \ PHI_VARIADIC_KERNEL(meta_kernel_fn)); \ @@ 
@@ -871,8 +867,8 @@ struct KernelRegistrar {
           #backend,                                                     \
           DATALAYOUT(layout),                                           \
           ::paddle::experimental::CppTypeToDataType<cpp_dtype>::Type(), \
-          ::phi::KernelArgsParseFunctor<decltype(                       \
-              &meta_kernel_fn<cpp_dtype, context>)>::Parse,             \
+          ::phi::KernelArgsParseFunctor<                                \
+              decltype(&meta_kernel_fn<cpp_dtype, context>)>::Parse,    \
           args_def_fn,                                                  \
           PHI_KERNEL(meta_kernel_fn<cpp_dtype, context>),               \
           PHI_VARIADIC_KERNEL(meta_kernel_fn<cpp_dtype, context>));     \
@@ -902,8 +898,8 @@ struct KernelRegistrar {
           #backend,                                                     \
           DATALAYOUT(layout),                                           \
           ::paddle::experimental::CppTypeToDataType<cpp_dtype>::Type(), \
-          ::phi::KernelArgsParseFunctor<decltype(                       \
-              &meta_kernel_fn<cpp_dtype, context>)>::Parse,             \
+          ::phi::KernelArgsParseFunctor<                                \
+              decltype(&meta_kernel_fn<cpp_dtype, context>)>::Parse,    \
           args_def_fn,                                                  \
           PHI_KERNEL(meta_kernel_fn<cpp_dtype, context>),               \
           PHI_VARIADIC_KERNEL(meta_kernel_fn<cpp_dtype, context>));     \
@@ -933,8 +929,8 @@ struct KernelRegistrar {
           #backend,                                                     \
           DATALAYOUT(layout),                                           \
           ::paddle::experimental::CppTypeToDataType<cpp_dtype>::Type(), \
-          ::phi::KernelArgsParseFunctor<decltype(                       \
-              &meta_kernel_fn<cpp_dtype, context>)>::Parse,             \
+          ::phi::KernelArgsParseFunctor<                                \
+              decltype(&meta_kernel_fn<cpp_dtype, context>)>::Parse,    \
           args_def_fn,                                                  \
           PHI_KERNEL(meta_kernel_fn<cpp_dtype, context>),               \
           PHI_VARIADIC_KERNEL(meta_kernel_fn<cpp_dtype, context>));     \
@@ -964,8 +960,8 @@ struct KernelRegistrar {
           #backend,                                                     \
           DATALAYOUT(layout),                                           \
           ::paddle::experimental::CppTypeToDataType<cpp_dtype>::Type(), \
-          ::phi::KernelArgsParseFunctor<decltype(                       \
-              &meta_kernel_fn<cpp_dtype, context>)>::Parse,             \
+          ::phi::KernelArgsParseFunctor<                                \
+              decltype(&meta_kernel_fn<cpp_dtype, context>)>::Parse,    \
           args_def_fn,                                                  \
           PHI_KERNEL(meta_kernel_fn<cpp_dtype, context>),               \
           PHI_VARIADIC_KERNEL(meta_kernel_fn<cpp_dtype, context>));     \
@@ -995,8 +991,8 @@ struct KernelRegistrar {
           #backend,                                                     \
           DATALAYOUT(layout),                                           \
           ::paddle::experimental::CppTypeToDataType<cpp_dtype>::Type(), \
-          ::phi::KernelArgsParseFunctor<decltype(                       \
-              &meta_kernel_fn<cpp_dtype, context>)>::Parse,             \
+          ::phi::KernelArgsParseFunctor<                                \
+              decltype(&meta_kernel_fn<cpp_dtype, context>)>::Parse,    \
           args_def_fn,                                                  \
           PHI_KERNEL(meta_kernel_fn<cpp_dtype, context>),               \
           PHI_VARIADIC_KERNEL(meta_kernel_fn<cpp_dtype, context>));     \
diff --git a/paddle/phi/core/kernel_utils.h b/paddle/phi/core/kernel_utils.h
index d4765d1c4c3..3b5fd0247a4 100644
--- a/paddle/phi/core/kernel_utils.h
+++ b/paddle/phi/core/kernel_utils.h
@@ -233,9 +233,8 @@ template <typename Return,
           Return (*kernel_fn)(DevCtx, Args...)>
 struct KernelImpl<Return (*)(DevCtx, Args...), kernel_fn> {
   static void Compute(KernelContext* ctx) {
-    KernelCallHelper<DevCtx,
-                     Args...,
-                     TypeTag<int>>::template Compute<0, 0, 0, 0>(ctx);
+    KernelCallHelper<DevCtx, Args..., TypeTag<int>>::
+        template Compute<0, 0, 0, 0>(ctx);
   }
 
   static void VariadicCompute(const DeviceContext& dev_ctx, Args... args) {
diff --git a/paddle/phi/core/meta_tensor.h b/paddle/phi/core/meta_tensor.h
index d277f32d8ea..27175916186 100644
--- a/paddle/phi/core/meta_tensor.h
+++ b/paddle/phi/core/meta_tensor.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include "glog/logging.h"
 #include "paddle/phi/common/data_type.h"
 #include "paddle/phi/common/layout.h"
 #include "paddle/phi/core/ddim.h"
@@ -21,8 +22,6 @@ limitations under the License. */
 #include "paddle/phi/core/tensor_base.h"
 #include "paddle/phi/core/tensor_meta.h"
 
-#include "glog/logging.h"
-
 namespace phi {
 
 // TODO(chenweihang): add other flags if needed
diff --git a/paddle/phi/core/string_tensor.cc b/paddle/phi/core/string_tensor.cc
index 0a4e0d61915..20cbf3dffcb 100644
--- a/paddle/phi/core/string_tensor.cc
+++ b/paddle/phi/core/string_tensor.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/phi/core/string_tensor.h"
+
 #include "paddle/fluid/memory/malloc.h"
 
 namespace phi {
diff --git a/paddle/phi/core/tensor_base.cc b/paddle/phi/core/tensor_base.cc
index 1b3628906af..718bf09ff7e 100644
--- a/paddle/phi/core/tensor_base.cc
+++ b/paddle/phi/core/tensor_base.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
*/ #include "paddle/phi/core/tensor_base.h" + #include "paddle/phi/core/utils/type_registry.h" namespace phi {} diff --git a/paddle/phi/core/utils/intrusive_ptr.h b/paddle/phi/core/utils/intrusive_ptr.h index 2b758019253..e2e6cb7060d 100644 --- a/paddle/phi/core/utils/intrusive_ptr.h +++ b/paddle/phi/core/utils/intrusive_ptr.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "glog/logging.h" #include "paddle/phi/core/enforce.h" diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index a8d5ad564fe..f10fc54795d 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -16,6 +16,7 @@ limitations under the License. */ #include #include + #include "paddle/phi/common/data_type.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/ddim.h" diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 63f0d0c1eeb..61c57981f94 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -13,7 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/infermeta/multiary.h" + #include + #include "paddle/phi/common/layout.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/infermeta_utils.h" diff --git a/paddle/phi/infermeta/ternary.cc b/paddle/phi/infermeta/ternary.cc index 3c2888cee58..d84cc9e6d75 100644 --- a/paddle/phi/infermeta/ternary.cc +++ b/paddle/phi/infermeta/ternary.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/infermeta/ternary.h" + #include "paddle/phi/core/ddim.h" #include "paddle/phi/kernels/funcs/common_shape.h" diff --git a/paddle/phi/kernels/assign_kernel.cc b/paddle/phi/kernels/assign_kernel.cc index 2349bf990ac..3d8e4db08bb 100644 --- a/paddle/phi/kernels/assign_kernel.cc +++ b/paddle/phi/kernels/assign_kernel.cc @@ -14,12 +14,11 @@ #include "paddle/phi/kernels/assign_kernel.h" +#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/utils/optional.h" -#include "paddle/fluid/framework/tensor_util.h" - namespace phi { template diff --git a/paddle/phi/kernels/auc_kernel.h b/paddle/phi/kernels/auc_kernel.h index acbd17c7801..f58c3ce112b 100644 --- a/paddle/phi/kernels/auc_kernel.h +++ b/paddle/phi/kernels/auc_kernel.h @@ -15,6 +15,7 @@ limitations under the License. */ #pragma once #include + #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/phi/kernels/autotune/auto_tune_base.h b/paddle/phi/kernels/autotune/auto_tune_base.h index eaf325dad75..e18b854cf34 100644 --- a/paddle/phi/kernels/autotune/auto_tune_base.h +++ b/paddle/phi/kernels/autotune/auto_tune_base.h @@ -15,6 +15,7 @@ #pragma once #include + #include "glog/logging.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/kernels/autotune/gpu_timer.h" diff --git a/paddle/phi/kernels/autotune/auto_tune_test.cu b/paddle/phi/kernels/autotune/auto_tune_test.cu index f477cd12193..c3918b8ebe5 100644 --- a/paddle/phi/kernels/autotune/auto_tune_test.cu +++ b/paddle/phi/kernels/autotune/auto_tune_test.cu @@ -13,6 +13,7 @@ // limitations under the License. 
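The header churn in this stretch follows one rule set: the file's own header stays first with a blank line after it, and every remaining include is merged into sorted groups, which is why stray trailing includes such as "glog/logging.h" and "paddle/fluid/framework/tensor_util.h" move up into alphabetical position. A sketch of the layout the tool settles on, for a hypothetical foo_kernel.cc rather than any file in this patch:

    // Hypothetical foo_kernel.cc, illustrating the include order only.
    #include "paddle/phi/kernels/foo_kernel.h"  // matching header, always first

    #include <algorithm>  // C and C++ standard headers form their own group

    #include "glog/logging.h"                     // third-party and project
    #include "paddle/phi/core/kernel_registry.h"  // headers, alphabetized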
#include + #include "glog/logging.h" #include "paddle/phi/api/lib/utils/allocator.h" #include "paddle/phi/backends/all_context.h" @@ -66,8 +67,8 @@ float Algo(const phi::GPUContext& ctx, N); #else VLOG(3) << "Vecsize is " << Vecsize; - VecSumTest<<>>( - d_in_data, d_out_data, N); + VecSumTest + <<>>(d_in_data, d_out_data, N); #endif return Vecsize; } diff --git a/paddle/phi/kernels/autotune/cache.cc b/paddle/phi/kernels/autotune/cache.cc index ef2cbe633d4..5e2c9e1c742 100644 --- a/paddle/phi/kernels/autotune/cache.cc +++ b/paddle/phi/kernels/autotune/cache.cc @@ -13,7 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/autotune/cache.h" + #include + #include "glog/logging.h" namespace phi { diff --git a/paddle/phi/kernels/autotune/cache.h b/paddle/phi/kernels/autotune/cache.h index 37c5d134e8a..9d7f57e96e3 100644 --- a/paddle/phi/kernels/autotune/cache.h +++ b/paddle/phi/kernels/autotune/cache.h @@ -19,6 +19,7 @@ #include #include #include + #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/errors.h" diff --git a/paddle/phi/kernels/autotune/cache_test.cc b/paddle/phi/kernels/autotune/cache_test.cc index f99f8bfc8b8..53574c3d0c9 100644 --- a/paddle/phi/kernels/autotune/cache_test.cc +++ b/paddle/phi/kernels/autotune/cache_test.cc @@ -13,9 +13,12 @@ // limitations under the License. #include "paddle/phi/kernels/autotune/cache.h" + #include + #include #include + #include "glog/logging.h" enum ConvAlgos { GEMMKernel = 0, CuDNNKernel_1 = 1, CuDNNKernel_2 = 2 }; diff --git a/paddle/phi/kernels/autotune/gpu_timer_test.cu b/paddle/phi/kernels/autotune/gpu_timer_test.cu index b6eb345885f..d24508dfa20 100644 --- a/paddle/phi/kernels/autotune/gpu_timer_test.cu +++ b/paddle/phi/kernels/autotune/gpu_timer_test.cu @@ -13,7 +13,9 @@ // limitations under the License. 
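In the auto_tune_test.cu hunk above, the break now falls before the triple-chevron launch configuration instead of inside the argument list (the template arguments and launch bounds themselves were lost when this excerpt was extracted). A sketch of the two wrappings, with a made-up kernel name and launch bounds:

    __global__ void VecSumSketch(const float* in, float* out, int n) {
      // body irrelevant here; only the launch formatting matters
    }

    void LaunchSketch(const float* d_in, float* d_out, int n,
                      cudaStream_t stream) {
      // Old wrapping broke inside the argument list:
      //   VecSumSketch<<<grids, blocks, 0, stream>>>(
      //       d_in, d_out, n);
      // New wrapping keeps the call together and breaks before the config:
      VecSumSketch
          <<<(n + 255) / 256, 256, 0, stream>>>(d_in, d_out, n);
    }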
#include + #include + #include "glog/logging.h" #include "paddle/phi/kernels/autotune/gpu_timer.h" #include "paddle/phi/kernels/funcs/aligned_vector.h" diff --git a/paddle/phi/kernels/autotune/switch_autotune.h b/paddle/phi/kernels/autotune/switch_autotune.h index 1793940542d..de638ac4eda 100644 --- a/paddle/phi/kernels/autotune/switch_autotune.h +++ b/paddle/phi/kernels/autotune/switch_autotune.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/phi/kernels/autotune/cache.h" namespace phi { diff --git a/paddle/phi/kernels/batch_norm_grad_kernel.h b/paddle/phi/kernels/batch_norm_grad_kernel.h index 3de2f69f452..afbb0c78ca9 100644 --- a/paddle/phi/kernels/batch_norm_grad_kernel.h +++ b/paddle/phi/kernels/batch_norm_grad_kernel.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/broadcast_tensors_grad_kernel.h b/paddle/phi/kernels/broadcast_tensors_grad_kernel.h index 5d24f6684a4..79d5b8a445b 100644 --- a/paddle/phi/kernels/broadcast_tensors_grad_kernel.h +++ b/paddle/phi/kernels/broadcast_tensors_grad_kernel.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/broadcast_tensors_kernel.h b/paddle/phi/kernels/broadcast_tensors_kernel.h index 22b5201b690..dccaebcf41f 100644 --- a/paddle/phi/kernels/broadcast_tensors_kernel.h +++ b/paddle/phi/kernels/broadcast_tensors_kernel.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/channel_shuffle_grad_kernel.h b/paddle/phi/kernels/channel_shuffle_grad_kernel.h index ac89f3336bc..d75d887d0fc 100644 --- a/paddle/phi/kernels/channel_shuffle_grad_kernel.h +++ b/paddle/phi/kernels/channel_shuffle_grad_kernel.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/channel_shuffle_kernel.h b/paddle/phi/kernels/channel_shuffle_kernel.h index 12de25606dd..c15e06fb552 100644 --- a/paddle/phi/kernels/channel_shuffle_kernel.h +++ b/paddle/phi/kernels/channel_shuffle_kernel.h @@ -15,6 +15,7 @@ #pragma once #include + #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/conv_kernel.cc b/paddle/phi/kernels/conv_kernel.cc index 7268384f401..542a4ec8a61 100644 --- a/paddle/phi/kernels/conv_kernel.cc +++ b/paddle/phi/kernels/conv_kernel.cc @@ -14,9 +14,8 @@ #include "paddle/phi/kernels/conv_kernel.h" -#include "paddle/phi/core/kernel_registry.h" - #include "paddle/fluid/platform/cudnn_workspace_helper.h" +#include "paddle/phi/core/kernel_registry.h" namespace phi { @@ -41,8 +40,8 @@ void ConvInferKernel(const Context& dev_ctx, dilations, data_format, /*use_addto=*/false, - /*workspace_size_MB=*/paddle::platform:: - GetDefaultConvWorkspaceSizeLimitMB(), + /*workspace_size_MB=*/ + paddle::platform::GetDefaultConvWorkspaceSizeLimitMB(), /*exhaustive_search=*/false, out); } diff --git a/paddle/phi/kernels/conv_transpose_grad_kernel.h b/paddle/phi/kernels/conv_transpose_grad_kernel.h index 2b1c0c1a934..00d5fb51f01 100644 --- a/paddle/phi/kernels/conv_transpose_grad_kernel.h +++ b/paddle/phi/kernels/conv_transpose_grad_kernel.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/conv_transpose_kernel.h b/paddle/phi/kernels/conv_transpose_kernel.h index de56f13ddf7..e39617e0e7c 100644 --- a/paddle/phi/kernels/conv_transpose_kernel.h +++ 
b/paddle/phi/kernels/conv_transpose_kernel.h @@ -16,6 +16,7 @@ #include #include + #include "paddle/phi/core/dense_tensor.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/abs_kernel.cc b/paddle/phi/kernels/cpu/abs_kernel.cc index 9f89fc27a71..a10e0eed64a 100644 --- a/paddle/phi/kernels/cpu/abs_kernel.cc +++ b/paddle/phi/kernels/cpu/abs_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/abs_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/accuracy_kernel.cc b/paddle/phi/kernels/cpu/accuracy_kernel.cc index 6ff8a1f7558..17246de35db 100644 --- a/paddle/phi/kernels/cpu/accuracy_kernel.cc +++ b/paddle/phi/kernels/cpu/accuracy_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/accuracy_kernel.h" #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/activation_kernel.cc b/paddle/phi/kernels/cpu/activation_kernel.cc index 165627839a3..bd3e16d54dc 100644 --- a/paddle/phi/kernels/cpu/activation_kernel.cc +++ b/paddle/phi/kernels/cpu/activation_kernel.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/activation_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/activation_functor.h" diff --git a/paddle/phi/kernels/cpu/adagrad_kernel.cc b/paddle/phi/kernels/cpu/adagrad_kernel.cc index fcd89caf7fa..d6867deff4c 100644 --- a/paddle/phi/kernels/cpu/adagrad_kernel.cc +++ b/paddle/phi/kernels/cpu/adagrad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/adagrad_kernel.h" + #include "paddle/fluid/operators/math/selected_rows_functor.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/allclose_kernel.cc b/paddle/phi/kernels/cpu/allclose_kernel.cc index f95ddc5621e..c6a512aa95c 100644 --- a/paddle/phi/kernels/cpu/allclose_kernel.cc +++ b/paddle/phi/kernels/cpu/allclose_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/allclose_kernel.h" #include + #include "glog/logging.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/arange_kernel.cc b/paddle/phi/kernels/cpu/arange_kernel.cc index 478251b0d3b..7f7e5554231 100644 --- a/paddle/phi/kernels/cpu/arange_kernel.cc +++ b/paddle/phi/kernels/cpu/arange_kernel.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/arange_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/range_function.h" diff --git a/paddle/phi/kernels/cpu/atan2_grad_kernel.cc b/paddle/phi/kernels/cpu/atan2_grad_kernel.cc index 7a519aab0ad..3bc8c853a7b 100644 --- a/paddle/phi/kernels/cpu/atan2_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/atan2_grad_kernel.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/phi/kernels/impl/atan2_grad_kernel_impl.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/atan2_grad_kernel_impl.h" PD_REGISTER_KERNEL(atan2_grad, CPU, diff --git a/paddle/phi/kernels/cpu/atan2_kernel.cc b/paddle/phi/kernels/cpu/atan2_kernel.cc index df6f5f59ac0..4cb96ad8b6c 100644 --- a/paddle/phi/kernels/cpu/atan2_kernel.cc +++ b/paddle/phi/kernels/cpu/atan2_kernel.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/impl/atan2_kernel_impl.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/atan2_kernel_impl.h" PD_REGISTER_KERNEL(atan2, CPU, diff --git a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc index 366a08e59fe..beda276c8ef 100644 --- a/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/batch_norm_grad_kernel.cc @@ -12,12 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/batch_norm_kernel.h" #include "paddle/phi/kernels/funcs/eigen/common.h" - -#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/gpu/batch_norm_utils.h" diff --git a/paddle/phi/kernels/cpu/batch_norm_kernel.cc b/paddle/phi/kernels/cpu/batch_norm_kernel.cc index 743128e8dea..cb8af06b540 100644 --- a/paddle/phi/kernels/cpu/batch_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/batch_norm_kernel.cc @@ -13,12 +13,12 @@ // limitations under the License. #include "paddle/phi/kernels/batch_norm_kernel.h" + +#include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" -#include "paddle/fluid/framework/tensor_util.h" - namespace phi { template diff --git a/paddle/phi/kernels/cpu/bce_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/bce_loss_grad_kernel.cc index 6859451e8be..fc91af3ff71 100644 --- a/paddle/phi/kernels/cpu/bce_loss_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/bce_loss_grad_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/bce_loss_grad_kernel.h" #include // for max + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/bce_loss_kernel.cc b/paddle/phi/kernels/cpu/bce_loss_kernel.cc index 76b97936514..9d62fabcbe7 100644 --- a/paddle/phi/kernels/cpu/bce_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/bce_loss_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/bce_loss_kernel.h" #include // for max + #include "paddle/fluid/operators/math.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/bernoulli_kernel.cc b/paddle/phi/kernels/cpu/bernoulli_kernel.cc index 09c07d9ec9d..6bf548154a4 100644 --- a/paddle/phi/kernels/cpu/bernoulli_kernel.cc +++ b/paddle/phi/kernels/cpu/bernoulli_kernel.cc @@ -13,7 +13,9 @@ // limitations under the License. 
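The atan2 hunks above only move the impl header into the sorted block; the registration underneath each include block is untouched. For orientation, PD_REGISTER_KERNEL takes the op name, backend, layout, kernel symbol, and the dtype list to instantiate, with a trailing brace block (empty for most kernels) for extra argument setup. The atan2 dtype list is cut off in this excerpt, so the sketch below abbreviates it:

    // Shape of a phi CPU kernel registration; dtype list illustrative only,
    // atan2's real list is truncated in the hunk above.
    PD_REGISTER_KERNEL(
        atan2, CPU, ALL_LAYOUT, phi::Atan2Kernel, float, double) {}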
#include "paddle/phi/kernels/bernoulli_kernel.h" + #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/bilinear_tensor_product_grad_kernel.cc b/paddle/phi/kernels/cpu/bilinear_tensor_product_grad_kernel.cc index 2268212316a..ef7e8a981c5 100644 --- a/paddle/phi/kernels/cpu/bilinear_tensor_product_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/bilinear_tensor_product_grad_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/bilinear_tensor_product_grad_kernel.h" -#include "paddle/phi/kernels/impl/bilinear_tensor_product_grad_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/bilinear_tensor_product_grad_kernel_impl.h" PD_REGISTER_KERNEL(bilinear_tensor_product_grad, CPU, diff --git a/paddle/phi/kernels/cpu/bilinear_tensor_product_kernel.cc b/paddle/phi/kernels/cpu/bilinear_tensor_product_kernel.cc index 25bc5913865..d8226564182 100644 --- a/paddle/phi/kernels/cpu/bilinear_tensor_product_kernel.cc +++ b/paddle/phi/kernels/cpu/bilinear_tensor_product_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/bilinear_tensor_product_kernel.h" -#include "paddle/phi/kernels/impl/bilinear_tensor_product_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/bilinear_tensor_product_kernel_impl.h" PD_REGISTER_KERNEL(bilinear_tensor_product, CPU, diff --git a/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc b/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc index 0869cd62024..413638e1772 100644 --- a/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/broadcast_tensors_grad_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/broadcast_tensors_grad_kernel.h" #include + #include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/phi/kernels/cpu/broadcast_tensors_kernel.cc b/paddle/phi/kernels/cpu/broadcast_tensors_kernel.cc index 4cb6db87692..3ad26164d7d 100644 --- a/paddle/phi/kernels/cpu/broadcast_tensors_kernel.cc +++ b/paddle/phi/kernels/cpu/broadcast_tensors_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/broadcast_tensors_kernel.h" -#include "paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/broadcast_tensors_kernel_impl.h" PD_REGISTER_KERNEL(broadcast_tensors, CPU, diff --git a/paddle/phi/kernels/cpu/cast_kernel.cc b/paddle/phi/kernels/cpu/cast_kernel.cc index 2132f0d5ae8..8abfa173fd0 100644 --- a/paddle/phi/kernels/cpu/cast_kernel.cc +++ b/paddle/phi/kernels/cpu/cast_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/cast_kernel.h" -#include "paddle/phi/kernels/cpu/cast_impl.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/visit_type.h" +#include "paddle/phi/kernels/cpu/cast_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/channel_shuffle_grad_kernel.cc b/paddle/phi/kernels/cpu/channel_shuffle_grad_kernel.cc index fcc91b21916..e95b454dbf9 100644 --- a/paddle/phi/kernels/cpu/channel_shuffle_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/channel_shuffle_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/phi/kernels/channel_shuffle_grad_kernel.h" -#include "paddle/phi/kernels/impl/channel_shuffle_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/channel_shuffle_grad_kernel_impl.h" PD_REGISTER_KERNEL(channel_shuffle_grad, CPU, diff --git a/paddle/phi/kernels/cpu/channel_shuffle_kernel.cc b/paddle/phi/kernels/cpu/channel_shuffle_kernel.cc index 95d19ec6a77..0bac82e779c 100644 --- a/paddle/phi/kernels/cpu/channel_shuffle_kernel.cc +++ b/paddle/phi/kernels/cpu/channel_shuffle_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/channel_shuffle_kernel.h" -#include "paddle/phi/kernels/impl/channel_shuffle_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/channel_shuffle_kernel_impl.h" PD_REGISTER_KERNEL(channel_shuffle, CPU, diff --git a/paddle/phi/kernels/cpu/cholesky_solve_grad_kernel.cc b/paddle/phi/kernels/cpu/cholesky_solve_grad_kernel.cc index b6f5dd29ba2..612d10994cb 100644 --- a/paddle/phi/kernels/cpu/cholesky_solve_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/cholesky_solve_grad_kernel.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/cholesky_solve_grad_kernel_impl.h" PD_REGISTER_KERNEL(cholesky_solve_grad, CPU, diff --git a/paddle/phi/kernels/cpu/cholesky_solve_kernel.cc b/paddle/phi/kernels/cpu/cholesky_solve_kernel.cc index 02597560a7f..11cb66f88c1 100644 --- a/paddle/phi/kernels/cpu/cholesky_solve_kernel.cc +++ b/paddle/phi/kernels/cpu/cholesky_solve_kernel.cc @@ -12,11 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/lapack/lapack_function.h" +#include "paddle/phi/kernels/impl/cholesky_solve_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/clip_grad_kernel.cc b/paddle/phi/kernels/cpu/clip_grad_kernel.cc index bccdc0746d5..89a14af10d1 100644 --- a/paddle/phi/kernels/cpu/clip_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/clip_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/clip_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/clip_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/clip_kernel.cc b/paddle/phi/kernels/cpu/clip_kernel.cc index 5fd9aea966f..bcbb8527927 100644 --- a/paddle/phi/kernels/cpu/clip_kernel.cc +++ b/paddle/phi/kernels/cpu/clip_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/clip_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/clip_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/compare_kernel.cc b/paddle/phi/kernels/cpu/compare_kernel.cc index 9006325a521..694b44c16d8 100644 --- a/paddle/phi/kernels/cpu/compare_kernel.cc +++ b/paddle/phi/kernels/cpu/compare_kernel.cc @@ -13,11 +13,11 @@ // limitations under the License. #include "paddle/phi/kernels/compare_kernel.h" -#include "paddle/phi/kernels/impl/compare_kernel_impl.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/elementwise_base.h" +#include "paddle/phi/kernels/impl/compare_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/complex_grad_kernel.cc b/paddle/phi/kernels/cpu/complex_grad_kernel.cc index 5c1d50f5bf2..11b7a058346 100644 --- a/paddle/phi/kernels/cpu/complex_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/complex_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/complex_grad_kernel.h" -#include "paddle/phi/kernels/impl/complex_grad_kernel_impl.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/complex_grad_kernel_impl.h" PD_REGISTER_KERNEL(real_grad, CPU, diff --git a/paddle/phi/kernels/cpu/complex_kernel.cc b/paddle/phi/kernels/cpu/complex_kernel.cc index 859d5a84527..bef0b7b747a 100644 --- a/paddle/phi/kernels/cpu/complex_kernel.cc +++ b/paddle/phi/kernels/cpu/complex_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/complex_kernel.h" -#include "paddle/phi/kernels/impl/complex_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/complex_kernel_impl.h" // See Note [ Why still include the fluid headers? ] #include "paddle/phi/common/complex.h" diff --git a/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc b/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc index 4538ccf9433..3289c8f5c84 100644 --- a/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/conv_grad_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/conv_grad_grad_kernel.h" -#include "paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/conv_grad_grad_kernel_impl.h" namespace phi { template diff --git a/paddle/phi/kernels/cpu/conv_grad_kernel.cc b/paddle/phi/kernels/cpu/conv_grad_kernel.cc index 2d8a9bf1de7..880837dd7cd 100644 --- a/paddle/phi/kernels/cpu/conv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/conv_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/conv_grad_kernel.h" -#include "paddle/phi/kernels/impl/conv_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/conv_grad_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/conv_kernel.cc b/paddle/phi/kernels/cpu/conv_kernel.cc index e0b4ee7d577..ec325319493 100644 --- a/paddle/phi/kernels/cpu/conv_kernel.cc +++ b/paddle/phi/kernels/cpu/conv_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/phi/kernels/conv_kernel.h" -#include "paddle/phi/kernels/impl/conv_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/conv_kernel_impl.h" namespace phi { template diff --git a/paddle/phi/kernels/cpu/conv_transpose_grad_kernel.cc b/paddle/phi/kernels/cpu/conv_transpose_grad_kernel.cc index 8d074950069..17fe44dea3f 100644 --- a/paddle/phi/kernels/cpu/conv_transpose_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/conv_transpose_grad_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/conv_transpose_grad_kernel.h" -#include "paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/conv_transpose_grad_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/conv_transpose_kernel.cc b/paddle/phi/kernels/cpu/conv_transpose_kernel.cc index b4cacc85093..ad9a5933f28 100644 --- a/paddle/phi/kernels/cpu/conv_transpose_kernel.cc +++ b/paddle/phi/kernels/cpu/conv_transpose_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/conv_transpose_kernel.h" -#include "paddle/phi/kernels/impl/conv_transpose_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/conv_transpose_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/cross_entropy_kernel.cc b/paddle/phi/kernels/cpu/cross_entropy_kernel.cc index c684fb416ea..bd3eb3eb754 100644 --- a/paddle/phi/kernels/cpu/cross_entropy_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_entropy_kernel.cc @@ -14,6 +14,7 @@ limitations under the License. */ #include "paddle/phi/kernels/cross_entropy_kernel.h" +#include "paddle/fluid/operators/math/cross_entropy.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/copy_kernel.h" @@ -21,8 +22,6 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/softmax_kernel.h" -#include "paddle/fluid/operators/math/cross_entropy.h" - namespace phi { template diff --git a/paddle/phi/kernels/cpu/cross_grad_kernel.cc b/paddle/phi/kernels/cpu/cross_grad_kernel.cc index 390420008e6..8dddc6f6e4e 100644 --- a/paddle/phi/kernels/cpu/cross_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/cross_grad_kernel.h" -#include "paddle/phi/kernels/impl/cross_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/cross_grad_kernel_impl.h" PD_REGISTER_KERNEL(cross_grad, CPU, diff --git a/paddle/phi/kernels/cpu/cross_kernel.cc b/paddle/phi/kernels/cpu/cross_kernel.cc index a63f33174ea..1f3a8fe5a38 100644 --- a/paddle/phi/kernels/cpu/cross_kernel.cc +++ b/paddle/phi/kernels/cpu/cross_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/phi/kernels/cross_kernel.h" -#include "paddle/phi/kernels/impl/cross_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/cross_kernel_impl.h" PD_REGISTER_KERNEL( cross, CPU, ALL_LAYOUT, phi::CrossKernel, float, double, int, int64_t) {} diff --git a/paddle/phi/kernels/cpu/cumprod_kernel.cc b/paddle/phi/kernels/cpu/cumprod_kernel.cc index aea338027f5..4ecf0929184 100644 --- a/paddle/phi/kernels/cpu/cumprod_kernel.cc +++ b/paddle/phi/kernels/cpu/cumprod_kernel.cc @@ -16,6 +16,7 @@ #include #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/complex_functors.h" diff --git a/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc b/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc index f64b1d3291f..a4d43ef8fbe 100644 --- a/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc @@ -58,10 +58,9 @@ inline void ModulatedDeformableCol2imCPUKernel( int w_in = w_out * stride_w - pad_w; int h_in = h_out * stride_h - pad_h; - const T* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * - 2 * kernel_h * kernel_w * height_col * - width_col; + const T* data_offset_ptr = + data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; const int data_offset_h_ptr = ((2 * (i * kernel_w + j)) * height_col + h_out) * width_col + w_out; const int data_offset_w_ptr = @@ -75,9 +74,9 @@ inline void ModulatedDeformableCol2imCPUKernel( T cur_top_grad = data_col[thread]; if (data_mask) { - const T* data_mask_ptr = data_mask + - (b * deformable_group + deformable_group_index) * - kernel_h * kernel_w * height_col * width_col; + const T* data_mask_ptr = + data_mask + (b * deformable_group + deformable_group_index) * + kernel_h * kernel_w * height_col * width_col; const T mask = data_mask_ptr[data_mask_hw_ptr]; cur_top_grad *= mask; } @@ -180,23 +179,20 @@ void ModulatedDeformableCol2imCoordCPUKernel( const int deformable_group_index = c / (2 * kernel_h * kernel_w); const int col_step = kernel_h * kernel_w; int cnt = 0; - const T* data_col_ptr = data_col + - deformable_group_index * - channel_per_deformable_group * batch_size * - width_col * height_col; - const T* data_im_ptr = data_im + - (b * deformable_group + deformable_group_index) * - channel_per_deformable_group / kernel_h / - kernel_w * height * width; - const T* data_offset_ptr = data_offset + - (b * deformable_group + deformable_group_index) * - 2 * kernel_h * kernel_w * height_col * - width_col; + const T* data_col_ptr = data_col + deformable_group_index * + channel_per_deformable_group * + batch_size * width_col * height_col; + const T* data_im_ptr = + data_im + (b * deformable_group + deformable_group_index) * + channel_per_deformable_group / kernel_h / kernel_w * + height * width; + const T* data_offset_ptr = + data_offset + (b * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; const T* data_mask_ptr = data_mask - ? data_mask + - (b * deformable_group + deformable_group_index) * kernel_h * - kernel_w * height_col * width_col + ? 
data_mask + (b * deformable_group + deformable_group_index) * + kernel_h * kernel_w * height_col * width_col : nullptr; const int offset_c = c - deformable_group_index * 2 * kernel_h * kernel_w; diff --git a/paddle/phi/kernels/cpu/diag_grad_kernel.cc b/paddle/phi/kernels/cpu/diag_grad_kernel.cc index c56b225e2a7..616ea753ef1 100644 --- a/paddle/phi/kernels/cpu/diag_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/diag_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/diag_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/diag_functor.h" diff --git a/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc b/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc index c3c290b4fe9..5671e70c96e 100644 --- a/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/diagonal_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/diagonal_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/diagonal.h" diff --git a/paddle/phi/kernels/cpu/diagonal_kernel.cc b/paddle/phi/kernels/cpu/diagonal_kernel.cc index df17b458e11..8ea5826ba25 100644 --- a/paddle/phi/kernels/cpu/diagonal_kernel.cc +++ b/paddle/phi/kernels/cpu/diagonal_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/diagonal_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/diagonal.h" diff --git a/paddle/phi/kernels/cpu/digamma_grad_kernel.cc b/paddle/phi/kernels/cpu/digamma_grad_kernel.cc index da1b5ae5566..dc7fcaf6f92 100644 --- a/paddle/phi/kernels/cpu/digamma_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/digamma_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/digamma_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/phi/kernels/cpu/digamma_kernel.cc b/paddle/phi/kernels/cpu/digamma_kernel.cc index ee120a29b60..80cbda4b7a9 100644 --- a/paddle/phi/kernels/cpu/digamma_kernel.cc +++ b/paddle/phi/kernels/cpu/digamma_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/digamma_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/phi/kernels/cpu/dist_grad_kernel.cc b/paddle/phi/kernels/cpu/dist_grad_kernel.cc index 2b7f8f98f94..c1aaa2adf75 100644 --- a/paddle/phi/kernels/cpu/dist_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/dist_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/dist_grad_kernel.h" -#include "paddle/phi/kernels/impl/dist_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/dist_grad_kernel_impl.h" PD_REGISTER_KERNEL( dist_grad, CPU, ALL_LAYOUT, phi::DistGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/dist_kernel.cc b/paddle/phi/kernels/cpu/dist_kernel.cc index ccf3d4be832..0c7b5db64b3 100644 --- a/paddle/phi/kernels/cpu/dist_kernel.cc +++ b/paddle/phi/kernels/cpu/dist_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. 
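The deformable_conv hunk above is the one spot in this file set where the reflow touches non-trivial arithmetic: each pointer is advanced by a row-major flattened index, and the new grouping only re-breaks a chain of multiplications whose evaluation is strictly left to right. A small self-contained check of that offset computation, with made-up sizes:

    #include <cassert>
    #include <vector>

    int main() {
      // Offset into a row-major [batch, group, 2, kh, kw, h_col, w_col]
      // buffer, grouped the same way clang-format re-wrapped it above.
      const int b = 1, deformable_group = 2, group_index = 1;
      const int kernel_h = 3, kernel_w = 3, height_col = 4, width_col = 5;

      std::vector<float> data_offset(2 * deformable_group * 2 * kernel_h *
                                     kernel_w * height_col * width_col);
      const float* data_offset_ptr =
          data_offset.data() + (b * deformable_group + group_index) * 2 *
                                   kernel_h * kernel_w * height_col * width_col;
      // The parenthesized group index is the only grouping that matters; the
      // rest is one left-to-right product, so the re-wrap is behavior-preserving.
      assert(data_offset_ptr - data_offset.data() == 3 * 2 * 3 * 3 * 4 * 5);
      return 0;
    }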
#include "paddle/phi/kernels/dist_kernel.h" -#include "paddle/phi/kernels/impl/dist_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/dist_kernel_impl.h" PD_REGISTER_KERNEL(dist, CPU, ALL_LAYOUT, phi::DistKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/dot_grad_kernel.cc b/paddle/phi/kernels/cpu/dot_grad_kernel.cc index a2abdb7c009..883b7780221 100644 --- a/paddle/phi/kernels/cpu/dot_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/dot_grad_kernel.cc @@ -13,12 +13,11 @@ // limitations under the License. #include "paddle/phi/kernels/dot_grad_kernel.h" -#include "paddle/phi/kernels/impl/dot_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/dot_grad_kernel_impl.h" PD_REGISTER_KERNEL(dot_grad, CPU, diff --git a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc index b77a6c55b14..db956564218 100644 --- a/paddle/phi/kernels/cpu/dropout_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/dropout_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/dropout_kernel.cc b/paddle/phi/kernels/cpu/dropout_kernel.cc index fa12e505e42..d9c02eff010 100644 --- a/paddle/phi/kernels/cpu/dropout_kernel.cc +++ b/paddle/phi/kernels/cpu/dropout_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/dropout_kernel.h" + #include "paddle/fluid/framework/generator.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/eigh_grad_kernel.cc b/paddle/phi/kernels/cpu/eigh_grad_kernel.cc index 5135778db56..db533416d27 100644 --- a/paddle/phi/kernels/cpu/eigh_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/eigh_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/eigh_grad_kernel.h" -#include "paddle/phi/kernels/impl/eigh_grad_kernel_impl.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/eigh_grad_kernel_impl.h" PD_REGISTER_KERNEL(eigh_grad, CPU, diff --git a/paddle/phi/kernels/cpu/eigh_kernel.cc b/paddle/phi/kernels/cpu/eigh_kernel.cc index 92fd20ca9b8..0f0a10c8377 100644 --- a/paddle/phi/kernels/cpu/eigh_kernel.cc +++ b/paddle/phi/kernels/cpu/eigh_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/eigh_kernel.h" -#include "paddle/phi/kernels/funcs/values_vectors_functor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/complex_functors.h" +#include "paddle/phi/kernels/funcs/values_vectors_functor.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/einsum_kernel.cc b/paddle/phi/kernels/cpu/einsum_kernel.cc index 8968542b3e0..401d2fd158a 100644 --- a/paddle/phi/kernels/cpu/einsum_kernel.cc +++ b/paddle/phi/kernels/cpu/einsum_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/einsum_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/einsum_impl.h" diff --git a/paddle/phi/kernels/cpu/elementwise.h b/paddle/phi/kernels/cpu/elementwise.h index 0f67df66113..255dae7da01 100644 --- a/paddle/phi/kernels/cpu/elementwise.h +++ b/paddle/phi/kernels/cpu/elementwise.h @@ -16,10 +16,9 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/broadcast_function.h" #include "paddle/phi/kernels/funcs/common_shape.h" - -#include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/eigen/common.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc index 5019b9f5706..b5e28ab39e5 100644 --- a/paddle/phi/kernels/cpu/elementwise_add_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_add_kernel.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/api/ext/dispatch.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc index d380621818b..15fe92c9291 100644 --- a/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_divide_kernel.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/api/ext/dispatch.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/elementwise_kernel.cc b/paddle/phi/kernels/cpu/elementwise_kernel.cc index 286b0d0ffaa..f090ddd5bbe 100644 --- a/paddle/phi/kernels/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_kernel.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc b/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc index 2424a533010..34915037384 100644 --- a/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_multiply_kernel.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/api/ext/dispatch.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc b/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc index 0e97852ac33..a013309233d 100644 --- a/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_subtract_kernel.cc @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/api/ext/dispatch.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/impl/elementwise_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/embedding_grad_kernel.cc b/paddle/phi/kernels/cpu/embedding_grad_kernel.cc index 21b3e6da8d9..fabb4e83d52 100644 --- a/paddle/phi/kernels/cpu/embedding_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/embedding_grad_kernel.cc @@ -13,11 +13,11 @@ // limitations under the License. #include "paddle/phi/kernels/embedding_grad_kernel.h" -#include "paddle/phi/kernels/funcs/embedding_util.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/embedding_util.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/embedding_kernel.cc b/paddle/phi/kernels/cpu/embedding_kernel.cc index 76cc3814b05..0430f7a0052 100644 --- a/paddle/phi/kernels/cpu/embedding_kernel.cc +++ b/paddle/phi/kernels/cpu/embedding_kernel.cc @@ -13,12 +13,12 @@ // limitations under the License. #include "paddle/phi/kernels/embedding_kernel.h" -#include "paddle/phi/kernels/funcs/embedding_util.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/embedding_util.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/erf_grad_kernel.cc b/paddle/phi/kernels/cpu/erf_grad_kernel.cc index 3c1cd0df153..ae0b218bc0b 100644 --- a/paddle/phi/kernels/cpu/erf_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/erf_grad_kernel.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/erf_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/erf_kernel.cc b/paddle/phi/kernels/cpu/erf_kernel.cc index 05ce4cab7fc..ace9775c0b8 100644 --- a/paddle/phi/kernels/cpu/erf_kernel.cc +++ b/paddle/phi/kernels/cpu/erf_kernel.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/phi/kernels/erf_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/erfinv_grad_kernel.cc b/paddle/phi/kernels/cpu/erfinv_grad_kernel.cc index b1fe4f026ab..2d363189936 100644 --- a/paddle/phi/kernels/cpu/erfinv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/erfinv_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/erfinv_grad_kernel.h" -#include "paddle/phi/kernels/impl/erfinv_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/erfinv_grad_kernel_impl.h" PD_REGISTER_KERNEL( erfinv_grad, CPU, ALL_LAYOUT, phi::ErfinvGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/erfinv_kernel.cc b/paddle/phi/kernels/cpu/erfinv_kernel.cc index 4f3a740f9d9..f298cc358d6 100644 --- a/paddle/phi/kernels/cpu/erfinv_kernel.cc +++ b/paddle/phi/kernels/cpu/erfinv_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/erfinv_kernel.h" -#include "paddle/phi/kernels/impl/erfinv_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/erfinv_kernel_impl.h" PD_REGISTER_KERNEL(erfinv, CPU, ALL_LAYOUT, phi::ErfinvKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/expand_as_grad_kernel.cc b/paddle/phi/kernels/cpu/expand_as_grad_kernel.cc index 6eafe9aa49d..c57e3a87281 100644 --- a/paddle/phi/kernels/cpu/expand_as_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/expand_as_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/expand_as_grad_kernel.h" -#include "paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/expand_as_grad_kernel_impl.h" PD_REGISTER_KERNEL(expand_as_grad, CPU, diff --git a/paddle/phi/kernels/cpu/expand_as_kernel.cc b/paddle/phi/kernels/cpu/expand_as_kernel.cc index 697ea138097..4ec28ef8413 100644 --- a/paddle/phi/kernels/cpu/expand_as_kernel.cc +++ b/paddle/phi/kernels/cpu/expand_as_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/expand_as_kernel.h" -#include "paddle/phi/kernels/impl/expand_as_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/expand_as_kernel_impl.h" PD_REGISTER_KERNEL(expand_as, CPU, diff --git a/paddle/phi/kernels/cpu/expand_grad_kernel.cc b/paddle/phi/kernels/cpu/expand_grad_kernel.cc index 4799a6aa7af..5cbbf253b74 100644 --- a/paddle/phi/kernels/cpu/expand_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/expand_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/expand_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/phi/kernels/cpu/expand_kernel.cc b/paddle/phi/kernels/cpu/expand_kernel.cc index 07704897672..2df833d0f9c 100644 --- a/paddle/phi/kernels/cpu/expand_kernel.cc +++ b/paddle/phi/kernels/cpu/expand_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/expand_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" diff --git a/paddle/phi/kernels/cpu/eye_kernel.cc b/paddle/phi/kernels/cpu/eye_kernel.cc index a0d0f2c4390..ef3489d3fae 100644 --- a/paddle/phi/kernels/cpu/eye_kernel.cc +++ b/paddle/phi/kernels/cpu/eye_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/eye_kernel.h" -#include "paddle/phi/kernels/impl/eye_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/eye_kernel_impl.h" PD_REGISTER_KERNEL(eye, CPU, diff --git a/paddle/phi/kernels/cpu/frobenius_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/frobenius_norm_grad_kernel.cc index 338be9e252d..5434296be4d 100644 --- a/paddle/phi/kernels/cpu/frobenius_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/frobenius_norm_grad_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/frobenius_norm_grad_kernel.h" -#include "paddle/phi/kernels/impl/frobenius_norm_grad_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/frobenius_norm_grad_kernel_impl.h" PD_REGISTER_KERNEL(frobenius_norm_grad, CPU, diff --git a/paddle/phi/kernels/cpu/frobenius_norm_kernel.cc b/paddle/phi/kernels/cpu/frobenius_norm_kernel.cc index 77509b953bf..56444ddad8d 100644 --- a/paddle/phi/kernels/cpu/frobenius_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/frobenius_norm_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/frobenius_norm_kernel.h" -#include "paddle/phi/kernels/impl/frobenius_norm_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/frobenius_norm_kernel_impl.h" PD_REGISTER_KERNEL( frobenius_norm, CPU, ALL_LAYOUT, phi::FrobeniusNormKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/full_kernel.cc b/paddle/phi/kernels/cpu/full_kernel.cc index 0b76425a659..ceb2312b53a 100644 --- a/paddle/phi/kernels/cpu/full_kernel.cc +++ b/paddle/phi/kernels/cpu/full_kernel.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/cpu/gather_nd_grad_kernel.cc b/paddle/phi/kernels/cpu/gather_nd_grad_kernel.cc index b375a7ec469..88a288afd31 100644 --- a/paddle/phi/kernels/cpu/gather_nd_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/gather_nd_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/gather_nd_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" diff --git a/paddle/phi/kernels/cpu/gather_nd_kernel.cc b/paddle/phi/kernels/cpu/gather_nd_kernel.cc index aa32d036934..8ae866a1c8a 100644 --- a/paddle/phi/kernels/cpu/gather_nd_kernel.cc +++ b/paddle/phi/kernels/cpu/gather_nd_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/gather_nd_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/gather.h" diff --git a/paddle/phi/kernels/cpu/gather_tree_kernel.cc b/paddle/phi/kernels/cpu/gather_tree_kernel.cc index 25fb870d851..6f3cac6c4aa 100644 --- a/paddle/phi/kernels/cpu/gather_tree_kernel.cc +++ b/paddle/phi/kernels/cpu/gather_tree_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/gather_tree_kernel.h" + #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/gaussian_random_kernel.cc b/paddle/phi/kernels/cpu/gaussian_random_kernel.cc index 348d24b534e..c600149cbba 100644 --- a/paddle/phi/kernels/cpu/gaussian_random_kernel.cc +++ b/paddle/phi/kernels/cpu/gaussian_random_kernel.cc @@ -14,11 +14,10 @@ #include "paddle/phi/kernels/gaussian_random_kernel.h" +#include "paddle/fluid/framework/generator.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/fluid/framework/generator.h" - namespace phi { template diff --git a/paddle/phi/kernels/cpu/gelu_kernel.cc b/paddle/phi/kernels/cpu/gelu_kernel.cc index d7af2205745..4d23470aa4e 100644 --- a/paddle/phi/kernels/cpu/gelu_kernel.cc +++ b/paddle/phi/kernels/cpu/gelu_kernel.cc @@ -13,8 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/gelu_kernel.h" + #include #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/blas/blas.h" diff --git a/paddle/phi/kernels/cpu/graph_reindex_kernel.cc b/paddle/phi/kernels/cpu/graph_reindex_kernel.cc index 92f2dc41e65..428bcb03170 100644 --- a/paddle/phi/kernels/cpu/graph_reindex_kernel.cc +++ b/paddle/phi/kernels/cpu/graph_reindex_kernel.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/phi/kernels/graph_reindex_kernel.h" + #include #include -#include "paddle/phi/kernels/graph_reindex_kernel.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/graph_sample_neighbors_kernel.cc b/paddle/phi/kernels/cpu/graph_sample_neighbors_kernel.cc index 70aac053417..1ef5373d631 100644 --- a/paddle/phi/kernels/cpu/graph_sample_neighbors_kernel.cc +++ b/paddle/phi/kernels/cpu/graph_sample_neighbors_kernel.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - #include "paddle/phi/kernels/graph_sample_neighbors_kernel.h" +#include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/graph_send_recv_grad_kernel.cc b/paddle/phi/kernels/cpu/graph_send_recv_grad_kernel.cc index 6ea65d005c1..ad04bd258e1 100644 --- a/paddle/phi/kernels/cpu/graph_send_recv_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/graph_send_recv_grad_kernel.cc @@ -13,12 +13,12 @@ // limitations under the License. 
#include "paddle/phi/kernels/graph_send_recv_grad_kernel.h" -#include "paddle/phi/kernels/cpu/graph_send_recv_funcs.h" #include #include #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/graph_send_recv_funcs.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc b/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc index 8f71ba12cc4..e4034230c78 100644 --- a/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc +++ b/paddle/phi/kernels/cpu/graph_send_recv_kernel.cc @@ -13,7 +13,6 @@ // limitations under the License. #include "paddle/phi/kernels/graph_send_recv_kernel.h" -#include "paddle/phi/kernels/cpu/graph_send_recv_funcs.h" #include #include @@ -22,6 +21,7 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/hostdevice.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/cpu/graph_send_recv_funcs.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/grid_sample_grad_kernel.cc b/paddle/phi/kernels/cpu/grid_sample_grad_kernel.cc index 923cb842411..32fa0d5aafe 100644 --- a/paddle/phi/kernels/cpu/grid_sample_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/grid_sample_grad_kernel.cc @@ -73,8 +73,9 @@ static inline void ClipWithMask(const CPUContext& ctx, .cwiseMin(static_cast(max_val)); auto in_bound = (clipped == reflected).template cast(); grid_scale_t.device(place) = - grid_scale_t * ((is_neg == one_more_flip).template cast() - - (is_neg != one_more_flip).template cast()) * + grid_scale_t * + ((is_neg == one_more_flip).template cast() - + (is_neg != one_more_flip).template cast()) * in_bound; grid_slice_t.device(place) = clipped; } diff --git a/paddle/phi/kernels/cpu/gumbel_softmax_grad_kernel.cc b/paddle/phi/kernels/cpu/gumbel_softmax_grad_kernel.cc index a4c131e72b5..832df98e0f3 100644 --- a/paddle/phi/kernels/cpu/gumbel_softmax_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/gumbel_softmax_grad_kernel.cc @@ -13,9 +13,9 @@ // limitations under the License. #include "paddle/phi/kernels/gumbel_softmax_grad_kernel.h" -#include "paddle/phi/kernels/impl/gumbel_softmax_grad_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/gumbel_softmax_grad_kernel_impl.h" PD_REGISTER_KERNEL(gumbel_softmax_grad, CPU, diff --git a/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc b/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc index eb406665c5f..7638ca3aa7e 100644 --- a/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc +++ b/paddle/phi/kernels/cpu/gumbel_softmax_kernel.cc @@ -13,11 +13,11 @@ // limitations under the License. #include "paddle/phi/kernels/gumbel_softmax_kernel.h" -#include "paddle/phi/kernels/impl/gumbel_softmax_kernel_impl.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/axis_utils.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/impl/gumbel_softmax_kernel_impl.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/histogram_kernel.cc b/paddle/phi/kernels/cpu/histogram_kernel.cc index 82b88f868d8..d9c41508efd 100644 --- a/paddle/phi/kernels/cpu/histogram_kernel.cc +++ b/paddle/phi/kernels/cpu/histogram_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/histogram_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/cpu/huber_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/huber_loss_grad_kernel.cc index 654f2c9400a..b52a587070a 100644 --- a/paddle/phi/kernels/cpu/huber_loss_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/huber_loss_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/huber_loss_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/huber_loss_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/huber_loss_kernel.cc b/paddle/phi/kernels/cpu/huber_loss_kernel.cc index 702c0589057..2c4d8941ab8 100644 --- a/paddle/phi/kernels/cpu/huber_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/huber_loss_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/huber_loss_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/huber_loss_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc b/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc index d060e8c9b28..fe8ca4e432e 100644 --- a/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/index_sample_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/index_sample_grad_kernel.h" + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/cpu/cpu_context.h" diff --git a/paddle/phi/kernels/cpu/index_sample_kernel.cc b/paddle/phi/kernels/cpu/index_sample_kernel.cc index b895e4aa7c0..faa6953704e 100644 --- a/paddle/phi/kernels/cpu/index_sample_kernel.cc +++ b/paddle/phi/kernels/cpu/index_sample_kernel.cc @@ -13,12 +13,14 @@ // limitations under the License. 
#include "paddle/phi/kernels/index_sample_kernel.h" + #include #include #include #include #include #include + #include "paddle/fluid/framework/convert_utils.h" #include "paddle/fluid/framework/tensor_util.h" #include "paddle/phi/backends/cpu/cpu_context.h" diff --git a/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc index 867d43fd833..45ef0034109 100644 --- a/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/instance_norm_grad_kernel.cc @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" @@ -142,12 +143,11 @@ void InstanceNormGradKernel(const Context& dev_ctx, dx_arr.device(*place) = scale_arr.broadcast(bcast_param) * inv_var_arr.broadcast(bcast) * (dy_arr - dy_mean - - tmp * - (dy_arr * tmp) - .mean(mean_rdims) - .reshape(NxC_shape) - .eval() - .broadcast(bcast)); + tmp * (dy_arr * tmp) + .mean(mean_rdims) + .reshape(NxC_shape) + .eval() + .broadcast(bcast)); } template diff --git a/paddle/phi/kernels/cpu/instance_norm_kernel.cc b/paddle/phi/kernels/cpu/instance_norm_kernel.cc index 5eac473effa..4deced5499e 100644 --- a/paddle/phi/kernels/cpu/instance_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/instance_norm_kernel.cc @@ -17,6 +17,7 @@ #include #include #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc b/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc index d4e13aa3b24..edd41b2c7a3 100644 --- a/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/interpolate_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/interpolate_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/isclose_kernel.cc b/paddle/phi/kernels/cpu/isclose_kernel.cc index 633c6ba093e..dca21494b3e 100644 --- a/paddle/phi/kernels/cpu/isclose_kernel.cc +++ b/paddle/phi/kernels/cpu/isclose_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/isclose_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/isclose_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/kldiv_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/kldiv_loss_grad_kernel.cc index f9399d38d71..9f6e2573e33 100644 --- a/paddle/phi/kernels/cpu/kldiv_loss_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/kldiv_loss_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/kldiv_loss_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/kldiv_loss_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/kldiv_loss_kernel.cc b/paddle/phi/kernels/cpu/kldiv_loss_kernel.cc index c462b8ec32c..ecb1915cf42 100644 --- a/paddle/phi/kernels/cpu/kldiv_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/kldiv_loss_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/kldiv_loss_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/kldiv_loss_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/label_smooth_grad_kernel.cc b/paddle/phi/kernels/cpu/label_smooth_grad_kernel.cc index 74664fb270b..1a900b4bc2a 100644 --- a/paddle/phi/kernels/cpu/label_smooth_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/label_smooth_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/label_smooth_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/label_smooth_kernel.cc b/paddle/phi/kernels/cpu/label_smooth_kernel.cc index af9548e8186..cdeed73310d 100644 --- a/paddle/phi/kernels/cpu/label_smooth_kernel.cc +++ b/paddle/phi/kernels/cpu/label_smooth_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/label_smooth_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc index a30f54fd4b6..081a32b4f24 100644 --- a/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/layer_norm_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/layer_norm_grad_kernel.h" + #include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/funcs/layer_norm_util.h" #if !defined(PADDLE_WITH_CUDA) && !defined(_WIN32) && !defined(__APPLE__) && \ diff --git a/paddle/phi/kernels/cpu/layer_norm_kernel.cc b/paddle/phi/kernels/cpu/layer_norm_kernel.cc index 52722468e16..dbc3da0ca15 100644 --- a/paddle/phi/kernels/cpu/layer_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/layer_norm_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/layer_norm_kernel.h" + #include "paddle/phi/kernels/cpu/elementwise.h" #include "paddle/phi/kernels/funcs/layer_norm_util.h" #if !defined(PADDLE_WITH_CUDA) && !defined(_WIN32) && !defined(__APPLE__) && \ diff --git a/paddle/phi/kernels/cpu/lerp_grad_kernel.cc b/paddle/phi/kernels/cpu/lerp_grad_kernel.cc index d74919011ec..ae98cb9d03a 100644 --- a/paddle/phi/kernels/cpu/lerp_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/lerp_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/lerp_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/lerp_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/lerp_kernel.cc b/paddle/phi/kernels/cpu/lerp_kernel.cc index 7adfc35bfa3..d02e706d8d6 100644 --- a/paddle/phi/kernels/cpu/lerp_kernel.cc +++ b/paddle/phi/kernels/cpu/lerp_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/lerp_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/lerp_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/lgamma_grad_kernel.cc b/paddle/phi/kernels/cpu/lgamma_grad_kernel.cc index 116fa3f8d3f..a87c01214a9 100644 --- a/paddle/phi/kernels/cpu/lgamma_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/lgamma_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/lgamma_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/lgamma_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/lgamma_kernel.cc b/paddle/phi/kernels/cpu/lgamma_kernel.cc index f849322174d..4979ad0b30b 100644 --- a/paddle/phi/kernels/cpu/lgamma_kernel.cc +++ b/paddle/phi/kernels/cpu/lgamma_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/lgamma_kernel.h" #include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/for_range.h" diff --git a/paddle/phi/kernels/cpu/log_softmax_grad_kernel.cc b/paddle/phi/kernels/cpu/log_softmax_grad_kernel.cc index 5f344b9cc3f..d3e5e90fd17 100644 --- a/paddle/phi/kernels/cpu/log_softmax_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/log_softmax_grad_kernel.cc @@ -55,10 +55,9 @@ struct LogSoftmaxGradFunctor { Eigen::DSizes one_axis(1, axis_dim); dx.device(*context.eigen_device()) = - dy - - (y.exp()) * (dy.reshape(batch_axis_remain) - .sum(along_class) - .broadcast(one_axis)); + dy - (y.exp()) * (dy.reshape(batch_axis_remain) + .sum(along_class) + .broadcast(one_axis)); } }; diff --git a/paddle/phi/kernels/cpu/log_softmax_kernel.cc b/paddle/phi/kernels/cpu/log_softmax_kernel.cc index 241742378cc..510eb7a6ca9 100644 --- a/paddle/phi/kernels/cpu/log_softmax_kernel.cc +++ b/paddle/phi/kernels/cpu/log_softmax_kernel.cc @@ -72,34 +72,31 @@ struct LogSoftmaxFunctor { // axis == -1, axis and class in same dimension, calculate along // class dimension directly for higher performance log_softmax.device(*context.eigen_device()) = - (logits - - logits.maximum(along_axis) - .eval() - .reshape(batch_by_one) - .broadcast(one_by_class)) + (logits - logits.maximum(along_axis) + .eval() + .reshape(batch_by_one) + .broadcast(one_by_class)) .unaryExpr(ValueClip()); } else { // axis != -1, class dimension split into (axis, remain), max and sum // should be calculated along axis dimension log_softmax.device(*context.eigen_device()) = - (logits.reshape(batch_axis_remain) - - logits.reshape(batch_axis_remain) - .maximum(along_axis) - .eval() - .reshape(batch_one_remain) - .broadcast(one_axis_one) - .reshape(batch_classes)) + (logits.reshape(batch_axis_remain) - logits.reshape(batch_axis_remain) + .maximum(along_axis) + .eval() + .reshape(batch_one_remain) + .broadcast(one_axis_one) + .reshape(batch_classes)) .unaryExpr(ValueClip()); } log_softmax.device(*context.eigen_device()) = - log_softmax - - log_softmax.exp() - .eval() - .reshape(batch_axis_remain) - .sum(along_axis) - .log() - .broadcast(one_axis); + log_softmax - log_softmax.exp() + .eval() + .reshape(batch_axis_remain) + .sum(along_axis) + .log() + .broadcast(one_axis); } }; diff --git a/paddle/phi/kernels/cpu/logsumexp_kernel.cc b/paddle/phi/kernels/cpu/logsumexp_kernel.cc index 06e0b30a9ca..f1fecdfbe9e 100644 --- a/paddle/phi/kernels/cpu/logsumexp_kernel.cc +++ b/paddle/phi/kernels/cpu/logsumexp_kernel.cc @@ -16,7 +16,6 @@ 
#include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/kernels/impl/logsumexp_kernel_impl.h" PD_REGISTER_KERNEL( diff --git a/paddle/phi/kernels/cpu/matmul_grad_kernel.cc b/paddle/phi/kernels/cpu/matmul_grad_kernel.cc index aba519ff048..e3cd8fff8a5 100644 --- a/paddle/phi/kernels/cpu/matmul_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/matmul_grad_kernel.cc @@ -16,7 +16,6 @@ limitations under the License. */ #include "paddle/phi/common/complex.h" #include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/kernels/impl/matmul_grad_kernel_impl.h" PD_REGISTER_KERNEL(matmul_grad, diff --git a/paddle/phi/kernels/cpu/matmul_kernel.cc b/paddle/phi/kernels/cpu/matmul_kernel.cc index 8aa25c0da07..c75a50130db 100644 --- a/paddle/phi/kernels/cpu/matmul_kernel.cc +++ b/paddle/phi/kernels/cpu/matmul_kernel.cc @@ -15,9 +15,8 @@ limitations under the License. */ #include "paddle/phi/kernels/matmul_kernel.h" #include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/matmul_kernel_impl.h" PD_REGISTER_KERNEL(matmul, diff --git a/paddle/phi/kernels/cpu/matrix_power_grad_kernel.cc b/paddle/phi/kernels/cpu/matrix_power_grad_kernel.cc index ae3b4d2b455..0f60f8da71a 100644 --- a/paddle/phi/kernels/cpu/matrix_power_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_power_grad_kernel.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/matrix_power_grad_kernel.h" -#include "paddle/phi/kernels/impl/matrix_power_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/matrix_power_grad_kernel_impl.h" PD_REGISTER_KERNEL(matrix_power_grad, CPU, diff --git a/paddle/phi/kernels/cpu/matrix_power_kernel.cc b/paddle/phi/kernels/cpu/matrix_power_kernel.cc index f40e1e616f5..08ee7cbc865 100644 --- a/paddle/phi/kernels/cpu/matrix_power_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_power_kernel.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/matrix_power_kernel.h" -#include "paddle/phi/kernels/impl/matrix_power_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/matrix_power_kernel_impl.h" PD_REGISTER_KERNEL( matrix_power, CPU, ALL_LAYOUT, phi::MatrixPowerKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/matrix_rank_kernel.cc b/paddle/phi/kernels/cpu/matrix_rank_kernel.cc index 5e13abe8aed..f56bd3d6dbe 100644 --- a/paddle/phi/kernels/cpu/matrix_rank_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_rank_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/phi/kernels/matrix_rank_kernel.h" -#include "paddle/phi/kernels/matrix_rank_tol_kernel.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/full_kernel.h" +#include "paddle/phi/kernels/matrix_rank_tol_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc index 3bfc07319e9..af9b7728389 100644 --- a/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc +++ b/paddle/phi/kernels/cpu/matrix_rank_tol_kernel.cc @@ -16,6 +16,7 @@ #include #include + #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/elementwise_multiply_kernel.h" #include "paddle/phi/kernels/full_kernel.h" diff --git a/paddle/phi/kernels/cpu/maxout_grad_kernel.cc b/paddle/phi/kernels/cpu/maxout_grad_kernel.cc index 429344a362b..dad4e96b5a8 100644 --- a/paddle/phi/kernels/cpu/maxout_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/maxout_grad_kernel.cc @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/impl/maxout_grad_kernel_impl.h" - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/maxout_grad_kernel_impl.h" PD_REGISTER_KERNEL( maxout_grad, CPU, ALL_LAYOUT, phi::MaxOutGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/maxout_kernel.cc b/paddle/phi/kernels/cpu/maxout_kernel.cc index e7cd3ab07ff..cc1d21d310b 100644 --- a/paddle/phi/kernels/cpu/maxout_kernel.cc +++ b/paddle/phi/kernels/cpu/maxout_kernel.cc @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/impl/maxout_kernel_impl.h" - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/maxout_kernel_impl.h" PD_REGISTER_KERNEL(maxout, CPU, ALL_LAYOUT, phi::MaxOutKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc b/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc index 159d1092553..5b43fb02b51 100644 --- a/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/meshgrid_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/meshgrid_grad_kernel.h" -#include "paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/meshgrid_grad_kernel_impl.h" PD_REGISTER_KERNEL(meshgrid_grad, CPU, diff --git a/paddle/phi/kernels/cpu/meshgrid_kernel.cc b/paddle/phi/kernels/cpu/meshgrid_kernel.cc index c201103b3da..35e43f7bbc8 100644 --- a/paddle/phi/kernels/cpu/meshgrid_kernel.cc +++ b/paddle/phi/kernels/cpu/meshgrid_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/meshgrid_kernel.h" -#include "paddle/phi/kernels/impl/meshgrid_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/meshgrid_kernel_impl.h" PD_REGISTER_KERNEL(meshgrid, CPU, diff --git a/paddle/phi/kernels/cpu/momentum_kernel.cc b/paddle/phi/kernels/cpu/momentum_kernel.cc index 63cc5592ef4..7a4ea9f19e5 100644 --- a/paddle/phi/kernels/cpu/momentum_kernel.cc +++ b/paddle/phi/kernels/cpu/momentum_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/momentum_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/momentum_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/multi_dot_grad_kernel.cc b/paddle/phi/kernels/cpu/multi_dot_grad_kernel.cc index 2cd75404be8..f6b07584ce4 100644 --- a/paddle/phi/kernels/cpu/multi_dot_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/multi_dot_grad_kernel.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/multi_dot_grad_kernel.h" -#include "paddle/phi/kernels/impl/multi_dot_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/multi_dot_kernel_impl.h" PD_REGISTER_KERNEL( multi_dot_grad, CPU, ALL_LAYOUT, phi::MultiDotGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/multi_dot_kernel.cc b/paddle/phi/kernels/cpu/multi_dot_kernel.cc index a4249a98e46..00cf425a038 100644 --- a/paddle/phi/kernels/cpu/multi_dot_kernel.cc +++ b/paddle/phi/kernels/cpu/multi_dot_kernel.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/multi_dot_kernel.h" -#include "paddle/phi/kernels/impl/multi_dot_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/multi_dot_kernel_impl.h" PD_REGISTER_KERNEL( multi_dot, CPU, ALL_LAYOUT, phi::MultiDotKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/multiplex_grad_kernel.cc b/paddle/phi/kernels/cpu/multiplex_grad_kernel.cc index f5a426e93db..12ba6dadde3 100644 --- a/paddle/phi/kernels/cpu/multiplex_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/multiplex_grad_kernel.cc @@ -15,7 +15,6 @@ #include "paddle/phi/kernels/multiplex_grad_kernel.h" #include "paddle/fluid/memory/memcpy.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/mv_kernel.cc b/paddle/phi/kernels/cpu/mv_kernel.cc index 7f76ddda6dd..408eda34e1c 100644 --- a/paddle/phi/kernels/cpu/mv_kernel.cc +++ b/paddle/phi/kernels/cpu/mv_kernel.cc @@ -16,7 +16,6 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/kernels/impl/mv_kernel_impl.h" PD_REGISTER_KERNEL(mv, CPU, ALL_LAYOUT, phi::MvKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc index dd2b09ee39a..9048e87d049 100644 --- a/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/nll_loss_grad_kernel.cc @@ -16,6 +16,7 @@ #include #include + #include "paddle/fluid/operators/math.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/nll_loss_kernel.cc b/paddle/phi/kernels/cpu/nll_loss_kernel.cc index 92cb6a1ad17..c966e91a9a6 100644 --- a/paddle/phi/kernels/cpu/nll_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/nll_loss_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/nll_loss_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/enforce.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/norm_grad_kernel.cc b/paddle/phi/kernels/cpu/norm_grad_kernel.cc index bd05e2c4c6e..92ca51b499c 100644 --- a/paddle/phi/kernels/cpu/norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/norm_grad_kernel.cc @@ -13,15 +13,13 @@ // limitations under the License. #include "paddle/phi/kernels/norm_grad_kernel.h" -#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/kernels/funcs/common_shape.h" +#include "paddle/phi/kernels/funcs/eigen/common.h" +#include "paddle/phi/kernels/funcs/eigen/eigen_function.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace phi { template diff --git a/paddle/phi/kernels/cpu/norm_kernel.cc b/paddle/phi/kernels/cpu/norm_kernel.cc index 50906d9c3bb..f69d03b66b1 100644 --- a/paddle/phi/kernels/cpu/norm_kernel.cc +++ b/paddle/phi/kernels/cpu/norm_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/norm_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/common_shape.h" diff --git a/paddle/phi/kernels/cpu/one_hot_kernel.cc b/paddle/phi/kernels/cpu/one_hot_kernel.cc index fc7979e41d9..f408c9f0361 100644 --- a/paddle/phi/kernels/cpu/one_hot_kernel.cc +++ b/paddle/phi/kernels/cpu/one_hot_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/one_hot_kernel.h" + #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/cpu/p_norm_grad_kernel.cc b/paddle/phi/kernels/cpu/p_norm_grad_kernel.cc index 44ab0504086..32905ab0878 100644 --- a/paddle/phi/kernels/cpu/p_norm_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/p_norm_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/p_norm_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/eigen/common.h" diff --git a/paddle/phi/kernels/cpu/p_norm_kernel.cc b/paddle/phi/kernels/cpu/p_norm_kernel.cc index 9da7fdbb297..597939953b2 100644 --- a/paddle/phi/kernels/cpu/p_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/p_norm_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/p_norm_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/common_shape.h" diff --git a/paddle/phi/kernels/cpu/pixel_shuffle_grad_kernel.cc b/paddle/phi/kernels/cpu/pixel_shuffle_grad_kernel.cc index b32065d4f0a..0e2bfd04b62 100644 --- a/paddle/phi/kernels/cpu/pixel_shuffle_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/pixel_shuffle_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/phi/kernels/pixel_shuffle_grad_kernel.h" -#include "paddle/phi/kernels/impl/pixel_shuffle_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/pixel_shuffle_grad_kernel_impl.h" PD_REGISTER_KERNEL(pixel_shuffle_grad, CPU, diff --git a/paddle/phi/kernels/cpu/pixel_shuffle_kernel.cc b/paddle/phi/kernels/cpu/pixel_shuffle_kernel.cc index 80f8fa7b50e..44dcb8b59f7 100644 --- a/paddle/phi/kernels/cpu/pixel_shuffle_kernel.cc +++ b/paddle/phi/kernels/cpu/pixel_shuffle_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/pixel_shuffle_kernel.h" -#include "paddle/phi/kernels/impl/pixel_shuffle_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/pixel_shuffle_kernel_impl.h" PD_REGISTER_KERNEL( pixel_shuffle, CPU, ALL_LAYOUT, phi::PixelShuffleKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/pixel_unshuffle_grad_kernel.cc b/paddle/phi/kernels/cpu/pixel_unshuffle_grad_kernel.cc index ef61fca3595..cbcbf1e129d 100644 --- a/paddle/phi/kernels/cpu/pixel_unshuffle_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/pixel_unshuffle_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/pixel_unshuffle_grad_kernel.h" -#include "paddle/phi/kernels/impl/pixel_unshuffle_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/pixel_unshuffle_grad_kernel_impl.h" PD_REGISTER_KERNEL(pixel_unshuffle_grad, CPU, diff --git a/paddle/phi/kernels/cpu/pixel_unshuffle_kernel.cc b/paddle/phi/kernels/cpu/pixel_unshuffle_kernel.cc index 9f4bc747f32..837378972c6 100644 --- a/paddle/phi/kernels/cpu/pixel_unshuffle_kernel.cc +++ b/paddle/phi/kernels/cpu/pixel_unshuffle_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/pixel_unshuffle_kernel.h" -#include "paddle/phi/kernels/impl/pixel_unshuffle_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/pixel_unshuffle_kernel_impl.h" PD_REGISTER_KERNEL(pixel_unshuffle, CPU, diff --git a/paddle/phi/kernels/cpu/poisson_kernel.cc b/paddle/phi/kernels/cpu/poisson_kernel.cc index 6a3e32c2f07..8ba1afe229e 100644 --- a/paddle/phi/kernels/cpu/poisson_kernel.cc +++ b/paddle/phi/kernels/cpu/poisson_kernel.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/phi/kernels/poisson_kernel.h" + #include #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/poisson_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/pool_grad_kernel.cc b/paddle/phi/kernels/cpu/pool_grad_kernel.cc index bb97694d8fc..68cd57c5227 100644 --- a/paddle/phi/kernels/cpu/pool_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/pool_grad_kernel.cc @@ -14,9 +14,8 @@ #include "paddle/phi/kernels/pool_grad_kernel.h" -#include "paddle/phi/kernels/impl/pool_grad_kernel_impl.h" - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/pool_grad_kernel_impl.h" PD_REGISTER_KERNEL( pool2d_grad, CPU, ALL_LAYOUT, phi::Pool2dGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/pool_kernel.cc b/paddle/phi/kernels/cpu/pool_kernel.cc index 1d57e282c3c..3d3880692c0 100644 --- a/paddle/phi/kernels/cpu/pool_kernel.cc +++ b/paddle/phi/kernels/cpu/pool_kernel.cc @@ -14,9 +14,8 @@ #include "paddle/phi/kernels/pool_kernel.h" -#include "paddle/phi/kernels/impl/pool_kernel_impl.h" - #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/pool_kernel_impl.h" PD_REGISTER_KERNEL(pool2d, CPU, ALL_LAYOUT, phi::Pool2dKernel, float, double) {} PD_REGISTER_KERNEL(max_pool2d_with_index, diff --git a/paddle/phi/kernels/cpu/psroi_pool_grad_kernel.cc b/paddle/phi/kernels/cpu/psroi_pool_grad_kernel.cc index b68c3ad545d..202baddd713 100644 --- a/paddle/phi/kernels/cpu/psroi_pool_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/psroi_pool_grad_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/psroi_pool_grad_kernel.h" #include + #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/math_function.h" diff --git a/paddle/phi/kernels/cpu/psroi_pool_kernel.cc b/paddle/phi/kernels/cpu/psroi_pool_kernel.cc index 4f7925ad00f..82eff70b756 100644 --- a/paddle/phi/kernels/cpu/psroi_pool_kernel.cc +++ b/paddle/phi/kernels/cpu/psroi_pool_kernel.cc @@ -15,6 +15,7 @@ #include "paddle/phi/kernels/psroi_pool_kernel.h" #include + #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/qr_kernel.cc b/paddle/phi/kernels/cpu/qr_kernel.cc index b0e82cedb6b..6a5551d9557 100644 --- a/paddle/phi/kernels/cpu/qr_kernel.cc +++ b/paddle/phi/kernels/cpu/qr_kernel.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include - #include "paddle/phi/kernels/qr_kernel.h" +#include + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/complex_functors.h" diff --git a/paddle/phi/kernels/cpu/reduce.h b/paddle/phi/kernels/cpu/reduce.h index 35395dccca1..dad288cff2c 100644 --- a/paddle/phi/kernels/cpu/reduce.h +++ b/paddle/phi/kernels/cpu/reduce.h @@ -17,10 +17,9 @@ #include #include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/visit_type.h" #include "paddle/phi/kernels/cast_kernel.h" - -#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/funcs/eigen/common.h" #include "paddle/phi/kernels/funcs/math_function.h" // See Note [ Why still include the fluid headers? 
] diff --git a/paddle/phi/kernels/cpu/reduce_sum_grad_kernel.cc b/paddle/phi/kernels/cpu/reduce_sum_grad_kernel.cc index 66ae5e02ffc..abc18b1c578 100644 --- a/paddle/phi/kernels/cpu/reduce_sum_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/reduce_sum_grad_kernel.cc @@ -111,4 +111,3 @@ PD_REGISTER_KERNEL(sum_grad, int64_t, phi::dtype::complex<float>, phi::dtype::complex<double>) {} - diff --git a/paddle/phi/kernels/cpu/rmsprop_kernel.cc b/paddle/phi/kernels/cpu/rmsprop_kernel.cc index fa1e1a2eed3..1d60823d759 100644 --- a/paddle/phi/kernels/cpu/rmsprop_kernel.cc +++ b/paddle/phi/kernels/cpu/rmsprop_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/rmsprop_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/rmsprop_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/rnn_functor.h b/paddle/phi/kernels/cpu/rnn_functor.h index ab6f98ffcd5..911814647d6 100644 --- a/paddle/phi/kernels/cpu/rnn_functor.h +++ b/paddle/phi/kernels/cpu/rnn_functor.h @@ -14,6 +14,8 @@ #pragma once +#include "paddle/fluid/framework/generator.h" +#include "paddle/fluid/operators/utils.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/empty_kernel.h" @@ -21,9 +23,6 @@ #include "paddle/phi/kernels/funcs/eigen/eigen_function.h" #include "paddle/phi/kernels/funcs/math_function.h" -#include "paddle/fluid/framework/generator.h" -#include "paddle/fluid/operators/utils.h" - namespace phi { #define DEFINE_MODE_DETECTOR(MODE_NAME, MODE_STR) \ @@ -252,9 +251,12 @@ inline std::vector<DenseTensor> Unbind(const DenseTensor& in) { } template <typename CellType, - template <typename, typename> class LayerT, - template <typename, typename> class SingleLayerT, - template <typename, typename> class BidirLayerT, + template <typename, typename> + class LayerT, + template <typename, typename> + class SingleLayerT, + template <typename, typename> + class BidirLayerT, typename T, typename Context> void RnnFunc(const Context& dev_ctx, diff --git a/paddle/phi/kernels/cpu/rnn_grad_kernel.cc b/paddle/phi/kernels/cpu/rnn_grad_kernel.cc index 4dd1894320a..1cd4add7d50 100644 --- a/paddle/phi/kernels/cpu/rnn_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/rnn_grad_kernel.cc @@ -16,7 +16,6 @@ #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/cpu/rnn_functor.h" #include "paddle/phi/kernels/funcs/activation_functor.h" @@ -962,8 +961,10 @@ void dropout_cpu_grad_function_inplace(const CPUContext& dev_ctx, } template <typename GradCellType, - template <typename, typename> class SingleGradLayerT, - template <typename, typename> class BidirGradLayerT, + template <typename, typename> + class SingleGradLayerT, + template <typename, typename> + class BidirGradLayerT, typename T> void RnnGradFunc(const CPUContext& dev_ctx, const DenseTensor& x, diff --git a/paddle/phi/kernels/cpu/rnn_kernel.cc b/paddle/phi/kernels/cpu/rnn_kernel.cc index 80c521918ed..e2e784b2943 100644 --- a/paddle/phi/kernels/cpu/rnn_kernel.cc +++ b/paddle/phi/kernels/cpu/rnn_kernel.cc @@ -49,7 +49,8 @@ struct Cell { }; template <typename T, - template <typename> class EigenActivationFunctor, + template <typename> + class EigenActivationFunctor, funcs::detail::ActivationType act_type> struct SimpleRNNCell : Cell<T> { void operator()(const CPUContext* dev_ctx, diff --git a/paddle/phi/kernels/cpu/roi_align_kernel.cc b/paddle/phi/kernels/cpu/roi_align_kernel.cc index cd779b72e7a..cf0dc47f47b 100644 --- a/paddle/phi/kernels/cpu/roi_align_kernel.cc +++ b/paddle/phi/kernels/cpu/roi_align_kernel.cc @@ -79,16 +79,12 @@ std::vector<std::vector<T>> GetIndexesAndRatios( for (std::size_t px = 0; px < pooled_width; px++) { for (std::size_t iy = 0; iy < roi_bin_grid_h;
iy++) { // calculate x of sample points - auto y = - roi_ymin + - bin_h * (py + - static_cast<T>(iy + .5f) / static_cast<T>(roi_bin_grid_h)); + auto y = roi_ymin + bin_h * (py + static_cast<T>(iy + .5f) / + static_cast<T>(roi_bin_grid_h)); for (std::size_t ix = 0; ix < roi_bin_grid_w; ix++) { // calculate x of sample points - auto x = roi_xmin + - bin_w * (px + - static_cast<T>(ix + .5f) / - static_cast<T>(roi_bin_grid_w)); + auto x = roi_xmin + bin_w * (px + static_cast<T>(ix + .5f) / + static_cast<T>(roi_bin_grid_w)); // deal with elements out of map if (y < -1.0 || y > height || x < -1.0 || x > width) { diff --git a/paddle/phi/kernels/cpu/scatter_grad_kernel.cc b/paddle/phi/kernels/cpu/scatter_grad_kernel.cc index 62fd58704c4..f09015f24a1 100644 --- a/paddle/phi/kernels/cpu/scatter_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/scatter_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/copy_kernel.h" diff --git a/paddle/phi/kernels/cpu/scatter_kernel.cc b/paddle/phi/kernels/cpu/scatter_kernel.cc index d48ceaf29a0..7032c3bb5a3 100644 --- a/paddle/phi/kernels/cpu/scatter_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/scatter_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/copy_kernel.h" diff --git a/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc b/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc index cc143ba8d0e..7c3665c5d2e 100644 --- a/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_nd_add_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/scatter_nd_add_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/copy_kernel.h" diff --git a/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc b/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc index 04ae10f5e8b..31e2f4c7161 100644 --- a/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc +++ b/paddle/phi/kernels/cpu/scatter_nd_add_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/scatter_nd_add_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/copy_kernel.h" diff --git a/paddle/phi/kernels/cpu/segment_pool_grad_kernel.cc b/paddle/phi/kernels/cpu/segment_pool_grad_kernel.cc index a5c9dc4c55e..744ec7805fa 100644 --- a/paddle/phi/kernels/cpu/segment_pool_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/segment_pool_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/segment_pool_grad_kernel.h" -#include "paddle/phi/kernels/impl/segment_pool_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/segment_pool_grad_kernel_impl.h" PD_REGISTER_KERNEL(segment_pool_grad, CPU, diff --git a/paddle/phi/kernels/cpu/segment_pool_kernel.cc b/paddle/phi/kernels/cpu/segment_pool_kernel.cc index ad76a7a86bc..541ccd34365 100644 --- a/paddle/phi/kernels/cpu/segment_pool_kernel.cc +++ b/paddle/phi/kernels/cpu/segment_pool_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License.
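Note: the rnn_functor.h, rnn_grad_kernel.cc, and rnn_kernel.cc hunks above illustrate how the newer clang-format breaks template-template parameters: the inner template <...> is placed on its own line and the class/typename name drops to the next line. A compilable sketch with hypothetical names; std::vector only stands in for a two-parameter template argument:

#include <memory>
#include <vector>

template <typename T,
          template <typename, typename>
          class Container>  // 'class Container' now sits on its own line
struct Holder {
  Container<T, std::allocator<T>> items;
};

int main() {
  Holder<int, std::vector> h;  // std::vector matches <typename, typename>
  h.items.push_back(1);
  return h.items.size() == 1 ? 0 : 1;
}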
#include "paddle/phi/kernels/segment_pool_kernel.h" -#include "paddle/phi/kernels/impl/segment_pool_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/segment_pool_kernel_impl.h" PD_REGISTER_KERNEL(segment_pool, CPU, diff --git a/paddle/phi/kernels/cpu/selu_grad_kernel.cc b/paddle/phi/kernels/cpu/selu_grad_kernel.cc index 32101b19132..9f83e39a363 100644 --- a/paddle/phi/kernels/cpu/selu_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/selu_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/selu_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/selu_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/sgd_kernel.cc b/paddle/phi/kernels/cpu/sgd_kernel.cc index 214fd82bef3..055c44d38e4 100644 --- a/paddle/phi/kernels/cpu/sgd_kernel.cc +++ b/paddle/phi/kernels/cpu/sgd_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/sgd_kernel.h" + #include "paddle/fluid/operators/jit/kernels.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/sign_kernel.cc b/paddle/phi/kernels/cpu/sign_kernel.cc index 5fe11ffbd6d..9ded252c5c5 100644 --- a/paddle/phi/kernels/cpu/sign_kernel.cc +++ b/paddle/phi/kernels/cpu/sign_kernel.cc @@ -13,10 +13,10 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/sign_kernel.h" -#include "paddle/phi/kernels/impl/sign_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/sign_kernel_impl.h" // See Note [ Why still include the fluid headers? ] #include "paddle/phi/common/bfloat16.h" diff --git a/paddle/phi/kernels/cpu/size_kernel.cc b/paddle/phi/kernels/cpu/size_kernel.cc index 71ebf9cdc09..ca8373b8488 100644 --- a/paddle/phi/kernels/cpu/size_kernel.cc +++ b/paddle/phi/kernels/cpu/size_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/size_kernel.h" -#include "paddle/phi/kernels/impl/size_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/size_kernel_impl.h" PD_REGISTER_KERNEL(size, CPU, diff --git a/paddle/phi/kernels/cpu/slice_grad_kernel.cc b/paddle/phi/kernels/cpu/slice_grad_kernel.cc index 5c2cb3ea80e..7e3efd21751 100644 --- a/paddle/phi/kernels/cpu/slice_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/slice_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/slice_grad_kernel.h" -#include "paddle/phi/kernels/impl/slice_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/slice_grad_kernel_impl.h" PD_REGISTER_KERNEL(slice_grad, CPU, diff --git a/paddle/phi/kernels/cpu/slice_kernel.cc b/paddle/phi/kernels/cpu/slice_kernel.cc index 736540609dd..0f2fe98a853 100644 --- a/paddle/phi/kernels/cpu/slice_kernel.cc +++ b/paddle/phi/kernels/cpu/slice_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. 
#include "paddle/phi/kernels/slice_kernel.h" -#include "paddle/phi/kernels/impl/slice_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/slice_kernel_impl.h" PD_REGISTER_KERNEL(slice, CPU, diff --git a/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc b/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc index d78477073ad..d296aba6650 100644 --- a/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/sparse_weight_embedding_grad_kernel.cc @@ -13,12 +13,12 @@ // limitations under the License. #include "paddle/phi/kernels/sparse_weight_embedding_grad_kernel.h" -#include "paddle/phi/kernels/funcs/embedding_util.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/embedding_util.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc b/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc index c0f95d03888..cfdccb5c8d9 100644 --- a/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc +++ b/paddle/phi/kernels/cpu/sparse_weight_embedding_kernel.cc @@ -12,14 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/embedding_kernel.h" -#include "paddle/phi/kernels/funcs/embedding_util.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/embedding_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" +#include "paddle/phi/kernels/funcs/embedding_util.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/split_kernel.cc b/paddle/phi/kernels/cpu/split_kernel.cc index 56d87292249..288cdd235ae 100644 --- a/paddle/phi/kernels/cpu/split_kernel.cc +++ b/paddle/phi/kernels/cpu/split_kernel.cc @@ -17,7 +17,6 @@ #include "paddle/fluid/operators/strided_memcpy.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc index 400f7e87839..2aff1568197 100644 --- a/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/temporal_shift_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/temporal_shift_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc b/paddle/phi/kernels/cpu/temporal_shift_kernel.cc index 6721117992d..29be4871319 100644 --- a/paddle/phi/kernels/cpu/temporal_shift_kernel.cc +++ b/paddle/phi/kernels/cpu/temporal_shift_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/temporal_shift_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/layout.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/transpose_grad_kernel.cc b/paddle/phi/kernels/cpu/transpose_grad_kernel.cc index 9dbcf575f33..dee69222e6d 100644 --- a/paddle/phi/kernels/cpu/transpose_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/transpose_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/transpose_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/common/bfloat16.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/tril_triu_grad_kernel.cc b/paddle/phi/kernels/cpu/tril_triu_grad_kernel.cc index 14aca258a2c..660254fef86 100644 --- a/paddle/phi/kernels/cpu/tril_triu_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/tril_triu_grad_kernel.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/tril_triu_grad_kernel_impl.h" PD_REGISTER_KERNEL(tril_triu_grad, CPU, diff --git a/paddle/phi/kernels/cpu/tril_triu_kernel.cc b/paddle/phi/kernels/cpu/tril_triu_kernel.cc index a3d20e55e21..f3599bb92b9 100644 --- a/paddle/phi/kernels/cpu/tril_triu_kernel.cc +++ b/paddle/phi/kernels/cpu/tril_triu_kernel.cc @@ -12,10 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/tril_triu_kernel_impl.h" PD_REGISTER_KERNEL(tril_triu, CPU, diff --git a/paddle/phi/kernels/cpu/trunc_grad_kernel.cc b/paddle/phi/kernels/cpu/trunc_grad_kernel.cc index 4d85dd609e2..24fc3892562 100644 --- a/paddle/phi/kernels/cpu/trunc_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/trunc_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/trunc_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" diff --git a/paddle/phi/kernels/cpu/trunc_kernel.cc b/paddle/phi/kernels/cpu/trunc_kernel.cc index babae6ce7c9..5fe33ec6a4b 100644 --- a/paddle/phi/kernels/cpu/trunc_kernel.cc +++ b/paddle/phi/kernels/cpu/trunc_kernel.cc @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/phi/kernels/trunc_kernel.h" + #include #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/trunc_kernel.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/unfold_grad_kernel.cc b/paddle/phi/kernels/cpu/unfold_grad_kernel.cc index c97005dd845..6ba4ba49b9a 100644 --- a/paddle/phi/kernels/cpu/unfold_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/unfold_grad_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. 
#include "paddle/phi/kernels/unfold_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/unfold_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/unfold_kernel.cc b/paddle/phi/kernels/cpu/unfold_kernel.cc index e38d8acd098..f15201542e6 100644 --- a/paddle/phi/kernels/cpu/unfold_kernel.cc +++ b/paddle/phi/kernels/cpu/unfold_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/unfold_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/unfold_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/uniform_random_kernel.cc b/paddle/phi/kernels/cpu/uniform_random_kernel.cc index c95a8f4ded6..a09812363f1 100644 --- a/paddle/phi/kernels/cpu/uniform_random_kernel.cc +++ b/paddle/phi/kernels/cpu/uniform_random_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/uniform_random_kernel.h" + #include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/cpu/unique_kernel.cc b/paddle/phi/kernels/cpu/unique_kernel.cc index 853b401315d..834f05f73e2 100644 --- a/paddle/phi/kernels/cpu/unique_kernel.cc +++ b/paddle/phi/kernels/cpu/unique_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/unique_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" diff --git a/paddle/phi/kernels/cpu/unstack_grad_kernel.cc b/paddle/phi/kernels/cpu/unstack_grad_kernel.cc index 9c2dce808dc..c494cbc965e 100644 --- a/paddle/phi/kernels/cpu/unstack_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/unstack_grad_kernel.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "paddle/phi/kernels/unstack_grad_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/unstack_grad_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/unstack_kernel.cc b/paddle/phi/kernels/cpu/unstack_kernel.cc index 3d233e9ec40..4bc8d1b2c93 100644 --- a/paddle/phi/kernels/cpu/unstack_kernel.cc +++ b/paddle/phi/kernels/cpu/unstack_kernel.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/phi/kernels/unstack_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/impl/unstack_kernel_impl.h" diff --git a/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc b/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc index fab49f54160..c98a098aa0e 100644 --- a/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc +++ b/paddle/phi/kernels/cpu/viterbi_decode_kernel.cc @@ -109,7 +109,8 @@ struct Gather { }; template typename CompareFunctor, + template + typename CompareFunctor, typename T> struct GetMask { void operator()(const Context& dev_ctx, @@ -122,7 +123,8 @@ struct GetMask { }; template typename BinaryFunctor, + template + typename BinaryFunctor, typename T> struct BinaryOperation { void operator()(const Context& dev_ctx, diff --git a/paddle/phi/kernels/cpu/warpctc_grad_kernel.cc b/paddle/phi/kernels/cpu/warpctc_grad_kernel.cc index 0b293363354..7d70d825250 100644 --- a/paddle/phi/kernels/cpu/warpctc_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/warpctc_grad_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/warpctc_grad_kernel.h" -#include "paddle/phi/kernels/impl/warpctc_grad_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/warpctc_grad_kernel_impl.h" PD_REGISTER_KERNEL( warpctc_grad, CPU, ALL_LAYOUT, phi::WarpctcGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/warpctc_kernel.cc b/paddle/phi/kernels/cpu/warpctc_kernel.cc index 4b87202c11e..239c6cb0cbe 100644 --- a/paddle/phi/kernels/cpu/warpctc_kernel.cc +++ b/paddle/phi/kernels/cpu/warpctc_kernel.cc @@ -13,10 +13,10 @@ // limitations under the License. #include "paddle/phi/kernels/warpctc_kernel.h" -#include "paddle/phi/kernels/impl/warpctc_kernel_impl.h" #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/impl/warpctc_kernel_impl.h" PD_REGISTER_KERNEL( warpctc, CPU, ALL_LAYOUT, phi::WarpctcKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/yolo_box_kernel.cc b/paddle/phi/kernels/cpu/yolo_box_kernel.cc index a83bc019fc3..6b882ad2895 100644 --- a/paddle/phi/kernels/cpu/yolo_box_kernel.cc +++ b/paddle/phi/kernels/cpu/yolo_box_kernel.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/yolo_box_kernel.h" + #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/funcs/yolo_box_util.h" diff --git a/paddle/phi/kernels/cpu/yolov3_loss_grad_kernel.cc b/paddle/phi/kernels/cpu/yolov3_loss_grad_kernel.cc index 383009229f9..655106e9cb4 100644 --- a/paddle/phi/kernels/cpu/yolov3_loss_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/yolov3_loss_grad_kernel.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+#include "paddle/phi/kernels/yolov3_loss_grad_kernel.h" + #include #include -#include "paddle/phi/kernels/yolov3_loss_grad_kernel.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/yolov3_loss_functor.h" diff --git a/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc b/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc index 8a190ab25a7..75b2e3c5c4a 100644 --- a/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc +++ b/paddle/phi/kernels/cpu/yolov3_loss_kernel.cc @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "paddle/phi/kernels/yolov3_loss_kernel.h" + #include #include -#include "paddle/phi/kernels/yolov3_loss_kernel.h" - #include "paddle/phi/backends/cpu/cpu_context.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/cpu/yolov3_loss_functor.h" diff --git a/paddle/phi/kernels/cumprod_grad_kernel.h b/paddle/phi/kernels/cumprod_grad_kernel.h index b3cb17b28e0..7610cad31e3 100644 --- a/paddle/phi/kernels/cumprod_grad_kernel.h +++ b/paddle/phi/kernels/cumprod_grad_kernel.h @@ -25,4 +25,4 @@ void CumprodGradKernel(const Context& dev_ctx, const DenseTensor& dout, int dim, DenseTensor* dx); -} // phi +} // namespace phi diff --git a/paddle/phi/kernels/cumprod_kernel.h b/paddle/phi/kernels/cumprod_kernel.h index 96d76cb0f43..bb8b1427b30 100644 --- a/paddle/phi/kernels/cumprod_kernel.h +++ b/paddle/phi/kernels/cumprod_kernel.h @@ -23,4 +23,4 @@ void CumprodKernel(const Context& dev_ctx, const DenseTensor& x, int dim, DenseTensor* out); -} // phi +} // namespace phi diff --git a/paddle/phi/kernels/diagonal_kernel.h b/paddle/phi/kernels/diagonal_kernel.h index 7cf7282307a..10afd7dbe92 100644 --- a/paddle/phi/kernels/diagonal_kernel.h +++ b/paddle/phi/kernels/diagonal_kernel.h @@ -25,4 +25,4 @@ void DiagonalKernel(const Context& dev_ctx, int axis1, int axis2, DenseTensor* out); -} // phi +} // namespace phi diff --git a/paddle/phi/kernels/digamma_grad_kernel.h b/paddle/phi/kernels/digamma_grad_kernel.h index ae5346080d3..abd8634518d 100644 --- a/paddle/phi/kernels/digamma_grad_kernel.h +++ b/paddle/phi/kernels/digamma_grad_kernel.h @@ -24,4 +24,4 @@ void DigammaGradKernel(const Context& ctx, const DenseTensor& out_grad, DenseTensor* x_grad); -} // namepsace phi +} // namespace phi diff --git a/paddle/phi/kernels/digamma_kernel.h b/paddle/phi/kernels/digamma_kernel.h index ce25f2e148e..3cf1eae67cc 100644 --- a/paddle/phi/kernels/digamma_kernel.h +++ b/paddle/phi/kernels/digamma_kernel.h @@ -21,4 +21,4 @@ namespace phi { template void DigammaKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out); -} // namepsace phi +} // namespace phi diff --git a/paddle/phi/kernels/empty_kernel.cc b/paddle/phi/kernels/empty_kernel.cc index 06d258a8a4e..d8cf0bd2ef9 100644 --- a/paddle/phi/kernels/empty_kernel.cc +++ b/paddle/phi/kernels/empty_kernel.cc @@ -14,9 +14,8 @@ #include "paddle/phi/kernels/empty_kernel.h" #include "paddle/phi/backends/all_context.h" -#include "paddle/phi/core/kernel_registry.h" - #include "paddle/phi/common/complex.h" +#include "paddle/phi/core/kernel_registry.h" namespace phi { diff --git a/paddle/phi/kernels/expand_kernel.h b/paddle/phi/kernels/expand_kernel.h index 3b44c46e4dd..930240db6cc 100644 --- a/paddle/phi/kernels/expand_kernel.h +++ b/paddle/phi/kernels/expand_kernel.h @@ -26,4 +26,4 @@ void ExpandKernel(const Context& ctx, const IntArray& shape, DenseTensor* out); -} // namepsace phi +} // 
diff --git a/paddle/phi/kernels/flatten_grad_kernel.cc b/paddle/phi/kernels/flatten_grad_kernel.cc
index 83f96c1f9f5..54279fca6e4 100644
--- a/paddle/phi/kernels/flatten_grad_kernel.cc
+++ b/paddle/phi/kernels/flatten_grad_kernel.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/flatten_grad_kernel.h"
+
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/copy_kernel.h"
diff --git a/paddle/phi/kernels/flatten_kernel.cc b/paddle/phi/kernels/flatten_kernel.cc
index f304e7706ad..dd000896073 100644
--- a/paddle/phi/kernels/flatten_kernel.cc
+++ b/paddle/phi/kernels/flatten_kernel.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include "paddle/phi/kernels/flatten_kernel.h"
+
 #include "paddle/phi/backends/all_context.h"
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/infermeta/unary.h"
diff --git a/paddle/phi/kernels/frobenius_norm_grad_kernel.h b/paddle/phi/kernels/frobenius_norm_grad_kernel.h
index cfe8192d1a6..65db8dd9e0a 100644
--- a/paddle/phi/kernels/frobenius_norm_grad_kernel.h
+++ b/paddle/phi/kernels/frobenius_norm_grad_kernel.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <...>
+
 #include "paddle/phi/core/dense_tensor.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/frobenius_norm_kernel.h b/paddle/phi/kernels/frobenius_norm_kernel.h
index f5f37ee0c0f..30122cb4160 100644
--- a/paddle/phi/kernels/frobenius_norm_kernel.h
+++ b/paddle/phi/kernels/frobenius_norm_kernel.h
@@ -15,6 +15,7 @@
 #pragma once
 
 #include <...>
+
 #include "paddle/phi/core/dense_tensor.h"
 
 namespace phi {
diff --git a/paddle/phi/kernels/full_kernel.h b/paddle/phi/kernels/full_kernel.h
index d5785f2eeda..228e862a09c 100644
--- a/paddle/phi/kernels/full_kernel.h
+++ b/paddle/phi/kernels/full_kernel.h
@@ -19,7 +19,6 @@
 #include "paddle/phi/common/int_array.h"
 #include "paddle/phi/common/scalar.h"
 #include "paddle/phi/core/dense_tensor.h"
-
 #include "paddle/phi/infermeta/nullary.h"
 #include "paddle/phi/kernels/empty_kernel.h"
diff --git a/paddle/phi/kernels/funcs/activation_functor.h b/paddle/phi/kernels/funcs/activation_functor.h
index f80117ccec7..f481821a7bf 100644
--- a/paddle/phi/kernels/funcs/activation_functor.h
+++ b/paddle/phi/kernels/funcs/activation_functor.h
@@ -15,14 +15,14 @@
 #pragma once
 #include <...>
+
 #include <...>
+#include <cmath>
 #include <...>
 #include <...>
 #include <...>
 #include <...>
 #include <...>
-
-#include <cmath>
 #ifndef _USE_MATH_DEFINES
 #define _USE_MATH_DEFINES
 #endif
@@ -986,9 +986,9 @@ struct BReluGradFunctor : public BaseActivationFunctor<T> {
             typename dOut,
             typename dX>
   void operator()(Device d, X x, Out out, dOut dout, dX dx) const {
-    dx.device(d) = dout *
-                   ((x > static_cast<T>(t_min)) * (x < static_cast<T>(t_max)))
-                       .template cast<T>();
+    dx.device(d) =
+        dout * ((x > static_cast<T>(t_min)) * (x < static_cast<T>(t_max)))
+                   .template cast<T>();
   }
 
   static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
@@ -1054,11 +1054,10 @@ struct LeakyReluGradGradFunctor : public BaseActivationFunctor<T> {
         GET_DATA_SAFELY(X, "Input", "X", "LeakyReluGradGrad"));
     auto ddout = EigenVector<T>::Flatten(
         GET_DATA_SAFELY(ddOut, "Output", "DOut", "LeakyReluGradGrad"));
-    ddout.device(*d) =
-        ddx *
-        ((x > static_cast<T>(0)).template cast<T>() +
-         static_cast<T>(alpha) * (x <= static_cast<T>(0)).template cast<T>())
-            .template cast<T>();
+    ddout.device(*d) = ddx * ((x > static_cast<T>(0)).template cast<T>() +
+                              static_cast<T>(alpha) *
+                                  (x <= static_cast<T>(0)).template cast<T>())
+                                 .template cast<T>();
   }
 }
 static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
@@ -1290,11 +1289,10 @@ struct ELUGradGradFunctor : public BaseActivationFunctor<T> {
   if (ddOut) {
     auto ddout = EigenVector<T>::Flatten(
         GET_DATA_SAFELY(ddOut, "Output", "DDOut", "ELUGradGrad"));
-    ddout.device(*d) = ddx *
-                       ((x > static_cast<T>(0)).template cast<T>() +
-                        static_cast<T>(alpha) * x.exp() *
-                            (x <= static_cast<T>(0)).template cast<T>())
-                           .template cast<T>();
+    ddout.device(*d) = ddx * ((x > static_cast<T>(0)).template cast<T>() +
+                              static_cast<T>(alpha) * x.exp() *
+                                  (x <= static_cast<T>(0)).template cast<T>())
+                                 .template cast<T>();
   }
 }
 static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
@@ -1980,11 +1978,10 @@ struct CELUGradGradFunctor : public BaseActivationFunctor<T> {
   if (ddOut) {
     auto ddout = EigenVector<T>::Flatten(
         GET_DATA_SAFELY(ddOut, "Output", "DDOut", "CELUGradGrad"));
-    ddout.device(*d) = ddx *
-                       ((x > static_cast<T>(0)).template cast<T>() +
-                        (x / static_cast<T>(alpha)).exp() *
-                            (x <= static_cast<T>(0)).template cast<T>())
-                           .template cast<T>();
+    ddout.device(*d) = ddx * ((x > static_cast<T>(0)).template cast<T>() +
+                              (x / static_cast<T>(alpha)).exp() *
+                                  (x <= static_cast<T>(0)).template cast<T>())
+                                 .template cast<T>();
  }
 }
 static constexpr ActBwdOpFwdDeps FwdDeps() { return ActBwdOpFwdDeps::kDepX; }
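As a reading aid, not part of the patch: each *GradGrad functor above computes the elementwise double-grad ddout = ddx * f'(x), with f'(x) written as a sum of boolean masks. Taking ELUGradGrad, and assuming the usual ELU definition f(x) = x for x > 0 and f(x) = \alpha(e^x - 1) for x <= 0:

    ddout = ddx \cdot \bigl( \mathbf{1}[x > 0] + \alpha e^{x}\,\mathbf{1}[x \le 0] \bigr) = ddx \cdot f'(x)

so the hunks change only parenthesization and indentation; the masks and the computed value are untouched.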
diff --git a/paddle/phi/kernels/funcs/adam_functors.h b/paddle/phi/kernels/funcs/adam_functors.h
index 2f706f0ef1c..b14ee7f072e 100644
--- a/paddle/phi/kernels/funcs/adam_functors.h
+++ b/paddle/phi/kernels/funcs/adam_functors.h
@@ -14,6 +14,7 @@
 #pragma once
 
 #include <math.h>  // for sqrt in CPU and CUDA
+
 #include <...>
 
 #include "paddle/phi/kernels/funcs/algorithm.h"
@@ -169,9 +170,8 @@ class AdamFunctor {
     moment1_out = beta1_ * mom1 + (1 - beta1_) * g;
     moment2_out = beta2_ * mom2 + (1 - beta2_) * g * g;
 
-    param_out = param -
-                lr * (moment1_out /
-                      (moment2_out.sqrt() + epsilon_ * sqrt(1 - beta2_pow)));
+    param_out = param - lr * (moment1_out / (moment2_out.sqrt() +
+                                             epsilon_ * sqrt(1 - beta2_pow)));
   }
 };
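As a reading aid, not part of the patch: the reflowed AdamFunctor update is the standard Adam step. Writing m_t and v_t for moment1_out and moment2_out, and assuming lr already carries Adam's bias-correction factor \sqrt{1-\beta_2^t}/(1-\beta_1^t) (which is why \epsilon is scaled by \sqrt{1-\beta_2^t} in the denominator):

    m_t = \beta_1 m_{t-1} + (1-\beta_1) g_t
    v_t = \beta_2 v_{t-1} + (1-\beta_2) g_t^2
    \theta_t = \theta_{t-1} - \mathrm{lr} \cdot \frac{m_t}{\sqrt{v_t} + \epsilon \sqrt{1-\beta_2^{t}}}

matching param - lr * (moment1_out / (moment2_out.sqrt() + epsilon_ * sqrt(1 - beta2_pow))).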
diff --git a/paddle/phi/kernels/funcs/aligned_vector.h b/paddle/phi/kernels/funcs/aligned_vector.h
index 14a9560b841..70f75d5352a 100644
--- a/paddle/phi/kernels/funcs/aligned_vector.h
+++ b/paddle/phi/kernels/funcs/aligned_vector.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
 #include <...>
+
 #include "paddle/phi/core/hostdevice.h"
 
 #if defined(__xpu__)
 #define CHAR_BIT 8
@@ -45,11 +46,11 @@ HOSTDEVICE inline void Store(const AlignedVector<T, Size>& vec, T* addr) {
 }
 
 /*
-* Only the address of input data is the multiplier of 1,2,4, vectorized load
-* with corresponding multiplier-value is possible. Moreover, the maximum length
-* of vectorized load is 128 bits once. Hence, valid length of vectorized load
-* shall be determined under both former constraints.
-*/
+ * Only the address of input data is the multiplier of 1,2,4, vectorized load
+ * with corresponding multiplier-value is possible. Moreover, the maximum length
+ * of vectorized load is 128 bits once. Hence, valid length of vectorized load
+ * shall be determined under both former constraints.
+ */
 template <typename T>
 int GetVectorizedSize(const T* pointer) {
   constexpr int max_load_bits = 128;
@@ -60,11 +61,11 @@ int GetVectorizedSize(const T* pointer) {
   constexpr int vec2 = std::alignment_of<AlignedVector<T, 2>>::value;  // NOLINT
   if (address % vec8 == 0) {
     /*
-    * Currently, decide to deal with no more than 4 data once while adopting
-    * vectorization load/store, if performance test shows that dealing with
-    * 8 data once in vectorization load/store does get optimized, return code
-    * below can be changed into " return std::min(8, valid_vec_size); " .
-    */
+     * Currently, decide to deal with no more than 4 data once while adopting
+     * vectorization load/store, if performance test shows that dealing with
+     * 8 data once in vectorization load/store does get optimized, return code
+     * below can be changed into " return std::min(8, valid_vec_size); " .
+     */
     return std::min(4, valid_vec_size);
   } else if (address % vec4 == 0) {
     return std::min(4, valid_vec_size);
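The GetVectorizedSize comments above describe picking a vector width from the pointer's alignment, capped at 128 bits per load. A standalone sketch of that rule (plain C++; VectorizedSizeSketch and k_max_elems are hypothetical names, and this is not the library's code):

#include <algorithm>
#include <cstdint>

// Pick the widest per-thread vector (at most 128 bits, at most k_max_elems
// elements) whose byte alignment divides the address, mirroring the
// 8/4/2/1 cascade in the hunk above.
template <typename T>
int VectorizedSizeSketch(const T* p, int k_max_elems = 8) {
  constexpr int kMaxLoadBits = 128;
  const int valid = std::min<int>(k_max_elems, kMaxLoadBits / (8 * sizeof(T)));
  const auto addr = reinterpret_cast<std::uintptr_t>(p);
  if (addr % (8 * sizeof(T)) == 0) return std::min(4, valid);  // capped at 4,
  if (addr % (4 * sizeof(T)) == 0) return std::min(4, valid);  // per the comment
  if (addr % (2 * sizeof(T)) == 0) return std::min(2, valid);
  return 1;
}

For float, valid is 4, so any suitably aligned address vectorizes 4 elements at a time, which is the std::min(4, valid_vec_size) branch shown in the diff.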
*/ -#include "paddle/phi/kernels/funcs/concat_and_split_functor.h" - #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/platform/cuda_graph_with_memory_pool.h" +#include "paddle/phi/kernels/funcs/concat_and_split_functor.h" namespace phi { namespace funcs { diff --git a/paddle/phi/kernels/funcs/deformable_conv_functor.cc b/paddle/phi/kernels/funcs/deformable_conv_functor.cc index ea256e93bba..48858fa5939 100644 --- a/paddle/phi/kernels/funcs/deformable_conv_functor.cc +++ b/paddle/phi/kernels/funcs/deformable_conv_functor.cc @@ -60,14 +60,12 @@ inline void ModulatedDeformableIm2colCPUKernel( const T* data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; const T* data_offset_ptr = - data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; + data_offset + (b_col * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; const T* data_mask_ptr = data_mask - ? data_mask + - (b_col * deformable_group + deformable_group_index) * - kernel_h * kernel_w * height_col * width_col + ? data_mask + (b_col * deformable_group + deformable_group_index) * + kernel_h * kernel_w * height_col * width_col : nullptr; for (int i = 0; i < kernel_h; ++i) { diff --git a/paddle/phi/kernels/funcs/deformable_conv_functor.cu b/paddle/phi/kernels/funcs/deformable_conv_functor.cu index 8bfb46c6636..bebea5dcb74 100644 --- a/paddle/phi/kernels/funcs/deformable_conv_functor.cu +++ b/paddle/phi/kernels/funcs/deformable_conv_functor.cu @@ -12,9 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/phi/kernels/funcs/deformable_conv_functor.h" - #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/kernels/funcs/deformable_conv_functor.h" namespace phi { namespace funcs { @@ -70,14 +69,12 @@ __global__ void ModulatedDeformableIm2colGpuKernel( const T* data_im_ptr = data_im + (b_col * num_channels + c_im) * height * width; const T* data_offset_ptr = - data_offset + - (b_col * deformable_group + deformable_group_index) * 2 * kernel_h * - kernel_w * height_col * width_col; + data_offset + (b_col * deformable_group + deformable_group_index) * 2 * + kernel_h * kernel_w * height_col * width_col; const T* data_mask_ptr = data_mask - ? data_mask + - (b_col * deformable_group + deformable_group_index) * - kernel_h * kernel_w * height_col * width_col + ? 
diff --git a/paddle/phi/kernels/funcs/deformable_conv_functor.cu b/paddle/phi/kernels/funcs/deformable_conv_functor.cu
index 8bfb46c6636..bebea5dcb74 100644
--- a/paddle/phi/kernels/funcs/deformable_conv_functor.cu
+++ b/paddle/phi/kernels/funcs/deformable_conv_functor.cu
@@ -12,9 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/phi/kernels/funcs/deformable_conv_functor.h"
-
 #include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/kernels/funcs/deformable_conv_functor.h"
 
 namespace phi {
 namespace funcs {
@@ -70,14 +69,12 @@ __global__ void ModulatedDeformableIm2colGpuKernel(
     const T* data_im_ptr =
         data_im + (b_col * num_channels + c_im) * height * width;
     const T* data_offset_ptr =
-        data_offset +
-        (b_col * deformable_group + deformable_group_index) * 2 * kernel_h *
-            kernel_w * height_col * width_col;
+        data_offset + (b_col * deformable_group + deformable_group_index) * 2 *
+                          kernel_h * kernel_w * height_col * width_col;
     const T* data_mask_ptr =
         data_mask
-            ? data_mask +
-                  (b_col * deformable_group + deformable_group_index) *
-                      kernel_h * kernel_w * height_col * width_col
+            ? data_mask + (b_col * deformable_group + deformable_group_index) *
+                              kernel_h * kernel_w * height_col * width_col
             : nullptr;
 
     for (int i = 0; i < kernel_h; ++i) {
@@ -129,28 +126,28 @@ void ModulatedDeformableIm2col(const Context& dev_ctx,
   int blocks = NumBlocks(num_kernels);
   int threads = kNumCUDAThreads;
 
-  ModulatedDeformableIm2colGpuKernel<
-      T><<<blocks, threads, 0, dev_ctx.stream()>>>(num_kernels,
-                                                   data_im,
-                                                   data_offset,
-                                                   data_mask,
-                                                   im_shape[1],
-                                                   im_shape[2],
-                                                   filter_shape[2],
-                                                   filter_shape[3],
-                                                   paddings[0],
-                                                   paddings[1],
-                                                   strides[0],
-                                                   strides[1],
-                                                   dilations[0],
-                                                   dilations[1],
-                                                   channel_per_deformable_group,
-                                                   col_shape[1],
-                                                   im_shape[0],
-                                                   deformable_groups,
-                                                   col_shape[2],
-                                                   col_shape[3],
-                                                   data_col);
+  ModulatedDeformableIm2colGpuKernel<T>
+      <<<blocks, threads, 0, dev_ctx.stream()>>>(num_kernels,
+                                                 data_im,
+                                                 data_offset,
+                                                 data_mask,
+                                                 im_shape[1],
+                                                 im_shape[2],
+                                                 filter_shape[2],
+                                                 filter_shape[3],
+                                                 paddings[0],
+                                                 paddings[1],
+                                                 strides[0],
+                                                 strides[1],
+                                                 dilations[0],
+                                                 dilations[1],
+                                                 channel_per_deformable_group,
+                                                 col_shape[1],
+                                                 im_shape[0],
+                                                 deformable_groups,
+                                                 col_shape[2],
+                                                 col_shape[3],
+                                                 data_col);
 }
 
 template void ModulatedDeformableIm2col(
diff --git a/paddle/phi/kernels/funcs/detail/activation_functions.h b/paddle/phi/kernels/funcs/detail/activation_functions.h
index 475557f1642..d41dca33f75 100644
--- a/paddle/phi/kernels/funcs/detail/activation_functions.h
+++ b/paddle/phi/kernels/funcs/detail/activation_functions.h
@@ -14,8 +14,10 @@ limitations under the License. */
 
 #pragma once
 #include <...>
+
 #include <...>
 #include <...>
+
 #include "paddle/fluid/platform/cpu_info.h"
 #include "paddle/phi/core/hostdevice.h"
diff --git a/paddle/phi/kernels/funcs/detail/avx_mathfun.h b/paddle/phi/kernels/funcs/detail/avx_mathfun.h
index e5e7388d51d..75e4922648c 100644
--- a/paddle/phi/kernels/funcs/detail/avx_mathfun.h
+++ b/paddle/phi/kernels/funcs/detail/avx_mathfun.h
@@ -356,11 +356,11 @@ v8sf sin256_ps(v8sf x) {  // any x
   /* scale by 4/Pi */
   y = _mm256_mul_ps(x, *(v8sf *)_ps256_cephes_FOPI);
 
-/*
-  Here we start a series of integer operations, which are in the
-  realm of AVX2.
-  If we don't have AVX, let's perform them using SSE2 directives
-*/
+  /*
+    Here we start a series of integer operations, which are in the
+    realm of AVX2.
+    If we don't have AVX, let's perform them using SSE2 directives
+  */
 
 #ifdef __AVX2__
   /* store the integer part of y in mm0 */
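The avx_mathfun comment re-indented above refers to emulating 256-bit integer operations on pre-AVX2 hardware by splitting them into two 128-bit SSE2 halves. An illustrative sketch of that fallback pattern (not the library's code; avx_mathfun.h wraps the same idea in macros):

#include <immintrin.h>

// 256-bit integer add: native on AVX2, emulated with two SSE2 adds otherwise.
static inline __m256i add_epi32_compat(__m256i a, __m256i b) {
#ifdef __AVX2__
  return _mm256_add_epi32(a, b);
#else
  __m128i lo = _mm_add_epi32(_mm256_castsi256_si128(a),
                             _mm256_castsi256_si128(b));
  __m128i hi = _mm_add_epi32(_mm256_extractf128_si256(a, 1),
                             _mm256_extractf128_si256(b, 1));
  return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), hi, 1);
#endif
}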
diff --git a/paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h b/paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h
index 0016bfb64c9..0fdf490c553 100644
--- a/paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h
+++ b/paddle/phi/kernels/funcs/detail/gru_cpu_kernel.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
 #include <...>
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/phi/kernels/funcs/activation_functor.h"
 #include "paddle/phi/kernels/funcs/detail/activation_functions.h"
diff --git a/paddle/phi/kernels/funcs/detail/gru_gpu_kernel.h b/paddle/phi/kernels/funcs/detail/gru_gpu_kernel.h
index 6657417beac..93232d8f7f4 100644
--- a/paddle/phi/kernels/funcs/detail/gru_gpu_kernel.h
+++ b/paddle/phi/kernels/funcs/detail/gru_gpu_kernel.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
 #include <...>
+
 #include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/phi/kernels/funcs/detail/activation_functions.h"
diff --git a/paddle/phi/kernels/funcs/detail/gru_kernel.h b/paddle/phi/kernels/funcs/detail/gru_kernel.h
index db53fc4576d..9e2aef19406 100644
--- a/paddle/phi/kernels/funcs/detail/gru_kernel.h
+++ b/paddle/phi/kernels/funcs/detail/gru_kernel.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
 #include <...>
+
 #include "paddle/phi/core/hostdevice.h"
 #include "paddle/phi/kernels/funcs/detail/activation_functions.h"
diff --git a/paddle/phi/kernels/funcs/detail/lstm_cpu_kernel.h b/paddle/phi/kernels/funcs/detail/lstm_cpu_kernel.h
index ed8e749f7fd..02fddc57b31 100644
--- a/paddle/phi/kernels/funcs/detail/lstm_cpu_kernel.h
+++ b/paddle/phi/kernels/funcs/detail/lstm_cpu_kernel.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
 #include <...>
+
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/phi/kernels/funcs/activation_functor.h"
 #include "paddle/phi/kernels/funcs/detail/activation_functions.h"
diff --git a/paddle/phi/kernels/funcs/detail/lstm_gpu_kernel.h b/paddle/phi/kernels/funcs/detail/lstm_gpu_kernel.h
index 6d4c430d9e6..5d06dddd964 100644
--- a/paddle/phi/kernels/funcs/detail/lstm_gpu_kernel.h
+++ b/paddle/phi/kernels/funcs/detail/lstm_gpu_kernel.h
@@ -249,27 +249,27 @@ void gpu_lstm_forward(const paddle::platform::DeviceContext& context,
 
   if (batch_size == 1) {
     KeLstmForward<T,
                   Op,
-                  /* is_batch= */ false><<<grid, threads, 0, stream>>>(
-        op,
-        value,
-        frame_size,
-        batch_size,
-        cell_clip,
-        active_node,
-        active_gate,
-        active_state);
+                  /* is_batch= */ false>
+        <<<grid, threads, 0, stream>>>(op,
+                                       value,
+                                       frame_size,
+                                       batch_size,
+                                       cell_clip,
+                                       active_node,
+                                       active_gate,
+                                       active_state);
   } else {
     KeLstmForward<T,
                   Op,
-                  /* is_batch= */ true><<<grid, threads, 0, stream>>>(
-        op,
-        value,
-        frame_size,
-        batch_size,
-        cell_clip,
-        active_node,
-        active_gate,
-        active_state);
+                  /* is_batch= */ true>
+        <<<grid, threads, 0, stream>>>(op,
+                                       value,
+                                       frame_size,
+                                       batch_size,
+                                       cell_clip,
+                                       active_node,
+                                       active_gate,
+                                       active_state);
   }
 }
 
@@ -303,29 +303,29 @@ void gpu_lstm_backward(const paddle::platform::DeviceContext& context,
 
   if (batch_size == 1) {
     KeLstmBackward<T,
                    Op,
-                   /* is_batch= */ false><<<grid, threads, 0, stream>>>(
-        op,
-        value,
-        grad,
-        frame_size,
-        batch_size,
-        cell_clip,
-        active_node,
-        active_gate,
-        active_state);
+                   /* is_batch= */ false>
+        <<<grid, threads, 0, stream>>>(op,
+                                       value,
+                                       grad,
+                                       frame_size,
+                                       batch_size,
+                                       cell_clip,
+                                       active_node,
+                                       active_gate,
+                                       active_state);
   } else {
     KeLstmBackward<T,
                    Op,
-                   /* is_batch= */ true><<<grid, threads, 0, stream>>>(
-        op,
-        value,
-        grad,
-        frame_size,
-        batch_size,
-        cell_clip,
-        active_node,
-        active_gate,
-        active_state);
+                   /* is_batch= */ true>
+        <<<grid, threads, 0, stream>>>(op,
+                                       value,
+                                       grad,
+                                       frame_size,
+                                       batch_size,
+                                       cell_clip,
+                                       active_node,
+                                       active_gate,
+                                       active_state);
   }
 }
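The lstm_gpu_kernel.h hunks above are the clearest instance of the new CUDA launch formatting: the template argument list stays glued to the kernel name, and the <<<...>>> launch configuration breaks onto the following line. A minimal sketch of old versus new (hypothetical kernel, not code from this patch):

template <typename T, bool IsBatch>
__global__ void AxpyKernel(int n, T a, const T* x, T* y) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) y[i] = a * x[i] + (IsBatch ? y[i] : T(0));
}

template <typename T>
void LaunchAxpy(int n, T a, const T* x, T* y, cudaStream_t stream) {
  dim3 threads(256);
  dim3 grid((n + threads.x - 1) / threads.x);
  // Old style:  AxpyKernel<T, /* IsBatch= */ false><<<grid, threads, 0, stream>>>(
  //                 n, a, x, y);
  // New style (this patch): break before the launch configuration.
  AxpyKernel<T, /* IsBatch= */ false>
      <<<grid, threads, 0, stream>>>(n, a, x, y);
}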
diff --git a/paddle/phi/kernels/funcs/detail/lstm_kernel.h b/paddle/phi/kernels/funcs/detail/lstm_kernel.h
index 8b429264125..0846f05a0c2 100644
--- a/paddle/phi/kernels/funcs/detail/lstm_kernel.h
+++ b/paddle/phi/kernels/funcs/detail/lstm_kernel.h
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once
 
 #include <...>
+
 #include "paddle/phi/core/hostdevice.h"
 #include "paddle/phi/kernels/funcs/detail/activation_functions.h"
diff --git a/paddle/phi/kernels/funcs/diagonal.h b/paddle/phi/kernels/funcs/diagonal.h
index 19a93970d09..81525cb2544 100644
--- a/paddle/phi/kernels/funcs/diagonal.h
+++ b/paddle/phi/kernels/funcs/diagonal.h
@@ -17,6 +17,7 @@
 #if defined(__NVCC__) || defined(__HIPCC__)
 #include <...>
 #include <...>
+
 #include "paddle/phi/kernels/primitive/kernel_primitives.h"
 #endif
diff --git a/paddle/phi/kernels/funcs/distribution_helper.h b/paddle/phi/kernels/funcs/distribution_helper.h
index 68e986c334e..0e6b3a3f9d7 100644
--- a/paddle/phi/kernels/funcs/distribution_helper.h
+++ b/paddle/phi/kernels/funcs/distribution_helper.h
@@ -319,10 +319,9 @@ void distribution_and_transform(const GPUContext &ctx,
   uint64_t seed = seed_offset.first;
   uint64_t offset = seed_offset.second;
 
-  DistributionKernel<...><<<...>>>(
-      size, seed, offset, dist, trans, out_data, total_thread);
+  DistributionKernel<...>
+      <<<...>>>(
+          size, seed, offset, dist, trans, out_data, total_thread);
 }
 
 #endif
diff --git a/paddle/phi/kernels/funcs/eigen/extensions.h b/paddle/phi/kernels/funcs/eigen/extensions.h
index fbb9d8e3d2e..c724564417b 100644
--- a/paddle/phi/kernels/funcs/eigen/extensions.h
+++ b/paddle/phi/kernels/funcs/eigen/extensions.h
@@ -20,7 +20,6 @@
 #include "paddle/phi/common/complex.h"
 #include "paddle/phi/common/float16.h"
 #include "paddle/phi/core/hostdevice.h"
-
 #include "unsupported/Eigen/CXX11/Tensor"
 
 namespace Eigen {
diff --git a/paddle/phi/kernels/funcs/elementwise_base.h b/paddle/phi/kernels/funcs/elementwise_base.h
index 1093bdfa726..71dfbc206a1 100644
--- a/paddle/phi/kernels/funcs/elementwise_base.h
+++ b/paddle/phi/kernels/funcs/elementwise_base.h
@@ -494,7 +494,7 @@ template