diff --git a/BCLOUD b/BCLOUD index ce0e561ae8a948df1f0f30056ca5b23c24b0e969..258e96b7f01e8c50a5a9df7a25d0a4c3301351a9 100644 --- a/BCLOUD +++ b/BCLOUD @@ -35,6 +35,7 @@ CONFIGS('baidu/third-party/gzstream@master@git_branch') CONFIGS('baidu/third-party/pybind11@v2.2.4@git_branch') CONFIGS('baidu/third-party/python@gcc482output@git_branch') CONFIGS('baidu/third-party/yaml-cpp@yaml-cpp_0-6-2-0_GEN_PD_BL@git_tag') +CONFIGS('third-64/gtest@base') HEADERS('paddle/fluid/memory/*.h', '$INC/paddle/fluid/memory/') HEADERS('paddle/fluid/memory/detail/*.h', '$INC/paddle/fluid/memory/detail/') @@ -79,8 +80,25 @@ HEADERS('paddle/fluid/train/custom_trainer/feed/accessor/*.h', '$INC/paddle/flui NEED_OUTPUT("baidu/third-party/mklml") OUTPUT('paddle/fluid/train/custom_trainer/feed/conf', '$OUT') OUTPUT('paddle/fluid/train/custom_trainer/feed/scripts', '$OUT') -custom_trainer_src = GLOB('paddle/fluid/train/custom_trainer/feed/*/*.cc') + +def UT_FILE(filename): + UT_DIR = 'paddle/fluid/train/custom_trainer/feed/unit_test' + return os.path.join(UT_DIR, filename) + +CPPFLAGS('-DPYBIND_AVX_MKLML') # for paddle with avx and mklml: selects the PYBIND_AVX_MKLML op list in paddle/fluid/pybind/pybind.h. NOTE(review): feed_trainer passes the same define again in its CppFlags, and pybind.h tests PYBIND_AVX_MKLML before PYBIND_NOAVX_OPENBLAS, so confirm this global define does not reach no-AVX/OpenBLAS targets + +custom_trainer_src = GLOB('paddle/fluid/train/custom_trainer/feed/*/*.cc', Exclude(UT_FILE('*'))) Application('feed_trainer', Sources('paddle/fluid/train/custom_trainer/feed/main.cc', custom_trainer_src), CppFlags('-DHPPL_STUB_FUNC -DLAPACK_FOUND -DPADDLE_DISABLE_PROFILER -DPADDLE_NO_PYTHON -DCUSTOM_TRAINER -DPADDLE_ON_INFERENCE -DPADDLE_USE_DSO -DPADDLE_USE_PTHREAD_BARRIER -DPADDLE_USE_PTHREAD_SPINLOCK -DPADDLE_VERSION=0.0.0 -DPADDLE_WITH_AVX -DPADDLE_WITH_MKLML -DPADDLE_WITH_XBYAK -DXBYAK64 -DXBYAK_NO_OP_NAMES -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -DPYBIND_AVX_MKLML' + r" -DPADDLE_REVISION=\"%s@%s@%s\"" % (REPO_URL(), REPO_BRANCH(), REPO_REVISION())), CFlags('-std=c++11 -m64 -fPIC -fno-omit-frame-pointer -Werror -Wall -Wextra -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix 
-Wno-error=sign-compare -Wno-error=unused-local-typedefs -Wno-error=maybe-uninitialized -fopenmp -mavx -O3 -DNDEBUG '), CxxFlags('-std=c++11 -m64 -fPIC -fno-omit-frame-pointer -Werror -Wall -Wextra -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix -Wno-error=sign-compare -Wno-error=unused-local-typedefs -Wno-error=maybe-uninitialized -fopenmp -mavx -O3 -DNDEBUG '), Libraries('$OUT/lib/libpaddle_fluid_avx_mklml.a')) + +# feed unit test: flag strings for the UTApplication target that is currently commented out + +UT_MAIN = UT_FILE('main.cc') +UT_CPPFLAGS_STR = '-DHPPL_STUB_FUNC -DLAPACK_FOUND -DPADDLE_DISABLE_PROFILER -DPADDLE_NO_PYTHON -DCUSTOM_TRAINER -DPADDLE_ON_INFERENCE -DPADDLE_USE_DSO -DPADDLE_USE_PTHREAD_BARRIER -DPADDLE_USE_PTHREAD_SPINLOCK -DPADDLE_VERSION=0.0.0 -DPADDLE_WITH_AVX -DPADDLE_WITH_MKLML -DPADDLE_WITH_XBYAK -DXBYAK64 -DXBYAK_NO_OP_NAMES -D_GNU_SOURCE -D__STDC_LIMIT_MACROS -DPYBIND_AVX_MKLML' + r" -DPADDLE_REVISION=\"%s@%s@%s\"" % (REPO_URL(), REPO_BRANCH(), REPO_REVISION()) +UT_CFLAGS_STR = '-m64 -fPIC -fno-omit-frame-pointer -Werror -Wall -Wextra -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function -Wno-error=literal-suffix -Wno-error=sign-compare -Wno-error=unused-local-typedefs -Wno-error=maybe-uninitialized -fopenmp -mavx -O3 -DNDEBUG ' +UT_CXXFLAGS_STR = '-std=c++11 ' + UT_CFLAGS_STR # the trailing space keeps -std=c++11 and the leading -m64 of UT_CFLAGS_STR as two separate flags +# UTApplication('test_executor', Sources(UT_MAIN, UT_FILE('test_executor.cc'), custom_trainer_src), CppFlags(UT_CPPFLAGS_STR), CFlags(UT_CFLAGS_STR), CxxFlags(UT_CXXFLAGS_STR), Libraries('$OUT/lib/libpaddle_fluid_avx_mklml.a')) + + paddle_fluid_noavx_openblas_src = "paddle/fluid/memory/detail/memory_block.cc paddle/fluid/memory/detail/memory_block_desc.cc paddle/fluid/memory/detail/meta_cache.cc paddle/fluid/memory/detail/system_allocator.cc paddle/fluid/memory/detail/buddy_allocator.cc paddle/fluid/memory/allocation/allocator.cc paddle/fluid/memory/allocation/cpu_allocator.cc paddle/fluid/memory/allocation/locked_allocator.cc 
paddle/fluid/memory/allocation/buffered_allocator.cc paddle/fluid/memory/allocation/best_fit_allocator.cc paddle/fluid/memory/allocation/naive_best_fit_allocator.cc paddle/fluid/memory/allocation/retry_allocator.cc paddle/fluid/memory/allocation/aligned_allocator.cc paddle/fluid/memory/allocation/allocator_strategy.cc paddle/fluid/memory/allocation/allocator_facade.cc paddle/fluid/memory/allocation/auto_growth_best_fit_allocator.cc paddle/fluid/memory/malloc.cc paddle/fluid/memory/memcpy.cc paddle/fluid/platform/profiler.proto paddle/fluid/platform/enforce.cc paddle/fluid/platform/cpu_info.cc paddle/fluid/platform/place.cc paddle/fluid/platform/dynload/dynamic_loader.cc paddle/fluid/platform/dynload/warpctc.cc paddle/fluid/platform/cpu_helper.cc paddle/fluid/platform/temporary_allocator.cc paddle/fluid/platform/device_context.cc paddle/fluid/platform/init.cc paddle/fluid/platform/timer.cc paddle/fluid/platform/lodtensor_printer.cc paddle/fluid/platform/device_tracer.cc paddle/fluid/platform/profiler.cc paddle/fluid/platform/device_memory_aligment.cc paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_optimizer_op_pass.cc paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_adam_op_pass.cc paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_sgd_op_pass.cc paddle/fluid/framework/ir/fuse_optimizer_ops_pass/fuse_momentum_op_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/op_graph_view.cc paddle/fluid/framework/ir/memory_optimize_pass/while_op_eager_deletion_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/recurrent_op_eager_deletion_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass_helper.cc paddle/fluid/framework/ir/memory_optimize_pass/reference_count_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_helper.cc paddle/fluid/framework/ir/memory_optimize_pass/memory_optimize_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/inplace_op_pass.cc 
paddle/fluid/framework/ir/memory_optimize_pass/eager_deletion_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/record_skip_memory_opt_vars_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/memory_reuse_pass.cc paddle/fluid/framework/ir/memory_optimize_pass/buffer_shared_inplace_op_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/modify_op_lock_and_record_event_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_print_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_check_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/multi_devices_graph_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/sequential_execution_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/fuse_all_reduce_op_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/all_reduce_deps_pass.cc paddle/fluid/framework/ir/multi_devices_graph_pass/backward_optimizer_op_deps_pass.cc paddle/fluid/framework/ir/node.cc paddle/fluid/framework/ir/graph.cc paddle/fluid/framework/ir/graph_helper.cc paddle/fluid/framework/ir/pass.cc paddle/fluid/framework/ir/graph_traits.cc paddle/fluid/framework/ir/graph_pattern_detector.cc paddle/fluid/framework/ir/fuse_pass_base.cc paddle/fluid/framework/ir/coalesce_grad_tensor_pass.cc paddle/fluid/framework/ir/graph_to_program_pass.cc paddle/fluid/framework/ir/graph_viz_pass.cc paddle/fluid/framework/ir/lock_free_optimize_pass.cc paddle/fluid/framework/ir/fc_fuse_pass.cc paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc paddle/fluid/framework/ir/infer_clean_graph_pass.cc paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc paddle/fluid/framework/ir/embedding_fc_lstm_fuse_pass.cc paddle/fluid/framework/ir/fc_gru_fuse_pass.cc paddle/fluid/framework/ir/seq_concat_fc_fuse_pass.cc paddle/fluid/framework/ir/multi_batch_merge_pass.cc paddle/fluid/framework/ir/conv_bn_fuse_pass.cc paddle/fluid/framework/ir/seqconv_eltadd_relu_fuse_pass.cc 
paddle/fluid/framework/ir/seqpool_concat_fuse_pass.cc paddle/fluid/framework/ir/repeated_fc_relu_fuse_pass.cc paddle/fluid/framework/ir/squared_mat_sub_fuse_pass.cc paddle/fluid/framework/ir/is_test_pass.cc paddle/fluid/framework/ir/conv_elementwise_add_act_fuse_pass.cc paddle/fluid/framework/ir/conv_elementwise_add2_act_fuse_pass.cc paddle/fluid/framework/ir/conv_elementwise_add_fuse_pass.cc paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc paddle/fluid/framework/ir/transpose_flatten_concat_fuse_pass.cc paddle/fluid/framework/ir/identity_scale_op_clean_pass.cc paddle/fluid/framework/ir/sync_batch_norm_pass.cc paddle/fluid/framework/ir/runtime_context_cache_pass.cc paddle/fluid/framework/ir/quant_conv2d_dequant_fuse_pass.cc paddle/fluid/framework/ir/fillconstant_elementwisemul_fuse.cc paddle/fluid/framework/ir/shuffle_channel_detect_pass.cc paddle/fluid/framework/ir/delete_quant_dequant_op_pass.cc paddle/fluid/framework/ir/fuse_elewise_add_act_pass.cc paddle/fluid/framework/ir/fuse_relu_depthwise_conv_pass.cc paddle/fluid/framework/ir/pass_builder.cc paddle/fluid/framework/details/var_handle.cc paddle/fluid/framework/details/op_handle_base.cc paddle/fluid/framework/details/scale_loss_grad_op_handle.cc paddle/fluid/framework/details/fetch_op_handle.cc paddle/fluid/framework/details/computation_op_handle.cc paddle/fluid/framework/details/rpc_op_handle.cc paddle/fluid/framework/details/fetch_barrier_op_handle.cc paddle/fluid/framework/details/multi_devices_helper.cc paddle/fluid/framework/details/variable_visitor.cc paddle/fluid/framework/details/all_reduce_op_handle.cc paddle/fluid/framework/details/fused_all_reduce_op_handle.cc paddle/fluid/framework/details/reduce_op_handle.cc paddle/fluid/framework/details/broadcast_op_handle.cc paddle/fluid/framework/details/fused_broadcast_op_handle.cc paddle/fluid/framework/details/gather_op_handle.cc paddle/fluid/framework/details/eager_deletion_op_handle.cc 
paddle/fluid/framework/details/share_tensor_buffer_op_handle.cc paddle/fluid/framework/details/ssa_graph_executor.cc paddle/fluid/framework/details/threaded_ssa_graph_executor.cc paddle/fluid/framework/details/parallel_ssa_graph_executor.cc paddle/fluid/framework/details/async_ssa_graph_executor.cc paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc paddle/fluid/framework/details/build_strategy.cc paddle/fluid/framework/fleet/fleet_wrapper.cc paddle/fluid/framework/fleet/nccl_wrapper.cc paddle/fluid/framework/io/fs.cc paddle/fluid/framework/io/shell.cc paddle/fluid/framework/framework.proto paddle/fluid/framework/data_feed.proto paddle/fluid/framework/trainer_desc.proto paddle/fluid/framework/ddim.cc paddle/fluid/framework/data_type.cc paddle/fluid/framework/tensor.cc paddle/fluid/framework/tensor_util.cc paddle/fluid/framework/lod_tensor.cc paddle/fluid/framework/garbage_collector.cc paddle/fluid/framework/reader.cc paddle/fluid/framework/threadpool.cc paddle/fluid/framework/var_type_traits.cc paddle/fluid/framework/scope.cc paddle/fluid/framework/scope_pool.cc paddle/fluid/framework/data_device_transform.cc paddle/fluid/framework/data_type_transform.cc paddle/fluid/framework/data_layout_transform.cc paddle/fluid/framework/data_transform.cc paddle/fluid/framework/attribute.cc paddle/fluid/framework/op_proto_maker.cc paddle/fluid/framework/op_info.cc paddle/fluid/framework/shape_inference.cc paddle/fluid/framework/transfer_scope_cache.cc paddle/fluid/framework/op_kernel_type.cc paddle/fluid/framework/operator.cc paddle/fluid/framework/version.cc paddle/fluid/framework/var_desc.cc paddle/fluid/framework/op_desc.cc paddle/fluid/framework/block_desc.cc paddle/fluid/framework/program_desc.cc paddle/fluid/framework/op_registry.cc paddle/fluid/framework/lod_rank_table.cc paddle/fluid/framework/feed_fetch_method.cc paddle/fluid/framework/variable_helper.cc 
paddle/fluid/framework/naive_executor.cc paddle/fluid/framework/executor_gc_helper.cc paddle/fluid/framework/executor.cc paddle/fluid/framework/multi_trainer.cc paddle/fluid/framework/pipeline_trainer.cc paddle/fluid/framework/dataset_factory.cc paddle/fluid/framework/dist_multi_trainer.cc paddle/fluid/framework/trainer_factory.cc paddle/fluid/framework/trainer.cc paddle/fluid/framework/data_feed_factory.cc paddle/fluid/framework/data_feed.cc paddle/fluid/framework/device_worker.cc paddle/fluid/framework/hogwild_worker.cc paddle/fluid/framework/downpour_worker.cc paddle/fluid/framework/pull_dense_worker.cc paddle/fluid/framework/section_worker.cc paddle/fluid/framework/device_worker_factory.cc paddle/fluid/framework/data_set.cc paddle/fluid/framework/parallel_executor.cc paddle/fluid/framework/prune.cc paddle/fluid/framework/selected_rows.cc paddle/fluid/framework/dlpack_tensor.cc paddle/fluid/imperative/flags.cc paddle/fluid/operators/math/detail/avx_functions.cc paddle/fluid/operators/math/concat_and_split.cc paddle/fluid/operators/math/context_project.cc paddle/fluid/operators/math/cross_entropy.cc paddle/fluid/operators/math/cos_sim_functor.cc paddle/fluid/operators/math/im2col.cc paddle/fluid/operators/math/sample_prob.cc paddle/fluid/operators/math/sampler.cc paddle/fluid/operators/math/gru_compute.cc paddle/fluid/operators/math/lstm_compute.cc paddle/fluid/operators/math/blas.cc paddle/fluid/operators/math/math_function.cc paddle/fluid/operators/math/maxouting.cc paddle/fluid/operators/math/pooling.cc paddle/fluid/operators/math/selected_rows_functor.cc paddle/fluid/operators/math/sequence2batch.cc paddle/fluid/operators/math/sequence_padding.cc paddle/fluid/operators/math/sequence_pooling.cc paddle/fluid/operators/math/sequence_scale.cc paddle/fluid/operators/math/softmax.cc paddle/fluid/operators/math/beam_search.cc paddle/fluid/operators/math/matrix_bit_code.cc paddle/fluid/operators/math/unpooling.cc paddle/fluid/operators/math/vol2col.cc 
paddle/fluid/operators/math/tree2col.cc paddle/fluid/operators/controlflow/feed_op.cc paddle/fluid/operators/controlflow/logical_op.cc paddle/fluid/operators/controlflow/while_op.cc paddle/fluid/operators/controlflow/get_places_op.cc paddle/fluid/operators/controlflow/fetch_op.cc paddle/fluid/operators/controlflow/compare_op.cc paddle/fluid/operators/controlflow/conditional_block_infer_op.cc paddle/fluid/operators/controlflow/conditional_block_op.cc paddle/fluid/operators/controlflow/tensor_array_read_write_op.cc paddle/fluid/operators/controlflow/op_variant.cc paddle/fluid/operators/controlflow/recurrent_op_helper.cc paddle/fluid/operators/controlflow/while_op_helper.cc paddle/fluid/operators/detection/bipartite_match_op.cc paddle/fluid/operators/detection/box_coder_op.cc paddle/fluid/operators/detection/iou_similarity_op.cc paddle/fluid/operators/detection/mine_hard_examples_op.cc paddle/fluid/operators/detection/multiclass_nms_op.cc paddle/fluid/operators/detection/poly_util.cc paddle/fluid/operators/detection/gpc.cc paddle/fluid/operators/detection/prior_box_op.cc paddle/fluid/operators/detection/density_prior_box_op.cc paddle/fluid/operators/detection/anchor_generator_op.cc paddle/fluid/operators/detection/target_assign_op.cc paddle/fluid/operators/detection/polygon_box_transform_op.cc paddle/fluid/operators/detection/rpn_target_assign_op.cc paddle/fluid/operators/detection/generate_proposal_labels_op.cc paddle/fluid/operators/detection/box_clip_op.cc paddle/fluid/operators/detection/yolov3_loss_op.cc paddle/fluid/operators/detection/yolo_box_op.cc paddle/fluid/operators/detection/box_decoder_and_assign_op.cc paddle/fluid/operators/detection/sigmoid_focal_loss_op.cc paddle/fluid/operators/detection/retinanet_detection_output_op.cc paddle/fluid/operators/detection/generate_proposals_op.cc paddle/fluid/operators/detection/distribute_fpn_proposals_op.cc paddle/fluid/operators/detection/collect_fpn_proposals_op.cc 
paddle/fluid/operators/detection/roi_perspective_transform_op.cc paddle/fluid/operators/detection/mask_util.cc paddle/fluid/operators/detection/generate_mask_labels_op.cc paddle/fluid/operators/elementwise/elementwise_mod_op.cc paddle/fluid/operators/elementwise/elementwise_floordiv_op.cc paddle/fluid/operators/elementwise/elementwise_max_op.cc paddle/fluid/operators/elementwise/elementwise_pow_op.cc paddle/fluid/operators/elementwise/elementwise_sub_op.cc paddle/fluid/operators/elementwise/elementwise_add_op.cc paddle/fluid/operators/elementwise/elementwise_min_op.cc paddle/fluid/operators/elementwise/elementwise_div_op.cc paddle/fluid/operators/elementwise/elementwise_mul_op.cc paddle/fluid/operators/fused/fusion_squared_mat_sub_op.cc paddle/fluid/operators/fused/fusion_seqpool_concat_op.cc paddle/fluid/operators/fused/fused_embedding_fc_lstm_op.cc paddle/fluid/operators/fused/fusion_seqexpand_concat_fc_op.cc paddle/fluid/operators/fused/fused_embedding_seq_pool_op.cc paddle/fluid/operators/fused/fusion_seqconv_eltadd_relu_op.cc paddle/fluid/operators/fused/fusion_gru_op.cc paddle/fluid/operators/fused/fusion_repeated_fc_relu_op.cc paddle/fluid/operators/fused/fusion_lstm_op.cc paddle/fluid/operators/fused/fused_elemwise_activation_op.cc paddle/fluid/operators/metrics/accuracy_op.cc paddle/fluid/operators/metrics/precision_recall_op.cc paddle/fluid/operators/metrics/auc_op.cc paddle/fluid/operators/optimizers/adamax_op.cc paddle/fluid/operators/optimizers/sgd_op.cc paddle/fluid/operators/optimizers/lars_momentum_op.cc paddle/fluid/operators/optimizers/adagrad_op.cc paddle/fluid/operators/optimizers/ftrl_op.cc paddle/fluid/operators/optimizers/momentum_op.cc paddle/fluid/operators/optimizers/adadelta_op.cc paddle/fluid/operators/optimizers/rmsprop_op.cc paddle/fluid/operators/optimizers/lamb_op.cc paddle/fluid/operators/optimizers/proximal_gd_op.cc paddle/fluid/operators/optimizers/proximal_adagrad_op.cc paddle/fluid/operators/optimizers/adam_op.cc 
paddle/fluid/operators/optimizers/decayed_adagrad_op.cc paddle/fluid/operators/reduce_ops/reduce_all_op.cc paddle/fluid/operators/reduce_ops/reduce_min_op.cc paddle/fluid/operators/reduce_ops/reduce_sum_op.cc paddle/fluid/operators/reduce_ops/reduce_any_op.cc paddle/fluid/operators/reduce_ops/reduce_max_op.cc paddle/fluid/operators/reduce_ops/reduce_mean_op.cc paddle/fluid/operators/reduce_ops/reduce_prod_op.cc paddle/fluid/operators/sequence_ops/sequence_erase_op.cc paddle/fluid/operators/sequence_ops/sequence_unpad_op.cc paddle/fluid/operators/sequence_ops/sequence_mask_op.cc paddle/fluid/operators/sequence_ops/sequence_expand_op.cc paddle/fluid/operators/sequence_ops/sequence_pad_op.cc paddle/fluid/operators/sequence_ops/sequence_enumerate_op.cc paddle/fluid/operators/sequence_ops/sequence_slice_op.cc paddle/fluid/operators/sequence_ops/sequence_softmax_op.cc paddle/fluid/operators/sequence_ops/sequence_expand_as_op.cc paddle/fluid/operators/sequence_ops/sequence_pool_op.cc paddle/fluid/operators/sequence_ops/sequence_reverse_op.cc paddle/fluid/operators/sequence_ops/sequence_scatter_op.cc paddle/fluid/operators/sequence_ops/sequence_conv_op.cc paddle/fluid/operators/sequence_ops/sequence_concat_op.cc paddle/fluid/operators/sequence_ops/sequence_reshape_op.cc paddle/fluid/operators/jit/helper.cc paddle/fluid/operators/jit/kernel_key.cc paddle/fluid/operators/jit/gen_base.cc paddle/fluid/operators/jit/kernel_pool.cc paddle/fluid/operators/jit/refer/refer.cc paddle/fluid/operators/jit/more/mix/mix.cc paddle/fluid/operators/jit/gen/sgd.cc paddle/fluid/operators/jit/gen/hopv.cc paddle/fluid/operators/jit/gen/lstm.cc paddle/fluid/operators/jit/gen/gru.cc paddle/fluid/operators/jit/gen/vbroadcast.cc paddle/fluid/operators/jit/gen/matmul.cc paddle/fluid/operators/jit/gen/seqpool.cc paddle/fluid/operators/jit/gen/embseqpool.cc paddle/fluid/operators/jit/gen/act.cc paddle/fluid/operators/jit/gen/blas.cc paddle/fluid/operators/reader/reader_op_registry.cc 
paddle/fluid/operators/reader/py_reader.cc paddle/fluid/operators/reader/buffered_reader.cc paddle/fluid/operators/reader/open_files_op.cc paddle/fluid/operators/reader/create_random_data_generator_op.cc paddle/fluid/operators/reader/create_shuffle_reader_op.cc paddle/fluid/operators/reader/create_batch_reader_op.cc paddle/fluid/operators/reader/create_recordio_file_reader_op.cc paddle/fluid/operators/reader/create_double_buffer_reader_op.cc paddle/fluid/operators/reader/create_multi_pass_reader_op.cc paddle/fluid/operators/reader/create_custom_reader_op.cc paddle/fluid/operators/reader/create_py_reader_op.cc paddle/fluid/operators/reader/read_op.cc paddle/fluid/operators/increment_op.cc paddle/fluid/operators/stack_op.cc paddle/fluid/operators/fc_op.cc paddle/fluid/operators/assign_op.cc paddle/fluid/operators/load_op.cc paddle/fluid/operators/fill_op.cc paddle/fluid/operators/reorder_lod_tensor_by_rank_op.cc paddle/fluid/operators/conv_shift_op.cc paddle/fluid/operators/fill_zeros_like_op.cc paddle/fluid/operators/hash_op.cc paddle/fluid/operators/dequantize_op.cc paddle/fluid/operators/fake_quantize_op.cc paddle/fluid/operators/size_op.cc paddle/fluid/operators/scatter_op.cc paddle/fluid/operators/uniform_random_op.cc paddle/fluid/operators/beam_search_op.cc paddle/fluid/operators/beam_search_decode_op.cc paddle/fluid/operators/dropout_op.cc paddle/fluid/operators/interpolate_op.cc paddle/fluid/operators/sampling_id_op.cc paddle/fluid/operators/lstm_op.cc paddle/fluid/operators/modified_huber_loss_op.cc paddle/fluid/operators/temporal_shift_op.cc paddle/fluid/operators/sum_op.cc paddle/fluid/operators/arg_min_op.cc paddle/fluid/operators/psroi_pool_op.cc paddle/fluid/operators/uniform_random_batch_size_like_op.cc paddle/fluid/operators/rnn_memory_helper_op.cc paddle/fluid/operators/crf_decoding_op.cc paddle/fluid/operators/where_op.cc paddle/fluid/operators/fake_dequantize_op.cc paddle/fluid/operators/mean_iou_op.cc paddle/fluid/operators/roi_align_op.cc 
paddle/fluid/operators/range_op.cc paddle/fluid/operators/edit_distance_op.cc paddle/fluid/operators/multiplex_op.cc paddle/fluid/operators/clip_op.cc paddle/fluid/operators/gaussian_random_op.cc paddle/fluid/operators/norm_op.cc paddle/fluid/operators/rank_loss_op.cc paddle/fluid/operators/detection_map_op.cc paddle/fluid/operators/lstm_unit_op.cc paddle/fluid/operators/shard_index_op.cc paddle/fluid/operators/shape_op.cc paddle/fluid/operators/arg_max_op.cc paddle/fluid/operators/average_accumulates_op.cc paddle/fluid/operators/requantize_op.cc paddle/fluid/operators/conv_op.cc paddle/fluid/operators/add_position_encoding_op.cc paddle/fluid/operators/gru_unit_op.cc paddle/fluid/operators/batch_norm_op.cc paddle/fluid/operators/chunk_eval_op.cc paddle/fluid/operators/lod_rank_table_op.cc paddle/fluid/operators/unsqueeze_op.cc paddle/fluid/operators/positive_negative_pair_op.cc paddle/fluid/operators/im2sequence_op.cc paddle/fluid/operators/margin_rank_loss_op.cc paddle/fluid/operators/hinge_loss_op.cc paddle/fluid/operators/cvm_op.cc paddle/fluid/operators/huber_loss_op.cc paddle/fluid/operators/crop_op.cc paddle/fluid/operators/activation_op.cc paddle/fluid/operators/hierarchical_sigmoid_op.cc paddle/fluid/operators/unfold_op.cc paddle/fluid/operators/max_sequence_len_op.cc paddle/fluid/operators/mul_op.cc paddle/fluid/operators/attention_lstm_op.cc paddle/fluid/operators/top_k_op.cc paddle/fluid/operators/group_norm_op.cc paddle/fluid/operators/selu_op.cc paddle/fluid/operators/lstmp_op.cc paddle/fluid/operators/merge_lod_tensor_op.cc paddle/fluid/operators/truncated_gaussian_random_op.cc paddle/fluid/operators/label_smooth_op.cc paddle/fluid/operators/matmul_op.cc paddle/fluid/operators/spp_op.cc paddle/fluid/operators/unstack_op.cc paddle/fluid/operators/conv_transpose_op.cc paddle/fluid/operators/diag_op.cc paddle/fluid/operators/unpool_op.cc paddle/fluid/operators/lod_array_length_op.cc paddle/fluid/operators/affine_channel_op.cc 
paddle/fluid/operators/log_loss_op.cc paddle/fluid/operators/concat_op.cc paddle/fluid/operators/lod_tensor_to_array_op.cc paddle/fluid/operators/gru_op.cc paddle/fluid/operators/coalesce_tensor_op.cc paddle/fluid/operators/fsp_op.cc paddle/fluid/operators/linspace_op.cc paddle/fluid/operators/reverse_op.cc paddle/fluid/operators/recurrent_op.cc paddle/fluid/operators/split_selected_rows_op.cc paddle/fluid/operators/dgc_clip_by_norm_op.cc paddle/fluid/operators/scale_op.cc paddle/fluid/operators/save_op.cc paddle/fluid/operators/load_combine_op.cc paddle/fluid/operators/merge_selected_rows_op.cc paddle/fluid/operators/split_op.cc paddle/fluid/operators/cumsum_op.cc paddle/fluid/operators/deformable_psroi_pooling_op.cc paddle/fluid/operators/teacher_student_sigmoid_loss_op.cc paddle/fluid/operators/transpose_op.cc paddle/fluid/operators/fill_constant_batch_size_like_op.cc paddle/fluid/operators/sigmoid_cross_entropy_with_logits_op.cc paddle/fluid/operators/shuffle_channel_op.cc paddle/fluid/operators/affine_grid_op.cc paddle/fluid/operators/split_lod_tensor_op.cc paddle/fluid/operators/grid_sampler_op.cc paddle/fluid/operators/lookup_table_op.cc paddle/fluid/operators/cos_sim_op.cc paddle/fluid/operators/quantize_op.cc paddle/fluid/operators/spectral_norm_op.cc paddle/fluid/operators/cross_entropy_op.cc paddle/fluid/operators/print_op.cc paddle/fluid/operators/lrn_op.cc paddle/fluid/operators/nce_op.cc paddle/fluid/operators/similarity_focus_op.cc paddle/fluid/operators/get_tensor_from_selected_rows_op.cc paddle/fluid/operators/squared_l2_distance_op.cc paddle/fluid/operators/cudnn_lstm_op.cc paddle/fluid/operators/tree_conv_op.cc paddle/fluid/operators/one_hot_op.cc paddle/fluid/operators/lookup_sparse_table_op.cc paddle/fluid/operators/unique_op.cc paddle/fluid/operators/mean_op.cc paddle/fluid/operators/prelu_op.cc paddle/fluid/operators/delete_var_op.cc paddle/fluid/operators/ctc_align_op.cc paddle/fluid/operators/argsort_op.cc 
paddle/fluid/operators/data_norm_op.cc paddle/fluid/operators/minus_op.cc paddle/fluid/operators/shrink_rnn_memory_op.cc paddle/fluid/operators/lod_reset_op.cc paddle/fluid/operators/l1_norm_op.cc paddle/fluid/operators/gaussian_random_batch_size_like_op.cc paddle/fluid/operators/is_empty_op.cc paddle/fluid/operators/bilinear_tensor_product_op.cc paddle/fluid/operators/kldiv_loss_op.cc paddle/fluid/operators/squeeze_op.cc paddle/fluid/operators/softmax_op.cc paddle/fluid/operators/clip_by_norm_op.cc paddle/fluid/operators/pool_with_index_op.cc paddle/fluid/operators/linear_chain_crf_op.cc paddle/fluid/operators/reshape_op.cc paddle/fluid/operators/fill_constant_op.cc paddle/fluid/operators/space_to_depth_op.cc paddle/fluid/operators/gather_op.cc paddle/fluid/operators/softmax_with_cross_entropy_op.cc paddle/fluid/operators/slice_op.cc paddle/fluid/operators/sign_op.cc paddle/fluid/operators/expand_op.cc paddle/fluid/operators/smooth_l1_loss_op.cc paddle/fluid/operators/tensor_array_to_tensor_op.cc paddle/fluid/operators/row_conv_op.cc paddle/fluid/operators/pad2d_op.cc paddle/fluid/operators/pixel_shuffle_op.cc paddle/fluid/operators/assign_value_op.cc paddle/fluid/operators/random_crop_op.cc paddle/fluid/operators/squared_l2_norm_op.cc paddle/fluid/operators/save_combine_op.cc paddle/fluid/operators/pool_op.cc paddle/fluid/operators/cast_op.cc paddle/fluid/operators/array_to_lod_tensor_op.cc paddle/fluid/operators/fill_any_like_op.cc paddle/fluid/operators/flatten_op.cc paddle/fluid/operators/sample_logits_op.cc paddle/fluid/operators/pad_op.cc paddle/fluid/operators/bpr_loss_op.cc paddle/fluid/operators/roi_pool_op.cc paddle/fluid/operators/pad_constant_like_op.cc paddle/fluid/operators/isfinite_op.cc paddle/fluid/operators/layer_norm_op.cc paddle/fluid/operators/maxout_op.cc paddle/fluid/operators/warpctc_op.cc paddle/fluid/string/piece.cc paddle/fluid/string/pretty_log.cc paddle/fluid/string/string_helper.cc paddle/fluid/recordio/header.cc 
paddle/fluid/recordio/chunk.cc paddle/fluid/recordio/writer.cc paddle/fluid/recordio/scanner.cc paddle/fluid/inference/io.cc paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc paddle/fluid/inference/analysis/ir_passes/subgraph_util.cc paddle/fluid/inference/analysis/passes/ir_graph_build_pass.cc paddle/fluid/inference/analysis/passes/ir_analysis_pass.cc paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.cc paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.cc paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.cc paddle/fluid/inference/analysis/passes/inference_op_replace_pass.cc paddle/fluid/inference/analysis/passes/passes.cc paddle/fluid/inference/analysis/helper.cc paddle/fluid/inference/analysis/ir_pass_manager.cc paddle/fluid/inference/analysis/argument.cc paddle/fluid/inference/analysis/analysis_pass.cc paddle/fluid/inference/analysis/analyzer.cc paddle/fluid/inference/utils/benchmark.cc paddle/fluid/inference/api/api.cc paddle/fluid/inference/api/api_impl.cc paddle/fluid/inference/api/helper.cc paddle/fluid/inference/api/analysis_predictor.cc paddle/fluid/inference/api/details/zero_copy_tensor.cc paddle/fluid/inference/api/details/reset_tensor_array.cc paddle/fluid/inference/api/analysis_config.cc paddle/fluid/inference/api/paddle_pass_builder.cc" paddle_fluid_noavx_openblas_src += ' paddle/fluid/framework/revision.cc' diff --git a/paddle/fluid/pybind/.gitignore b/paddle/fluid/pybind/.gitignore deleted file mode 100644 index 8f222791edb016df65be5db75831f5f83cf63726..0000000000000000000000000000000000000000 --- a/paddle/fluid/pybind/.gitignore +++ /dev/null @@ -1 +0,0 @@ -pybind.h diff --git a/paddle/fluid/pybind/pybind.h b/paddle/fluid/pybind/pybind.h new file mode 100644 index 0000000000000000000000000000000000000000..40ed4ea0485935413f55075b6bd00de0385a96b8 --- /dev/null +++ b/paddle/fluid/pybind/pybind.h @@ -0,0 +1,553 @@ 
+#ifdef PYBIND_AVX_MKLML +// Generated by the paddle/fluid/operator/CMakeLists.txt. DO NOT EDIT! + +USE_NO_KERNEL_OP(feed); +USE_NO_KERNEL_OP(while); +USE_NO_KERNEL_OP(get_places); +USE_NO_KERNEL_OP(fetch); +USE_NO_KERNEL_OP(conditional_block_infer); +USE_NO_KERNEL_OP(conditional_block); +USE_OP(less_than); +USE_OP(logical_and); +USE_NO_KERNEL_OP(read_from_array); +USE_CPU_ONLY_OP(bipartite_match); +USE_OP(box_coder); +USE_OP(iou_similarity); +USE_CPU_ONLY_OP(mine_hard_examples); +USE_CPU_ONLY_OP(multiclass_nms); +USE_OP(prior_box); +USE_OP(density_prior_box); +USE_OP(anchor_generator); +USE_OP(target_assign); +USE_OP(polygon_box_transform); +USE_CPU_ONLY_OP(rpn_target_assign); +USE_CPU_ONLY_OP(generate_proposal_labels); +USE_OP(box_clip); +USE_CPU_ONLY_OP(yolov3_loss); +USE_OP(yolo_box); +USE_OP(box_decoder_and_assign); +USE_OP(sigmoid_focal_loss); +USE_CPU_ONLY_OP(retinanet_detection_output); +USE_CPU_ONLY_OP(generate_proposals); +USE_CPU_ONLY_OP(distribute_fpn_proposals); +USE_CPU_ONLY_OP(collect_fpn_proposals); +USE_OP(roi_perspective_transform); +USE_CPU_ONLY_OP(generate_mask_labels); +USE_OP(elementwise_mod); +USE_OP(elementwise_floordiv); +USE_OP(elementwise_max); +USE_OP(elementwise_pow); +USE_OP(elementwise_sub_grad); +USE_OP(elementwise_add_grad); +USE_OP(elementwise_min); +USE_OP(elementwise_div); +USE_OP(elementwise_mul); +USE_CPU_ONLY_OP(fusion_squared_mat_sub); +USE_CPU_ONLY_OP(fusion_seqpool_concat); +USE_CPU_ONLY_OP(fused_embedding_fc_lstm); +USE_CPU_ONLY_OP(fusion_seqexpand_concat_fc); +USE_CPU_ONLY_OP(fused_embedding_seq_pool); +USE_CPU_ONLY_OP(fusion_seqconv_eltadd_relu); +USE_CPU_ONLY_OP(fusion_gru); +USE_CPU_ONLY_OP(fusion_repeated_fc_relu); +USE_CPU_ONLY_OP(fusion_lstm); +USE_OP(fused_elemwise_activation); +USE_OP(accuracy); +USE_CPU_ONLY_OP(precision_recall); +USE_CPU_ONLY_OP(auc); +USE_OP(adamax); +USE_OP(sgd); +USE_OP(lars_momentum); +USE_OP(adagrad); +USE_OP(ftrl); +USE_OP(momentum); +USE_OP(adadelta); +USE_OP(rmsprop); +USE_OP(lamb); 
+USE_OP(proximal_gd); +USE_OP(proximal_adagrad); +USE_OP(adam); +USE_OP(decayed_adagrad); +USE_OP(reduce_all); +USE_OP(reduce_min); +USE_OP(reduce_sum); +USE_OP(reduce_any); +USE_OP(reduce_max); +USE_OP(reduce_mean); +USE_OP(reduce_prod); +USE_OP(sequence_erase); +USE_OP(sequence_unpad); +USE_OP(sequence_mask); +USE_OP(sequence_expand); +USE_OP(sequence_pad); +USE_OP(sequence_enumerate); +USE_OP(sequence_slice); +USE_OP(sequence_softmax); +USE_OP(sequence_expand_as); +USE_OP(sequence_pool); +USE_OP(sequence_reverse); +USE_CPU_ONLY_OP(sequence_scatter); +USE_OP(sequence_conv); +USE_OP(sequence_concat); +USE_OP(sequence_reshape); +USE_NO_KERNEL_OP(open_files); +USE_NO_KERNEL_OP(create_random_data_generator); +USE_NO_KERNEL_OP(create_shuffle_reader); +USE_NO_KERNEL_OP(create_batch_reader); +USE_NO_KERNEL_OP(create_recordio_file_reader); +USE_NO_KERNEL_OP(create_double_buffer_reader); +USE_NO_KERNEL_OP(create_multi_pass_reader); +USE_NO_KERNEL_OP(create_custom_reader); +USE_NO_KERNEL_OP(create_py_reader); +USE_NO_KERNEL_OP(read); +USE_OP(increment); +USE_OP(stack); +USE_CPU_ONLY_OP(fc); +USE_NO_KERNEL_OP(assign); +USE_OP(load); +USE_NO_KERNEL_OP(fill); +USE_NO_KERNEL_OP(reorder_lod_tensor_by_rank); +USE_OP(conv_shift); +USE_OP(fill_zeros_like); +USE_CPU_ONLY_OP(hash); +USE_NO_KERNEL_OP(dequantize); +USE_OP(fake_quantize_abs_max); +USE_OP(size); +USE_OP(scatter); +USE_OP(uniform_random); +USE_OP(beam_search); +USE_NO_KERNEL_OP(beam_search_decode); +USE_OP(dropout); +USE_OP(bilinear_interp); +USE_OP(sampling_id); +USE_OP(lstm); +USE_OP(modified_huber_loss); +USE_OP(temporal_shift); +USE_OP(sum); +USE_OP(arg_min); +USE_OP(psroi_pool); +USE_NO_KERNEL_OP(uniform_random_batch_size_like); +USE_NO_KERNEL_OP(rnn_memory_helper); +USE_CPU_ONLY_OP(crf_decoding); +USE_OP(where); +USE_OP(fake_dequantize_max_abs); +USE_OP(mean_iou); +USE_OP(roi_align); +USE_OP(range); +USE_OP(edit_distance); +USE_OP(multiplex); +USE_OP(clip); +USE_OP(gaussian_random); +USE_OP(norm); 
+USE_OP(rank_loss); +USE_CPU_ONLY_OP(detection_map); +USE_OP(lstm_unit); +USE_OP(shard_index); +USE_OP(shape); +USE_OP(arg_max); +USE_OP(average_accumulates); +USE_NO_KERNEL_OP(requantize); +USE_OP(conv2d); +USE_CPU_ONLY_OP(add_position_encoding); +USE_OP(gru_unit); +USE_OP(batch_norm); +USE_CPU_ONLY_OP(chunk_eval); +USE_NO_KERNEL_OP(lod_rank_table); +USE_NO_KERNEL_OP(unsqueeze); +USE_CPU_ONLY_OP(positive_negative_pair); +USE_OP(im2sequence); +USE_OP(margin_rank_loss); +USE_OP(hinge_loss); +USE_CPU_ONLY_OP(cvm); +USE_OP(huber_loss); +USE_OP(crop); +USE_OP(relu_grad); +USE_CPU_ONLY_OP(hierarchical_sigmoid); +USE_OP(unfold); +USE_NO_KERNEL_OP(max_sequence_len); +USE_OP(mul); +USE_CPU_ONLY_OP(attention_lstm); +USE_OP(top_k); +USE_OP(group_norm); +USE_OP(selu); +USE_OP(lstmp); +USE_NO_KERNEL_OP(merge_lod_tensor); +USE_OP(truncated_gaussian_random); +USE_OP(label_smooth); +USE_CPU_ONLY_OP(matmul); +USE_OP(spp); +USE_NO_KERNEL_OP(unstack); +USE_OP(conv2d_transpose); +USE_OP(diag); +USE_OP(unpool); +USE_NO_KERNEL_OP(lod_array_length); +USE_OP(affine_channel); +USE_OP(log_loss); +USE_OP(concat); +USE_NO_KERNEL_OP(lod_tensor_to_array); +USE_OP(gru); +USE_CPU_ONLY_OP(coalesce_tensor); +USE_OP(fsp); +USE_OP(linspace); +USE_OP(reverse); +USE_NO_KERNEL_OP(recurrent); +USE_OP(split_selected_rows); +USE_OP(dgc_clip_by_norm); +USE_OP(scale); +USE_OP(save); +USE_OP(load_combine); +USE_OP(merge_selected_rows); +USE_OP(split); +USE_OP(cumsum); +USE_OP(deformable_psroi_pooling); +USE_CPU_ONLY_OP(teacher_student_sigmoid_loss); +USE_OP(transpose); +USE_OP(fill_constant_batch_size_like); +USE_OP(sigmoid_cross_entropy_with_logits); +USE_OP(shuffle_channel); +USE_CPU_ONLY_OP(affine_grid); +USE_NO_KERNEL_OP(split_lod_tensor); +USE_CPU_ONLY_OP(grid_sampler); +USE_OP(lookup_table); +USE_OP(cos_sim); +USE_NO_KERNEL_OP(quantize); +USE_OP(spectral_norm); +USE_OP(cross_entropy); +USE_NO_KERNEL_OP(print); +USE_OP(lrn); +USE_CPU_ONLY_OP(nce); +USE_CPU_ONLY_OP(similarity_focus); 
+USE_CPU_ONLY_OP(get_tensor_from_selected_rows); +USE_OP(squared_l2_distance); +USE_OP(cudnn_lstm); +USE_OP(tree_conv); +USE_OP(one_hot); +USE_NO_KERNEL_OP(lookup_sparse_table); +USE_CPU_ONLY_OP(unique); +USE_OP(mean); +USE_OP(prelu); +USE_NO_KERNEL_OP(delete_var); +USE_OP(ctc_align); +USE_OP(argsort); +USE_CPU_ONLY_OP(data_norm); +USE_OP(minus); +USE_NO_KERNEL_OP(shrink_rnn_memory); +USE_OP(lod_reset); +USE_OP(l1_norm); +USE_NO_KERNEL_OP(gaussian_random_batch_size_like); +USE_OP(is_empty); +USE_OP(bilinear_tensor_product); +USE_OP(kldiv_loss); +USE_NO_KERNEL_OP(squeeze); +USE_OP(softmax); +USE_OP(clip_by_norm); +USE_OP(max_pool2d_with_index); +USE_OP(linear_chain_crf); +USE_CPU_ONLY_OP(reshape); +USE_OP(fill_constant); +USE_OP(space_to_depth); +USE_OP(gather); +USE_OP(softmax_with_cross_entropy); +USE_OP(slice); +USE_OP(sign); +USE_OP(expand); +USE_OP(smooth_l1_loss); +USE_NO_KERNEL_OP(tensor_array_to_tensor); +USE_OP(row_conv); +USE_OP(pad2d); +USE_OP(pixel_shuffle); +USE_OP(assign_value); +USE_OP(random_crop); +USE_OP(squared_l2_norm); +USE_OP(save_combine); +USE_OP(pool2d); +USE_OP(cast); +USE_NO_KERNEL_OP(array_to_lod_tensor); +USE_OP(fill_any_like); +USE_NO_KERNEL_OP(flatten); +USE_OP(sample_logits); +USE_OP(pad); +USE_CPU_ONLY_OP(bpr_loss); +USE_OP(roi_pool); +USE_OP(pad_constant_like); +USE_OP(isfinite); +USE_OP(layer_norm); +USE_OP(maxout); +USE_OP(warpctc); +#elif defined PYBIND_NOAVX_OPENBLAS +// Generated by the paddle/fluid/operator/CMakeLists.txt. DO NOT EDIT! 
+ +USE_NO_KERNEL_OP(feed); +USE_NO_KERNEL_OP(while); +USE_NO_KERNEL_OP(get_places); +USE_NO_KERNEL_OP(fetch); +USE_NO_KERNEL_OP(conditional_block_infer); +USE_NO_KERNEL_OP(conditional_block); +USE_OP(less_than); +USE_OP(logical_and); +USE_NO_KERNEL_OP(read_from_array); +USE_CPU_ONLY_OP(bipartite_match); +USE_OP(box_coder); +USE_OP(iou_similarity); +USE_CPU_ONLY_OP(mine_hard_examples); +USE_CPU_ONLY_OP(multiclass_nms); +USE_OP(prior_box); +USE_OP(density_prior_box); +USE_OP(anchor_generator); +USE_OP(target_assign); +USE_OP(polygon_box_transform); +USE_CPU_ONLY_OP(rpn_target_assign); +USE_CPU_ONLY_OP(generate_proposal_labels); +USE_OP(box_clip); +USE_CPU_ONLY_OP(yolov3_loss); +USE_OP(yolo_box); +USE_OP(box_decoder_and_assign); +USE_OP(sigmoid_focal_loss); +USE_CPU_ONLY_OP(retinanet_detection_output); +USE_CPU_ONLY_OP(generate_proposals); +USE_CPU_ONLY_OP(distribute_fpn_proposals); +USE_CPU_ONLY_OP(collect_fpn_proposals); +USE_OP(roi_perspective_transform); +USE_CPU_ONLY_OP(generate_mask_labels); +USE_OP(elementwise_mod); +USE_OP(elementwise_floordiv); +USE_OP(elementwise_max); +USE_OP(elementwise_pow); +USE_OP(elementwise_sub_grad); +USE_OP(elementwise_add_grad); +USE_OP(elementwise_min); +USE_OP(elementwise_div); +USE_OP(elementwise_mul); +USE_CPU_ONLY_OP(fusion_squared_mat_sub); +USE_CPU_ONLY_OP(fusion_seqpool_concat); +USE_CPU_ONLY_OP(fused_embedding_fc_lstm); +USE_CPU_ONLY_OP(fusion_seqexpand_concat_fc); +USE_CPU_ONLY_OP(fused_embedding_seq_pool); +USE_CPU_ONLY_OP(fusion_seqconv_eltadd_relu); +USE_CPU_ONLY_OP(fusion_gru); +USE_CPU_ONLY_OP(fusion_repeated_fc_relu); +USE_CPU_ONLY_OP(fusion_lstm); +USE_OP(fused_elemwise_activation); +USE_OP(accuracy); +USE_CPU_ONLY_OP(precision_recall); +USE_CPU_ONLY_OP(auc); +USE_OP(adamax); +USE_OP(sgd); +USE_OP(lars_momentum); +USE_OP(adagrad); +USE_OP(ftrl); +USE_OP(momentum); +USE_OP(adadelta); +USE_OP(rmsprop); +USE_OP(lamb); +USE_OP(proximal_gd); +USE_OP(proximal_adagrad); +USE_OP(adam); +USE_OP(decayed_adagrad); 
+USE_OP(reduce_all); +USE_OP(reduce_min); +USE_OP(reduce_sum); +USE_OP(reduce_any); +USE_OP(reduce_max); +USE_OP(reduce_mean); +USE_OP(reduce_prod); +USE_OP(sequence_erase); +USE_OP(sequence_unpad); +USE_OP(sequence_mask); +USE_OP(sequence_expand); +USE_OP(sequence_pad); +USE_OP(sequence_enumerate); +USE_OP(sequence_slice); +USE_OP(sequence_softmax); +USE_OP(sequence_expand_as); +USE_OP(sequence_pool); +USE_OP(sequence_reverse); +USE_CPU_ONLY_OP(sequence_scatter); +USE_OP(sequence_conv); +USE_OP(sequence_concat); +USE_OP(sequence_reshape); +USE_NO_KERNEL_OP(open_files); +USE_NO_KERNEL_OP(create_random_data_generator); +USE_NO_KERNEL_OP(create_shuffle_reader); +USE_NO_KERNEL_OP(create_batch_reader); +USE_NO_KERNEL_OP(create_recordio_file_reader); +USE_NO_KERNEL_OP(create_double_buffer_reader); +USE_NO_KERNEL_OP(create_multi_pass_reader); +USE_NO_KERNEL_OP(create_custom_reader); +USE_NO_KERNEL_OP(create_py_reader); +USE_NO_KERNEL_OP(read); +USE_OP(increment); +USE_OP(stack); +USE_CPU_ONLY_OP(fc); +USE_NO_KERNEL_OP(assign); +USE_OP(load); +USE_NO_KERNEL_OP(fill); +USE_NO_KERNEL_OP(reorder_lod_tensor_by_rank); +USE_OP(conv_shift); +USE_OP(fill_zeros_like); +USE_CPU_ONLY_OP(hash); +USE_NO_KERNEL_OP(dequantize); +USE_OP(fake_quantize_abs_max); +USE_OP(size); +USE_OP(scatter); +USE_OP(uniform_random); +USE_OP(beam_search); +USE_NO_KERNEL_OP(beam_search_decode); +USE_OP(dropout); +USE_OP(bilinear_interp); +USE_OP(sampling_id); +USE_OP(lstm); +USE_OP(modified_huber_loss); +USE_OP(temporal_shift); +USE_OP(sum); +USE_OP(arg_min); +USE_OP(psroi_pool); +USE_NO_KERNEL_OP(uniform_random_batch_size_like); +USE_NO_KERNEL_OP(rnn_memory_helper); +USE_CPU_ONLY_OP(crf_decoding); +USE_OP(where); +USE_OP(fake_dequantize_max_abs); +USE_OP(mean_iou); +USE_OP(roi_align); +USE_OP(range); +USE_OP(edit_distance); +USE_OP(multiplex); +USE_OP(clip); +USE_OP(gaussian_random); +USE_OP(norm); +USE_OP(rank_loss); +USE_CPU_ONLY_OP(detection_map); +USE_OP(lstm_unit); +USE_OP(shard_index); 
+USE_OP(shape); +USE_OP(arg_max); +USE_OP(average_accumulates); +USE_NO_KERNEL_OP(requantize); +USE_OP(conv2d); +USE_CPU_ONLY_OP(add_position_encoding); +USE_OP(gru_unit); +USE_OP(batch_norm); +USE_CPU_ONLY_OP(chunk_eval); +USE_NO_KERNEL_OP(lod_rank_table); +USE_NO_KERNEL_OP(unsqueeze); +USE_CPU_ONLY_OP(positive_negative_pair); +USE_OP(im2sequence); +USE_OP(margin_rank_loss); +USE_OP(hinge_loss); +USE_CPU_ONLY_OP(cvm); +USE_OP(huber_loss); +USE_OP(crop); +USE_OP(relu_grad); +USE_CPU_ONLY_OP(hierarchical_sigmoid); +USE_OP(unfold); +USE_NO_KERNEL_OP(max_sequence_len); +USE_OP(mul); +USE_CPU_ONLY_OP(attention_lstm); +USE_OP(top_k); +USE_OP(group_norm); +USE_OP(selu); +USE_OP(lstmp); +USE_NO_KERNEL_OP(merge_lod_tensor); +USE_OP(truncated_gaussian_random); +USE_OP(label_smooth); +USE_CPU_ONLY_OP(matmul); +USE_OP(spp); +USE_NO_KERNEL_OP(unstack); +USE_OP(conv2d_transpose); +USE_OP(diag); +USE_OP(unpool); +USE_NO_KERNEL_OP(lod_array_length); +USE_OP(affine_channel); +USE_OP(log_loss); +USE_OP(concat); +USE_NO_KERNEL_OP(lod_tensor_to_array); +USE_OP(gru); +USE_CPU_ONLY_OP(coalesce_tensor); +USE_OP(fsp); +USE_OP(linspace); +USE_OP(reverse); +USE_NO_KERNEL_OP(recurrent); +USE_OP(split_selected_rows); +USE_OP(dgc_clip_by_norm); +USE_OP(scale); +USE_OP(save); +USE_OP(load_combine); +USE_OP(merge_selected_rows); +USE_OP(split); +USE_OP(cumsum); +USE_OP(deformable_psroi_pooling); +USE_CPU_ONLY_OP(teacher_student_sigmoid_loss); +USE_OP(transpose); +USE_OP(fill_constant_batch_size_like); +USE_OP(sigmoid_cross_entropy_with_logits); +USE_OP(shuffle_channel); +USE_CPU_ONLY_OP(affine_grid); +USE_NO_KERNEL_OP(split_lod_tensor); +USE_CPU_ONLY_OP(grid_sampler); +USE_OP(lookup_table); +USE_OP(cos_sim); +USE_NO_KERNEL_OP(quantize); +USE_OP(spectral_norm); +USE_OP(cross_entropy); +USE_NO_KERNEL_OP(print); +USE_OP(lrn); +USE_CPU_ONLY_OP(nce); +USE_CPU_ONLY_OP(similarity_focus); +USE_CPU_ONLY_OP(get_tensor_from_selected_rows); +USE_OP(squared_l2_distance); +USE_OP(cudnn_lstm); 
+USE_OP(tree_conv); +USE_OP(one_hot); +USE_NO_KERNEL_OP(lookup_sparse_table); +USE_CPU_ONLY_OP(unique); +USE_OP(mean); +USE_OP(prelu); +USE_NO_KERNEL_OP(delete_var); +USE_OP(ctc_align); +USE_OP(argsort); +USE_CPU_ONLY_OP(data_norm); +USE_OP(minus); +USE_NO_KERNEL_OP(shrink_rnn_memory); +USE_OP(lod_reset); +USE_OP(l1_norm); +USE_NO_KERNEL_OP(gaussian_random_batch_size_like); +USE_OP(is_empty); +USE_OP(bilinear_tensor_product); +USE_OP(kldiv_loss); +USE_NO_KERNEL_OP(squeeze); +USE_OP(softmax); +USE_OP(clip_by_norm); +USE_OP(max_pool2d_with_index); +USE_OP(linear_chain_crf); +USE_CPU_ONLY_OP(reshape); +USE_OP(fill_constant); +USE_OP(space_to_depth); +USE_OP(gather); +USE_OP(softmax_with_cross_entropy); +USE_OP(slice); +USE_OP(sign); +USE_OP(expand); +USE_OP(smooth_l1_loss); +USE_NO_KERNEL_OP(tensor_array_to_tensor); +USE_OP(row_conv); +USE_OP(pad2d); +USE_OP(pixel_shuffle); +USE_OP(assign_value); +USE_OP(random_crop); +USE_OP(squared_l2_norm); +USE_OP(save_combine); +USE_OP(pool2d); +USE_OP(cast); +USE_NO_KERNEL_OP(array_to_lod_tensor); +USE_OP(fill_any_like); +USE_NO_KERNEL_OP(flatten); +USE_OP(sample_logits); +USE_OP(pad); +USE_CPU_ONLY_OP(bpr_loss); +USE_OP(roi_pool); +USE_OP(pad_constant_like); +USE_OP(isfinite); +USE_OP(layer_norm); +USE_OP(maxout); +USE_OP(warpctc); +#endif diff --git a/paddle/fluid/train/custom_trainer/feed/executor/executor.cc b/paddle/fluid/train/custom_trainer/feed/executor/executor.cc new file mode 100644 index 0000000000000000000000000000000000000000..569b6d8224d0a1c6afde173fb2246efce49fd803 --- /dev/null +++ b/paddle/fluid/train/custom_trainer/feed/executor/executor.cc @@ -0,0 +1,129 @@ +#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h" + +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/executor.h" +#include "paddle/fluid/platform/init.h" +#include "paddle/fluid/platform/cpu_helper.h" +#include "paddle/fluid/inference/api/details/reset_tensor_array.h" +#include 
"paddle/fluid/platform/enforce.h"
+
+
+namespace paddle {
+namespace custom_trainer {
+namespace feed {
+
+namespace {
+
+// Reads the whole file `filename` into `*contents` in binary mode.
+// Returns 0 on success (an empty file yields an empty string) and -1 when the
+// file cannot be opened, its size cannot be determined, or the read fails.
+int ReadBinaryFile(const std::string& filename, std::string* contents) {
+    std::ifstream fin(filename, std::ios::in | std::ios::binary);
+    if (!fin) {
+        VLOG(2) << "Cannot open file " << filename;
+        return -1;
+    }
+    fin.seekg(0, std::ios::end);
+    const std::streamoff file_size = fin.tellg();
+    contents->clear();
+    if (file_size < 0) {
+        // tellg() reports failure as -1; resizing with it would request a huge buffer.
+        VLOG(2) << "Cannot determine size of file " << filename;
+        return -1;
+    }
+    if (file_size == 0) {
+        // Nothing to read; indexing element 0 of an empty string would throw.
+        return 0;
+    }
+    contents->resize(static_cast<std::string::size_type>(file_size));
+    fin.seekg(0, std::ios::beg);
+    fin.read(&(*contents)[0], static_cast<std::streamsize>(contents->size()));
+    if (!fin) {
+        VLOG(2) << "Cannot read file " << filename;
+        contents->clear();
+        return -1;
+    }
+    return 0;
+}
+
+// Builds a ProgramDesc from the serialized program stored in `model_filename`.
+// Returns nullptr when the file cannot be read or is empty. The executor
+// argument is currently unused.
+std::unique_ptr<paddle::framework::ProgramDesc> Load(
+        paddle::framework::Executor* /*executor*/, const std::string& model_filename) {
+    VLOG(3) << "loading model from " << model_filename;
+    std::string program_desc_str;
+    if (ReadBinaryFile(model_filename, &program_desc_str) != 0) {
+        return nullptr;
+    }
+    if (program_desc_str.empty()) {
+        VLOG(2) << "model file is empty: " << model_filename;
+        return nullptr;
+    }
+    std::unique_ptr<paddle::framework::ProgramDesc> main_program(
+        new paddle::framework::ProgramDesc(program_desc_str));
+    return main_program;
+}
+
+}  // namespace
+
+// Private state of SimpleExecute, kept out of the header.
+struct SimpleExecute::Context {
+    explicit Context(const ::paddle::platform::Place& place) : place(place), executor(place) {
+    }
+    // Stored by value: the TrainerContext that supplied the place is not owned
+    // by this object, so keeping a reference to its member could dangle.
+    ::paddle::platform::Place place;
+    ::paddle::framework::Executor executor;
+    ::std::unique_ptr<::paddle::framework::ProgramDesc> main_program;
+    ::std::unique_ptr<::paddle::framework::ExecutorPrepareContext> prepare_context;
+    details::TensorArrayBatchCleaner tensor_array_batch_cleaner;
+};
+
+// Constructor and destructor are defined here, where Context is a complete
+// type, because the header only forward-declares it.
+SimpleExecute::SimpleExecute() {
+
+}
+
+SimpleExecute::~SimpleExecute() {
+
+}
+
+// Loads and prepares the programs named by `exe_config`.
+//   exe_config["num_threads"]      optional thread count (1 when absent)
+//   exe_config["startup_program"]  path of the serialized startup program
+//   exe_config["main_program"]     path of the serialized main program
+// The startup program is run once, then the main program is prepared and its
+// variables are created in this executor's scope.
+// Returns 0 on success and -1 on any failure; after a failure the executor is
+// left uninitialized, so run() refuses to execute.
+int SimpleExecute::initialize(YAML::Node exe_config,
+        std::shared_ptr<TrainerContext> context_ptr) {
+    // Drop any earlier context first so every failure path below leaves
+    // `_context` null instead of half-initialized.
+    _context.reset(nullptr);
+    if (context_ptr == nullptr) {
+        VLOG(2) << "fail to initialize: context_ptr is null";
+        return -1;
+    }
+
+    paddle::framework::InitDevices(false);
+    if (exe_config["num_threads"]) {
+        paddle::platform::SetNumThreads(exe_config["num_threads"].as<int>());
+    } else {
+        paddle::platform::SetNumThreads(1);
+    }
+
+    if (!exe_config["startup_program"] ||
+        !exe_config["main_program"]) {
+        VLOG(2) << "fail to load config";
+        return -1;
+    }
+
+    try {
+        const std::string startup_path = exe_config["startup_program"].as<std::string>();
+        const std::string main_path = exe_config["main_program"].as<std::string>();
+
+        // Build into a local and publish to `_context` only once complete.
+        std::unique_ptr<Context> context(new Context(context_ptr->cpu_place));
+        auto startup_program = Load(&context->executor, startup_path);
+        if (startup_program == nullptr) {
+            VLOG(2) << "fail to load startup_program: " << startup_path;
+            return -1;
+        }
+
+        context->executor.Run(*startup_program, this->scope(), 0, false, true);
+
+        context->main_program = Load(&context->executor, main_path);
+        if (context->main_program == nullptr) {
+            VLOG(2) << "fail to load main_program: " << main_path;
+            return -1;
+        }
+        context->prepare_context = context->executor.Prepare(*context->main_program, 0);
+        context->executor.CreateVariables(*context->main_program, this->scope(), 0);
+        _context.reset(context.release());
+    } catch (const ::paddle::platform::EnforceNotMet& err) {
+        VLOG(2) << err.what();
+        _context.reset(nullptr);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Executes the prepared main program once in this executor's scope.
+// Returns 0 on success, -1 when initialize() has not succeeded or when the
+// framework raises EnforceNotMet.
+int SimpleExecute::run() {
+    if (_context == nullptr) {
+        VLOG(2) << "need initialize before run";
+        return -1;
+    }
+    try {
+        _context->executor.RunPreparedContext(_context->prepare_context.get(), this->scope(),
+                false, /* don't create local scope each time*/
+                false /* don't create variable each time */);
+
+        // For some other vector like containers not cleaned after each batch.
+        _context->tensor_array_batch_cleaner.CollectNoTensorVars(this->scope());
+        _context->tensor_array_batch_cleaner.ResetNoTensorVars();
+    } catch (const ::paddle::platform::EnforceNotMet& err) {
+        VLOG(2) << err.what();
+        return -1;
+    }
+    return 0;
+}
+
+} // namespace feed
+} // namespace custom_trainer
+} // namespace paddle
diff --git a/paddle/fluid/train/custom_trainer/feed/executor/executor.h b/paddle/fluid/train/custom_trainer/feed/executor/executor.h
index 359ba0b65ad22e202f2235d70f7eef144392a9b8..64eb7a76db875e2000949987dd247139b9d139e0 100644
--- a/paddle/fluid/train/custom_trainer/feed/executor/executor.h
+++ b/paddle/fluid/train/custom_trainer/feed/executor/executor.h
@@ -1,6 +1,6 @@
 #pragma once
 #include
-#include "paddle/fluid/framework/executor.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/train/custom_trainer/feed/common/registerer.h"
 #include "paddle/fluid/train/custom_trainer/feed/trainer_context.h"
 
@@ -23,7 +23,7 @@ public:
     }
     //直接取var
     template
-    T* var(const std::string& name) {
+    const T& var(const std::string& name) {
         return _scope.Var(name)->Get();
     }
     template
@@ -31,8 +31,8 @@ public:
         return _scope.Var(name)->GetMutable();
     }
 
-    //执行n轮训练,每轮回调(epoch_id, _scope)
-    virtual int run(uint32_t epoch_num, std::function) = 0;
+    // Run training
+    virtual int run() = 0;
 
     virtual bool is_dump_all_model() {
         return false;
@@ -44,13 +44,14 @@ REGISTER_REGISTERER(Executor);
 
-class SimpleExecutor : public Executor {
+class SimpleExecute : public Executor {
 public:
-    SimpleExecutor() {}
-    virtual ~SimpleExecutor() {}
+    SimpleExecute();
+    virtual ~SimpleExecute();
     virtual int initialize(YAML::Node exe_config,
         std::shared_ptr context_ptr);
-    virtual int run(uint32_t epoch_num, std::function) = 0;
+    virtual int run();
 protected:
-    std::shared_ptr<::paddle::framework::Executor> _executor;
+    struct Context;
+    std::unique_ptr<Context> _context;
 };
 
 } // namespace feed
diff --git a/paddle/fluid/train/custom_trainer/feed/main.cc b/paddle/fluid/train/custom_trainer/feed/main.cc
index
240289115cc5d50279ea786c48fbd06dbcd9aced..ea3140c62d348fcbaaf34b650b6323c1a790a0d1 100644 --- a/paddle/fluid/train/custom_trainer/feed/main.cc +++ b/paddle/fluid/train/custom_trainer/feed/main.cc @@ -5,6 +5,8 @@ #include "paddle/fluid/train/custom_trainer/feed/trainer_context.h" #include "paddle/fluid/train/custom_trainer/feed/process/process.h" #include "paddle/fluid/train/custom_trainer/feed/process/init_env_process.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/pybind/pybind.h" using namespace paddle::custom_trainer::feed; @@ -19,7 +21,6 @@ int main(int argc, char* argv[]) { //load trainer config auto trainer_context_ptr = std::make_shared(); trainer_context_ptr->trainer_config = YAML::LoadFile(FLAGS_feed_trainer_conf_path); - VLOG(3) << "yaml node size" << trainer_context_ptr->trainer_config.size(); std::vector process_name_list = { "InitEnvProcess", diff --git a/paddle/fluid/train/custom_trainer/feed/process/init_env_process.cc b/paddle/fluid/train/custom_trainer/feed/process/init_env_process.cc index 351a82cede9f973d112cb9fa603b8ab11a9b6fd2..e8d3c570cf7f28678b666f33fd7f875daf78e194 100644 --- a/paddle/fluid/train/custom_trainer/feed/process/init_env_process.cc +++ b/paddle/fluid/train/custom_trainer/feed/process/init_env_process.cc @@ -17,10 +17,7 @@ int InitEnvProcess::initialize(std::shared_ptr context_ptr) { paddle::framework::InitDevices(false); context_ptr->cpu_place = paddle::platform::CPUPlace(); - YAML::Node config; - config.reset(_context_ptr->trainer_config); - VLOG(3) << "yaml node size : " << config.size(); - + YAML::Node config = _context_ptr->trainer_config; //environment std::string env_class = config["environment"]["environment_class"].as(); auto* environment = CREATE_CLASS(RuntimeEnvironment, env_class); diff --git a/paddle/fluid/train/custom_trainer/feed/unit_test/main.cc b/paddle/fluid/train/custom_trainer/feed/unit_test/main.cc new file mode 100644 index 
0000000000000000000000000000000000000000..7ff531d8442508b2daf7b8e56b631e0829a821fb
--- /dev/null
+++ b/paddle/fluid/train/custom_trainer/feed/unit_test/main.cc
@@ -0,0 +1,12 @@
+#include
+#include
+#include
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/pybind/pybind.h"
+ // Entry point of the unit-test binary: initializes logging, lets gtest and the flag parser consume argv, then runs all tests.
+int32_t main(int32_t argc, char** argv) {
+    ::google::InitGoogleLogging(argv[0]);  // argv[0] is passed as the logging program name
+    ::testing::InitGoogleTest(&argc, argv);  // called before flag parsing; NOTE(review): presumably so gtest strips its own flags first - confirm
+    ::google::ParseCommandLineFlags(&argc, &argv, true);  // NOTE(review): third argument presumably removes parsed flags from argv - confirm
+    return RUN_ALL_TESTS();  // result of the test run becomes the process exit status
+}
diff --git a/paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc b/paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc
new file mode 100644
index 0000000000000000000000000000000000000000..ffecb3e8e3502ffa69734d592fbeb1cf7aab98f9
--- /dev/null
+++ b/paddle/fluid/train/custom_trainer/feed/unit_test/test_executor.cc
@@ -0,0 +1,80 @@
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+
+#include <iostream>
+#include <gtest/gtest.h>
+
+#include "paddle/fluid/train/custom_trainer/feed/executor/executor.h"
+#include "paddle/fluid/framework/tensor_util.h"
+
+namespace paddle {
+namespace custom_trainer {
+namespace feed {
+
+// initialize() must reject a config without the program paths and accept
+// configs that provide them, with or without an explicit thread count.
+TEST(testSimpleExecute, initialize) {
+    SimpleExecute execute;
+    auto context_ptr = std::make_shared<TrainerContext>();
+    YAML::Node config = YAML::Load("[1, 2, 3]");
+    ASSERT_NE(0, execute.initialize(config, context_ptr));
+    config = YAML::Load("{startup_program: ./data/startup_program, main_program: ./data/main_program}");
+    ASSERT_EQ(0, execute.initialize(config, context_ptr));
+    // The executor reads the key "num_threads"; any other spelling is silently
+    // ignored and would leave the explicit-thread-count path untested.
+    config = YAML::Load("{num_threads: 2, startup_program: ./data/startup_program, main_program: ./data/main_program}");
+    ASSERT_EQ(0, execute.initialize(config, context_ptr));
+}
+
+// Returns a pseudo-random float in [min, max], drawn from rand().
+float uniform(float min, float max) {
+    float result = static_cast<float>(rand()) / RAND_MAX;
+    return min + result * (max - min);
+}
+
+// Fills one training batch: x is (batch_size, 2) with coordinates drawn from
+// [-2, 2]; y is (batch_size, 1) and holds 1 when the point lies strictly
+// inside the unit circle, 0 otherwise.
+void next_batch(int batch_size, const paddle::platform::Place& place, paddle::framework::LoDTensor* x_tensor, paddle::framework::LoDTensor* y_tensor) {
+
+    x_tensor->Resize({batch_size, 2});
+    auto x_data = x_tensor->mutable_data<float>(place);
+
+    y_tensor->Resize({batch_size, 1});
+    auto y_data = y_tensor->mutable_data<float>(place);
+
+    for (int i = 0; i < batch_size; ++i) {
+        const float x0 = uniform(-2, 2);
+        const float x1 = uniform(-2, 2);
+        x_data[i * 2] = x0;
+        x_data[i * 2 + 1] = x1;
+        // Squared distance from the origin.
+        const float dis = x0 * x0 + x1 * x1;
+        y_data[i] = dis < 1.0f ? 1.0f : 0.0f;
+    }
+}
+
+// Runs one batch through the prepared program and prints the resulting loss.
+TEST(testSimpleExecute, run) {
+    SimpleExecute execute;
+    auto context_ptr = std::make_shared<TrainerContext>();
+    auto config = YAML::Load("{num_threads: 2, startup_program: ./data/startup_program, main_program: ./data/main_program}");
+    ASSERT_EQ(0, execute.initialize(config, context_ptr));
+
+
+    auto x_var = execute.mutable_var<::paddle::framework::LoDTensor>("x");
+    auto y_var = execute.mutable_var<::paddle::framework::LoDTensor>("y");
+    ASSERT_NE(nullptr, x_var);
+    ASSERT_NE(nullptr, y_var);
+
+    next_batch(1024, context_ptr->cpu_place, x_var, y_var);
+
+    ASSERT_EQ(0, execute.run());
+
+    // var() returns a const reference; bind to it rather than copying the tensor.
+    const auto& loss_var = execute.var<::paddle::framework::LoDTensor>("loss");
+    auto loss = loss_var.data<float>()[0];
+    std::cout << "loss: " << loss << std::endl;
+}
+
+} // namespace feed
+} // namespace custom_trainer
+} // namespace paddle